staging: lustre: delete the filesystem from the tree.

The Lustre filesystem has been in the kernel tree for over 5 years now.
While it has been an endless source of enjoyment for new kernel
developers learning how to do basic coding-style cleanups, as well as a
semi-entertaining source of bewilderment for the vfs developers any
time they have looked into the codebase to try to figure out how to port
their latest api changes to this filesystem, it has never really reached
the "this is in shape to get out of staging" state, despite many
half-completed attempts.

And getting code out of staging is the main goal of that portion of the
kernel tree.  Code should not stagnate, and it feels like having this
code in staging is only making the development cycle of the filesystem
take longer than it should.  There is a whole separate out-of-tree
copy of this codebase where the developers do their work, and random
changes are then thrown over the wall at staging at some later point in
time.  This dual-tree development model has never worked, and the state
of this codebase is proof of that.

So, let's just delete the whole mess.  Now the lustre developers can go
off and work in their out-of-tree codebase without having to worry about
providing valid changelog entries and breaking their patches up into
logical pieces.  They can take the time they would have spent on those
types of housekeeping chores and get the codebase into much better
shape, and it can be submitted for inclusion into the real part of the
kernel tree when ready.

Cc: Oleg Drokin <oleg.drokin@intel.com>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
commit be65f9ed26 by Greg Kroah-Hartman
100 files changed, 0 insertions(+), 61189 deletions(-)
 MAINTAINERS | 9 -
 drivers/staging/Kconfig | 2 -
 drivers/staging/Makefile | 1 -
 drivers/staging/lustre/Kconfig | 3 -
 drivers/staging/lustre/Makefile | 2 -
 drivers/staging/lustre/README.txt | 83 -
 drivers/staging/lustre/TODO | 302 -
 drivers/staging/lustre/include/linux/libcfs/libcfs.h | 76 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h | 434 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h | 208 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h | 207 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h | 194 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h | 869 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_private.h | 200 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_string.h | 102 -
 drivers/staging/lustre/include/linux/lnet/api.h | 212 -
 drivers/staging/lustre/include/linux/lnet/lib-lnet.h | 652 -
 drivers/staging/lustre/include/linux/lnet/lib-types.h | 666 -
 drivers/staging/lustre/include/linux/lnet/socklnd.h | 87 -
 drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h | 149 -
 drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h | 141 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h | 150 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h | 669 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h | 123 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h | 556 -
 drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h | 119 -
 drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h | 44 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h | 261 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h | 293 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h | 72 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h | 2690 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h | 229 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h | 94 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h | 236 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h | 94 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h | 1327 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h | 27 -
 drivers/staging/lustre/lnet/Kconfig | 46 -
 drivers/staging/lustre/lnet/Makefile | 1 -
 drivers/staging/lustre/lnet/klnds/Makefile | 1 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile | 5 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 2958 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 1048 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 3763 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c | 296 -
 drivers/staging/lustre/lnet/klnds/socklnd/Makefile | 6 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c | 2921 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h | 704 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 2586 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 534 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c | 184 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c | 810 -
 drivers/staging/lustre/lnet/libcfs/Makefile | 16 -
 drivers/staging/lustre/lnet/libcfs/debug.c | 461 -
 drivers/staging/lustre/lnet/libcfs/fail.c | 146 -
 drivers/staging/lustre/lnet/libcfs/hash.c | 2065 -
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 1086 -
 drivers/staging/lustre/lnet/libcfs/libcfs_lock.c | 155 -
 drivers/staging/lustre/lnet/libcfs/libcfs_mem.c | 171 -
 drivers/staging/lustre/lnet/libcfs/libcfs_string.c | 562 -
 drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c | 139 -
 drivers/staging/lustre/lnet/libcfs/linux-crypto.c | 447 -
 drivers/staging/lustre/lnet/libcfs/linux-crypto.h | 30 -
 drivers/staging/lustre/lnet/libcfs/linux-debug.c | 142 -
 drivers/staging/lustre/lnet/libcfs/linux-tracefile.c | 258 -
 drivers/staging/lustre/lnet/libcfs/module.c | 758 -
 drivers/staging/lustre/lnet/libcfs/tracefile.c | 1198 -
 drivers/staging/lustre/lnet/libcfs/tracefile.h | 274 -
 drivers/staging/lustre/lnet/lnet/Makefile | 10 -
 drivers/staging/lustre/lnet/lnet/acceptor.c | 501 -
 drivers/staging/lustre/lnet/lnet/api-ni.c | 2307 -
 drivers/staging/lustre/lnet/lnet/config.c | 1235 -
 drivers/staging/lustre/lnet/lnet/lib-eq.c | 426 -
 drivers/staging/lustre/lnet/lnet/lib-md.c | 463 -
 drivers/staging/lustre/lnet/lnet/lib-me.c | 274 -
 drivers/staging/lustre/lnet/lnet/lib-move.c | 2386 -
 drivers/staging/lustre/lnet/lnet/lib-msg.c | 625 -
 drivers/staging/lustre/lnet/lnet/lib-ptl.c | 987 -
 drivers/staging/lustre/lnet/lnet/lib-socket.c | 585 -
 drivers/staging/lustre/lnet/lnet/lo.c | 105 -
 drivers/staging/lustre/lnet/lnet/module.c | 239 -
 drivers/staging/lustre/lnet/lnet/net_fault.c | 1023 -
 drivers/staging/lustre/lnet/lnet/nidstrings.c | 1261 -
 drivers/staging/lustre/lnet/lnet/peer.c | 456 -
 drivers/staging/lustre/lnet/lnet/router.c | 1799 -
 drivers/staging/lustre/lnet/lnet/router_proc.c | 907 -
 drivers/staging/lustre/lnet/selftest/Makefile | 7 -
 drivers/staging/lustre/lnet/selftest/brw_test.c | 526 -
 drivers/staging/lustre/lnet/selftest/conctl.c | 801 -
 drivers/staging/lustre/lnet/selftest/conrpc.c | 1396 -
 drivers/staging/lustre/lnet/selftest/conrpc.h | 142 -
 drivers/staging/lustre/lnet/selftest/console.c | 2104 -
 drivers/staging/lustre/lnet/selftest/console.h | 244 -
 drivers/staging/lustre/lnet/selftest/framework.c | 1786 -
 drivers/staging/lustre/lnet/selftest/module.c | 169 -
 drivers/staging/lustre/lnet/selftest/ping_test.c | 228 -
 drivers/staging/lustre/lnet/selftest/rpc.c | 1682 -
 drivers/staging/lustre/lnet/selftest/rpc.h | 295 -
 drivers/staging/lustre/lnet/selftest/selftest.h | 622 -
 drivers/staging/lustre/lnet/selftest/timer.c | 244 -

+ 0 - 9
MAINTAINERS

@@ -13329,15 +13329,6 @@ S:	Odd Fixes
 F:	Documentation/devicetree/bindings/staging/iio/
 F:	drivers/staging/iio/
 
-STAGING - LUSTRE PARALLEL FILESYSTEM
-M:	Oleg Drokin <oleg.drokin@intel.com>
-M:	Andreas Dilger <andreas.dilger@intel.com>
-M:	James Simmons <jsimmons@infradead.org>
-L:	lustre-devel@lists.lustre.org (moderated for non-subscribers)
-W:	http://wiki.lustre.org/
-S:	Maintained
-F:	drivers/staging/lustre
-
 STAGING - NVIDIA COMPLIANT EMBEDDED CONTROLLER INTERFACE (nvec)
 M:	Marc Dietrich <marvin24@gmx.de>
 L:	ac100@lists.launchpad.net (moderated for non-subscribers)

+ 0 - 2
drivers/staging/Kconfig

@@ -84,8 +84,6 @@ source "drivers/staging/netlogic/Kconfig"
 
 source "drivers/staging/mt29f_spinand/Kconfig"
 
-source "drivers/staging/lustre/Kconfig"
-
 source "drivers/staging/dgnc/Kconfig"
 
 source "drivers/staging/gs_fpgaboot/Kconfig"

+ 0 - 1
drivers/staging/Makefile

@@ -32,7 +32,6 @@ obj-$(CONFIG_STAGING_BOARD)	+= board/
 obj-$(CONFIG_LTE_GDM724X)	+= gdm724x/
 obj-$(CONFIG_FIREWIRE_SERIAL)	+= fwserial/
 obj-$(CONFIG_GOLDFISH)		+= goldfish/
-obj-$(CONFIG_LNET)		+= lustre/
 obj-$(CONFIG_DGNC)			+= dgnc/
 obj-$(CONFIG_MTD_SPINAND_MT29F)	+= mt29f_spinand/
 obj-$(CONFIG_GS_FPGABOOT)	+= gs_fpgaboot/

+ 0 - 3
drivers/staging/lustre/Kconfig

@@ -1,3 +0,0 @@
-source "drivers/staging/lustre/lnet/Kconfig"
-
-source "drivers/staging/lustre/lustre/Kconfig"

+ 0 - 2
drivers/staging/lustre/Makefile

@@ -1,2 +0,0 @@
-obj-$(CONFIG_LNET)		+= lnet/
-obj-$(CONFIG_LUSTRE_FS)		+= lustre/

+ 0 - 83
drivers/staging/lustre/README.txt

@@ -1,83 +0,0 @@
-Lustre Parallel Filesystem Client
-=================================
-
-The Lustre file system is an open-source, parallel file system
-that supports many requirements of leadership class HPC simulation
-environments.
-Born from a research project at Carnegie Mellon University,
-the Lustre file system is a widely-used option in HPC.
-The Lustre file system provides a POSIX compliant file system interface,
-can scale to thousands of clients, petabytes of storage and
-hundreds of gigabytes per second of I/O bandwidth.
-
-Unlike shared disk storage cluster filesystems (e.g. OCFS2, GFS, GPFS),
-Lustre has independent Metadata and Data servers that clients can access
-in parallel to maximize performance.
-
-In order to use Lustre client you will need to download the "lustre-client"
-package that contains the userspace tools from http://lustre.org/download/
-
-You will need to install and configure your Lustre servers separately.
-
-Mount Syntax
-============
-After you installed the lustre-client tools including mount.lustre binary
-you can mount your Lustre filesystem with:
-
-mount -t lustre mgs:/fsname mnt
-
-where mgs is the host name or ip address of your Lustre MGS(management service)
-fsname is the name of the filesystem you would like to mount.
-
-
-Mount Options
-=============
-
-  noflock
-	Disable posix file locking (Applications trying to use
-	the functionality will get ENOSYS)
-
-  localflock
-	Enable local flock support, using only client-local flock
-	(faster, for applications that require flock but do not run
-	 on multiple nodes).
-
-  flock
-	Enable cluster-global posix file locking coherent across all
-	client nodes.
-
-  user_xattr, nouser_xattr
-	Support "user." extended attributes (or not)
-
-  user_fid2path, nouser_fid2path
-	Enable FID to path translation by regular users (or not)
-
-  checksum, nochecksum
-	Verify data consistency on the wire and in memory as it passes
-	between the layers (or not).
-
-  lruresize, nolruresize
-	Allow lock LRU to be controlled by memory pressure on the server
-	(or only 100 (default, controlled by lru_size proc parameter) locks
-	 per CPU per server on this client).
-
-  lazystatfs, nolazystatfs
-	Do not block in statfs() if some of the servers are down.
-
-  32bitapi
-	Shrink inode numbers to fit into 32 bits. This is necessary
-	if you plan to reexport Lustre filesystem from this client via
-	NFSv4.
-
-  verbose, noverbose
-	Enable mount/umount console messages (or not)
-
-More Information
-================
-You can get more information at the Lustre website: http://wiki.lustre.org/
-
-Source for the userspace tools and out-of-tree client and server code
-is available at: http://git.hpdd.intel.com/fs/lustre-release.git
-
-Latest binary packages:
-http://lustre.org/download/
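
[Editor's note: the mount command in the deleted README above maps onto a
single mount(2) call.  The sketch below is not Lustre code; "mgs" and
"fsname" are the same placeholders the README uses, and it assumes a Lustre
client module is loaded so the "lustre" filesystem type is registered.]

    /* Hedged sketch: C equivalent of "mount -t lustre mgs:/fsname /mnt/lustre" */
    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* device is "<mgs host>:/<fsname>"; the data argument carries
             * the mount options listed in the README above */
            if (mount("mgs:/fsname", "/mnt/lustre", "lustre", 0,
                      "flock,user_xattr") != 0) {
                    fprintf(stderr, "mount failed: %s\n", strerror(errno));
                    return 1;
            }
            return 0;
    }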

+ 0 - 302
drivers/staging/lustre/TODO

@@ -1,302 +0,0 @@
-Currently all the work directed toward the lustre upstream client is tracked
-at the following link:
-
-https://jira.hpdd.intel.com/browse/LU-9679
-
-Under this ticket you will see the following work items that need to be
-addressed:
-
-******************************************************************************
-* libcfs cleanup
-*
-* https://jira.hpdd.intel.com/browse/LU-9859
-*
-* Track all the cleanups and simplification of the libcfs module. Remove
-* functions the kernel provides. Possibly integrate some of the functionality
-* into the kernel proper.
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-100086
-
-LNET_MINOR conflicts with USERIO_MINOR
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8130
-
-Fix and simplify libcfs hash handling
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8703
-
-The current way we handle SMP is wrong. Platforms like ARM and KNL can have
-core and NUMA setups with things like NUMA nodes with no cores. We need to
-handle such cases. This work also greatly simplified the lustre SMP code.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9019
-
-Replace libcfs time API with standard kernel APIs. Also migrate away from
-jiffies. We found jiffies can vary on nodes which can lead to corner cases
-that can break the file system due to nodes having inconsistent behavior.
-So move to time64_t and ktime_t as much as possible.
-
-******************************************************************************
-* Proper IB support for ko2iblnd
-******************************************************************************
-https://jira.hpdd.intel.com/browse/LU-9179
-
-Poor performance for the ko2iblnd driver. This is related to many of the
-patches below that are missing from the linux client.
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9886
-
-Crash in upstream kiblnd_handle_early_rxs()
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10394 / LU-10526 / LU-10089
-
-Default to default to using MEM_REG
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10459
-
-throttle tx based on queue depth
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9943
-
-correct WR fast reg accounting
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10291
-
-remove concurrent_sends tunable
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10213
-
-calculate qp max_send_wrs properly
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9810
-
-use less CQ entries for each connection
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10129 / LU-9180
-
-rework map_on_demand behavior
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10129
-
-query device capabilities
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10015
-
-fix race at kiblnd_connect_peer
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9983
-
-allow for discontiguous fragments
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9500
-
-Don't Page Align remote_addr with FastReg
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9448
-
-handle empty CPTs
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9507
-
-Don't Assert On Reconnect with MultiQP
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9472
-
-Fix FastReg map/unmap for MLX5
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9425
-
-Turn on 2 sges by default
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8943
-
-Enable Multiple OPA Endpoints between Nodes
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-5718
-
-multiple sges for work request
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9094
-
-kill timedout txs from ibp_tx_queue
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9094
-
-reconnect peer for REJ_INVALID_SERVICE_ID
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8752
-
-Stop MLX5 triggering a dump_cqe
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8874
-
-Move ko2iblnd to latest RDMA changes
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8875 / LU-8874
-
-Change to new RDMA done callback mechanism
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9164 / LU-8874
-
-Incorporate RDMA map/unamp API's into ko2iblnd
-
-******************************************************************************
-* sysfs/debugfs fixes
-*
-* https://jira.hpdd.intel.com/browse/LU-8066
-*
-* The original migration to sysfs was done in haste without properly working
-* utilities to test the changes. This covers the work to restore the proper
-* behavior. Huge project to make this right.
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-9431
-
-The function class_process_proc_param was used for our mass updates of proc
-tunables. It didn't work with sysfs and it was just ugly so it was removed.
-In the process the ability to mass update thousands of clients was lost. This
-work restores this in a sane way.
-
-------------------------------------------------------------------------------
-https://jira.hpdd.intel.com/browse/LU-9091
-
-One the major request of users is the ability to pass in parameters into a
-sysfs file in various different units. For example we can set max_pages_per_rpc
-but this can vary on platforms due to different platform sizes. So you can
-set this like max_pages_per_rpc=16MiB. The original code to handle this written
-before the string helpers were created so the code doesn't follow that format
-but it would be easy to move to. Currently the string helpers does the reverse
-of what we need, changing bytes to string. We need to change a string to bytes.
-
-******************************************************************************
-* Proper user land to kernel space interface for Lustre
-*
-* https://jira.hpdd.intel.com/browse/LU-9680
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-8915
-
-Don't use linux list structure as user land arguments for lnet selftest.
-This code is pretty poor quality and really needs to be reworked.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8834
-
-The lustre ioctl LL_IOC_FUTIMES_3 is very generic. Need to either work with
-other file systems with similar functionality and make a common syscall
-interface or rework our server code to automagically do it for us.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-6202
-
-Cleanup up ioctl handling. We have many obsolete ioctls. Also the way we do
-ioctls can be changed over to netlink. This also has the benefit of working
-better with HPC systems that do IO forwarding. Such systems don't like ioctls
-very well.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9667
-
-More cleanups by making our utilities use sysfs instead of ioctls for LNet.
-Also it has been requested to move the remaining ioctls to the netlink API.
-
-******************************************************************************
-* Misc
-******************************************************************************
-
-------------------------------------------------------------------------------
-https://jira.hpdd.intel.com/browse/LU-9855
-
-Clean up obdclass preprocessor code. One of the major eye sores is the various
-pointer redirections and macros used by the obdclass. This makes the code very
-difficult to understand. It was requested by the Al Viro to clean this up before
-we leave staging.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9633
-
-Migrate to sphinx kernel-doc style comments. Add documents in Documentation.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-6142
-
-Possible remaining coding style fix. Remove deadcode. Enforce kernel code
-style. Other minor misc cleanups...
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8837
-
-Separate client/server functionality. Functions only used by server can be
-removed from client. Most of this has been done but we need a inspect of the
-code to make sure.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8964
-
-Lustre client readahead/writeback control needs to better suit kernel providings.
-Currently its being explored. We could end up replacing the CLIO read ahead
-abstract with the kernel proper version.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9862
-
-Patch that landed for LU-7890 leads to static checker errors
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9868
-
-dcache/namei fixes for lustre
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10467
-
-use standard linux wait_events macros work by Neil Brown
-
-------------------------------------------------------------------------------
-
-Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, Andreas Dilger
-<andreas.dilger@intel.com>, James Simmons <jsimmons@infradead.org> and
-Oleg Drokin <oleg.drokin@intel.com>.
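
[Editor's note: on the LU-9091 item above (accepting values like
"max_pages_per_rpc=16MiB"), the missing string-to-bytes direction is a small
amount of code.  This userspace sketch is purely illustrative and is not the
Lustre or kernel implementation:]

    /* Illustrative only: turn "16MiB"-style strings into byte counts */
    #include <stdio.h>
    #include <stdlib.h>

    static long long str_to_bytes(const char *s)
    {
            char *end;
            long long val = strtoll(s, &end, 10);

            if (end == s)
                    return -1;
            /* binary units; a trailing "iB" is not validated in this sketch */
            switch (*end) {
            case 'G': val <<= 10;   /* fall through */
            case 'M': val <<= 10;   /* fall through */
            case 'K': val <<= 10;   break;
            case '\0': break;
            default: return -1;
            }
            return val;
    }

    int main(void)
    {
            printf("16MiB = %lld bytes\n", str_to_bytes("16MiB")); /* 16777216 */
            return 0;
    }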

+ 0 - 76
drivers/staging/lustre/include/linux/libcfs/libcfs.h

@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_LIBCFS_H__
-#define __LIBCFS_LIBCFS_H__
-
-#include <linux/notifier.h>
-#include <linux/workqueue.h>
-#include <linux/sysctl.h>
-
-#include <linux/libcfs/libcfs_debug.h>
-#include <linux/libcfs/libcfs_private.h>
-#include <linux/libcfs/libcfs_fail.h>
-
-#define LIBCFS_VERSION "0.7.0"
-
-extern struct blocking_notifier_head libcfs_ioctl_list;
-static inline int notifier_from_ioctl_errno(int err)
-{
-	if (err == -EINVAL)
-		return NOTIFY_OK;
-	return notifier_from_errno(err) | NOTIFY_STOP_MASK;
-}
-
-int libcfs_setup(void);
-
-extern struct workqueue_struct *cfs_rehash_wq;
-
-void lustre_insert_debugfs(struct ctl_table *table);
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
-			 void __user *buffer, size_t *lenp,
-			 int (*handler)(void *data, int write, loff_t pos,
-					void __user *buffer, int len));
-
-/*
- * Memory
- */
-#if BITS_PER_LONG == 32
-/* limit to lowmem on 32-bit systems */
-#define NUM_CACHEPAGES \
-	min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4)
-#else
-#define NUM_CACHEPAGES totalram_pages
-#endif
-
-#endif /* __LIBCFS_LIBCFS_H__ */

+ 0 - 434
drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h

@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_cpu.h
- *
- * CPU partition
- *   . CPU partition is virtual processing unit
- *
- *   . CPU partition can present 1-N cores, or 1-N NUMA nodes,
- *     in other words, CPU partition is a processors pool.
- *
- * CPU Partition Table (CPT)
- *   . a set of CPU partitions
- *
- *   . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP
- *
- *   . User can specify total number of CPU partitions while creating a
- *     CPT, ID of CPU partition is always start from 0.
- *
- *     Example: if there are 8 cores on the system, while creating a CPT
- *     with cpu_npartitions=4:
- *	      core[0, 1] = partition[0], core[2, 3] = partition[1]
- *	      core[4, 5] = partition[2], core[6, 7] = partition[3]
- *
- *	  cpu_npartitions=1:
- *	      core[0, 1, ... 7] = partition[0]
- *
- *   . User can also specify CPU partitions by string pattern
- *
- *     Examples: cpu_partitions="0[0,1], 1[2,3]"
- *	       cpu_partitions="N 0[0-3], 1[4-8]"
- *
- *     The first character "N" means following numbers are numa ID
- *
- *   . NUMA allocators, CPU affinity threads are built over CPU partitions,
- *     instead of HW CPUs or HW nodes.
- *
- *   . By default, Lustre modules should refer to the global cfs_cpt_tab,
- *     instead of accessing HW CPUs directly, so concurrency of Lustre can be
- *     configured by cpu_npartitions of the global cfs_cpt_tab
- *
- *   . If cpu_npartitions=1(all CPUs in one pool), lustre should work the
- *     same way as 2.2 or earlier versions
- *
- * Author: liang@whamcloud.com
- */
-
-#ifndef __LIBCFS_CPU_H__
-#define __LIBCFS_CPU_H__
-
-#include <linux/cpu.h>
-#include <linux/cpuset.h>
-#include <linux/topology.h>
-
-/* any CPU partition */
-#define CFS_CPT_ANY		(-1)
-
-#ifdef CONFIG_SMP
-/** virtual processing unit */
-struct cfs_cpu_partition {
-	/* CPUs mask for this partition */
-	cpumask_var_t			cpt_cpumask;
-	/* nodes mask for this partition */
-	nodemask_t			*cpt_nodemask;
-	/* spread rotor for NUMA allocator */
-	unsigned int			cpt_spread_rotor;
-};
-
-
-/** descriptor for CPU partitions */
-struct cfs_cpt_table {
-	/* version, reserved for hotplug */
-	unsigned int			ctb_version;
-	/* spread rotor for NUMA allocator */
-	unsigned int			ctb_spread_rotor;
-	/* # of CPU partitions */
-	unsigned int			ctb_nparts;
-	/* partitions tables */
-	struct cfs_cpu_partition	*ctb_parts;
-	/* shadow HW CPU to CPU partition ID */
-	int				*ctb_cpu2cpt;
-	/* all cpus in this partition table */
-	cpumask_var_t			ctb_cpumask;
-	/* all nodes in this partition table */
-	nodemask_t			*ctb_nodemask;
-};
-
-extern struct cfs_cpt_table	*cfs_cpt_tab;
-
-/**
- * return cpumask of CPU partition \a cpt
- */
-cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
-/**
- * print string information of cpt-table
- */
-int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
-/**
- * return total number of CPU partitions in \a cptab
- */
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab);
-/**
- * return number of HW cores or hyper-threadings in a CPU partition \a cpt
- */
-int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt);
-/**
- * is there any online CPU in CPU partition \a cpt
- */
-int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt);
-/**
- * return nodemask of CPU partition \a cpt
- */
-nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt);
-/**
- * shadow current HW processor ID to CPU-partition ID of \a cptab
- */
-int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap);
-/**
- * shadow HW processor ID \a CPU to CPU-partition ID by \a cptab
- */
-int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
-/**
- * bind current thread on a CPU-partition \a cpt of \a cptab
- */
-int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
-/**
- * add \a cpu to CPU partition @cpt of \a cptab, return 1 for success,
- * otherwise 0 is returned
- */
-int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
-/**
- * remove \a cpu from CPU partition \a cpt of \a cptab
- */
-void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
-/**
- * add all cpus in \a mask to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
-			int cpt, cpumask_t *mask);
-/**
- * remove all cpus in \a mask from CPU partition \a cpt
- */
-void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
-			   int cpt, cpumask_t *mask);
-/**
- * add all cpus in NUMA node \a node to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node);
-/**
- * remove all cpus in NUMA node \a node from CPU partition \a cpt
- */
-void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
-
-/**
- * add all cpus in node mask \a mask to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
-			 int cpt, nodemask_t *mask);
-/**
- * remove all cpus in node mask \a mask from CPU partition \a cpt
- */
-void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
-			    int cpt, nodemask_t *mask);
-/**
- * unset all cpus for CPU partition \a cpt
- */
-void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
-/**
- * convert partition id \a cpt to numa node id, if there are more than one
- * nodes in this partition, it might return a different node id each time.
- */
-int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
-
-/**
- * return number of HTs in the same core of \a cpu
- */
-int cfs_cpu_ht_nsiblings(int cpu);
-
-int  cfs_cpu_init(void);
-void cfs_cpu_fini(void);
-
-#else /* !CONFIG_SMP */
-struct cfs_cpt_table;
-#define cfs_cpt_tab ((struct cfs_cpt_table *)NULL)
-
-static inline cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
-	return NULL;
-}
-
-static inline int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
-	return 0;
-}
-static inline int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
-	return 1;
-}
-
-static inline int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 1;
-}
-
-static inline int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 1;
-}
-
-static inline nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
-	return NULL;
-}
-
-static inline int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-}
-
-static inline int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-}
-
-static inline int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-}
-
-static inline int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-}
-
-static inline void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-}
-
-static inline int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpu_ht_nsiblings(int cpu)
-{
-	return 1;
-}
-
-static inline int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpu_init(void)
-{
-	return 0;
-}
-
-static inline void cfs_cpu_fini(void)
-{
-}
-
-#endif /* CONFIG_SMP */
-
-/**
- * destroy a CPU partition table
- */
-void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
-/**
- * create a cfs_cpt_table with \a ncpt number of partitions
- */
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
-
-/*
- * allocate per-cpu-partition data, returned value is an array of pointers,
- * variable can be indexed by CPU ID.
- *	cptab != NULL: size of array is number of CPU partitions
- *	cptab == NULL: size of array is number of HW cores
- */
-void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
-/*
- * destroy per-cpu-partition variable
- */
-void cfs_percpt_free(void *vars);
-int cfs_percpt_number(void *vars);
-
-#define cfs_percpt_for_each(var, i, vars)		\
-	for (i = 0; i < cfs_percpt_number(vars) &&	\
-		((var) = (vars)[i]) != NULL; i++)
-
-/*
- * percpu partition lock
- *
- * There are some use-cases like this in Lustre:
- * . each CPU partition has it's own private data which is frequently changed,
- *   and mostly by the local CPU partition.
- * . all CPU partitions share some global data, these data are rarely changed.
- *
- * LNet is typical example.
- * CPU partition lock is designed for this kind of use-cases:
- * . each CPU partition has it's own private lock
- * . change on private data just needs to take the private lock
- * . read on shared data just needs to take _any_ of private locks
- * . change on shared data needs to take _all_ private locks,
- *   which is slow and should be really rare.
- */
-enum {
-	CFS_PERCPT_LOCK_EX	= -1,	/* negative */
-};
-
-struct cfs_percpt_lock {
-	/* cpu-partition-table for this lock */
-	struct cfs_cpt_table     *pcl_cptab;
-	/* exclusively locked */
-	unsigned int		  pcl_locked;
-	/* private lock table */
-	spinlock_t		**pcl_locks;
-};
-
-/* return number of private locks */
-#define cfs_percpt_lock_num(pcl)	cfs_cpt_number(pcl->pcl_cptab)
-
-/*
- * create a cpu-partition lock based on CPU partition table \a cptab,
- * each private lock has extra \a psize bytes padding data
- */
-struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
-					       struct lock_class_key *keys);
-/* destroy a cpu-partition lock */
-void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
-
-/* lock private lock \a index of \a pcl */
-void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
-
-/* unlock private lock \a index of \a pcl */
-void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
-
-#define CFS_PERCPT_LOCK_KEYS	256
-
-/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */
-#define cfs_percpt_lock_alloc(cptab)					\
-({									\
-	static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS];	\
-	struct cfs_percpt_lock *___lk;					\
-									\
-	if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS)		\
-		___lk = cfs_percpt_lock_create(cptab, NULL);		\
-	else								\
-		___lk = cfs_percpt_lock_create(cptab, ___keys);		\
-	___lk;								\
-})
-
-/**
- * iterate over all CPU partitions in \a cptab
- */
-#define cfs_cpt_for_each(i, cptab)	\
-	for (i = 0; i < cfs_cpt_number(cptab); i++)
-
-#endif /* __LIBCFS_CPU_H__ */
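
[Editor's note: the header comment above works through the cpu_npartitions
example (8 cores split evenly into 4 partitions).  A standalone sketch of
that even split, illustrative only; the real table is built by the deleted
libcfs_cpu.c:]

    #include <stdio.h>

    int main(void)
    {
            int ncores = 8, nparts = 4;
            int per_part = ncores / nparts;
            int cpu;

            /* reproduces: core[0,1]=partition[0], core[2,3]=partition[1], ... */
            for (cpu = 0; cpu < ncores; cpu++)
                    printf("core[%d] = partition[%d]\n", cpu, cpu / per_part);
            return 0;
    }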

+ 0 - 208
drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h

@@ -1,208 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-#ifndef _LIBCFS_CRYPTO_H
-#define _LIBCFS_CRYPTO_H
-
-#include <linux/string.h>
-struct page;
-
-struct cfs_crypto_hash_type {
-	char		*cht_name;      /*< hash algorithm name, equal to
-					 * format name for crypto api
-					 */
-	unsigned int    cht_key;	/*< init key by default (valid for
-					 * 4 bytes context like crc32, adler
-					 */
-	unsigned int    cht_size;       /**< hash digest size */
-};
-
-enum cfs_crypto_hash_alg {
-	CFS_HASH_ALG_NULL       = 0,
-	CFS_HASH_ALG_ADLER32,
-	CFS_HASH_ALG_CRC32,
-	CFS_HASH_ALG_MD5,
-	CFS_HASH_ALG_SHA1,
-	CFS_HASH_ALG_SHA256,
-	CFS_HASH_ALG_SHA384,
-	CFS_HASH_ALG_SHA512,
-	CFS_HASH_ALG_CRC32C,
-	CFS_HASH_ALG_MAX,
-	CFS_HASH_ALG_UNKNOWN	= 0xff
-};
-
-static struct cfs_crypto_hash_type hash_types[] = {
-	[CFS_HASH_ALG_NULL] = {
-		.cht_name	= "null",
-		.cht_key	= 0,
-		.cht_size	= 0
-	},
-	[CFS_HASH_ALG_ADLER32] = {
-		.cht_name	= "adler32",
-		.cht_key	= 1,
-		.cht_size	= 4
-	},
-	[CFS_HASH_ALG_CRC32] = {
-		.cht_name	= "crc32",
-		.cht_key	= ~0,
-		.cht_size	= 4
-	},
-	[CFS_HASH_ALG_CRC32C] = {
-		.cht_name	= "crc32c",
-		.cht_key	= ~0,
-		.cht_size	= 4
-	},
-	[CFS_HASH_ALG_MD5] = {
-		.cht_name	= "md5",
-		.cht_key	= 0,
-		.cht_size	= 16
-	},
-	[CFS_HASH_ALG_SHA1] = {
-		.cht_name	= "sha1",
-		.cht_key	= 0,
-		.cht_size	= 20
-	},
-	[CFS_HASH_ALG_SHA256] = {
-		.cht_name	= "sha256",
-		.cht_key	= 0,
-		.cht_size	= 32
-	},
-	[CFS_HASH_ALG_SHA384] = {
-		.cht_name	= "sha384",
-		.cht_key	= 0,
-		.cht_size	= 48
-	},
-	[CFS_HASH_ALG_SHA512] = {
-		.cht_name	= "sha512",
-		.cht_key	= 0,
-		.cht_size	= 64
-	},
-	[CFS_HASH_ALG_MAX] = {
-		.cht_name	= NULL,
-		.cht_key	= 0,
-		.cht_size	= 64
-	},
-};
-
-/* Maximum size of hash_types[].cht_size */
-#define CFS_CRYPTO_HASH_DIGESTSIZE_MAX	64
-
-/**
- * Return hash algorithm information for the specified algorithm identifier
- *
- * Hash information includes algorithm name, initial seed, hash size.
- *
- * \retval	cfs_crypto_hash_type for valid ID (CFS_HASH_ALG_*)
- * \retval	NULL for unknown algorithm identifier
- */
-static inline const struct cfs_crypto_hash_type *
-cfs_crypto_hash_type(enum cfs_crypto_hash_alg hash_alg)
-{
-	struct cfs_crypto_hash_type *ht;
-
-	if (hash_alg < CFS_HASH_ALG_MAX) {
-		ht = &hash_types[hash_alg];
-		if (ht->cht_name)
-			return ht;
-	}
-	return NULL;
-}
-
-/**
- * Return hash name for hash algorithm identifier
- *
- * \param[in]	hash_alg hash alrgorithm id (CFS_HASH_ALG_*)
- *
- * \retval	string name of known hash algorithm
- * \retval	"unknown" if hash algorithm is unknown
- */
-static inline const char *
-cfs_crypto_hash_name(enum cfs_crypto_hash_alg hash_alg)
-{
-	const struct cfs_crypto_hash_type *ht;
-
-	ht = cfs_crypto_hash_type(hash_alg);
-	if (ht)
-		return ht->cht_name;
-	return "unknown";
-}
-
-/**
- * Return digest size for hash algorithm type
- *
- * \param[in]	hash_alg hash alrgorithm id (CFS_HASH_ALG_*)
- *
- * \retval	hash algorithm digest size in bytes
- * \retval	0 if hash algorithm type is unknown
- */
-static inline int cfs_crypto_hash_digestsize(enum cfs_crypto_hash_alg hash_alg)
-{
-	const struct cfs_crypto_hash_type *ht;
-
-	ht = cfs_crypto_hash_type(hash_alg);
-	if (ht)
-		return ht->cht_size;
-	return 0;
-}
-
-/**
- * Find hash algorithm ID for the specified algorithm name
- *
- * \retval	hash algorithm ID for valid ID (CFS_HASH_ALG_*)
- * \retval	CFS_HASH_ALG_UNKNOWN for unknown algorithm name
- */
-static inline unsigned char cfs_crypto_hash_alg(const char *algname)
-{
-	enum cfs_crypto_hash_alg hash_alg;
-
-	for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
-		if (!strcmp(hash_types[hash_alg].cht_name, algname))
-			return hash_alg;
-
-	return CFS_HASH_ALG_UNKNOWN;
-}
-
-int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
-			   const void *buf, unsigned int buf_len,
-			   unsigned char *key, unsigned int key_len,
-			   unsigned char *hash, unsigned int *hash_len);
-
-struct ahash_request *
-cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
-		     unsigned char *key, unsigned int key_len);
-int cfs_crypto_hash_update_page(struct ahash_request *desc,
-				struct page *page, unsigned int offset,
-				unsigned int len);
-int cfs_crypto_hash_update(struct ahash_request *desc, const void *buf,
-			   unsigned int buf_len);
-int cfs_crypto_hash_final(struct ahash_request *desc,
-			  unsigned char *hash, unsigned int *hash_len);
-int cfs_crypto_register(void);
-void cfs_crypto_unregister(void);
-int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg);
-#endif
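
[Editor's note: cfs_crypto_hash_alg() above is a table-driven name-to-ID
lookup over hash_types[].  A standalone sketch of the same pattern, trimmed
to three entries for illustration; not the Lustre code:]

    #include <stdio.h>
    #include <string.h>

    struct hash_type { const char *name; unsigned int digest_size; };

    static const struct hash_type hash_types[] = {
            { "adler32", 4 },
            { "md5", 16 },
            { "sha256", 32 },
    };

    static int hash_alg(const char *name)
    {
            unsigned int i;

            for (i = 0; i < sizeof(hash_types) / sizeof(hash_types[0]); i++)
                    if (!strcmp(hash_types[i].name, name))
                            return i;
            return -1;      /* stands in for CFS_HASH_ALG_UNKNOWN */
    }

    int main(void)
    {
            printf("sha256 -> id %d\n", hash_alg("sha256"));
            return 0;
    }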

+ 0 - 207
drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h

@@ -1,207 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_debug.h
- *
- * Debug messages and assertions
- *
- */
-
-#ifndef __LIBCFS_DEBUG_H__
-#define __LIBCFS_DEBUG_H__
-
-#include <linux/limits.h>
-#include <uapi/linux/lnet/libcfs_debug.h>
-
-/*
- *  Debugging
- */
-extern unsigned int libcfs_subsystem_debug;
-extern unsigned int libcfs_stack;
-extern unsigned int libcfs_debug;
-extern unsigned int libcfs_printk;
-extern unsigned int libcfs_console_ratelimit;
-extern unsigned int libcfs_console_max_delay;
-extern unsigned int libcfs_console_min_delay;
-extern unsigned int libcfs_console_backoff;
-extern unsigned int libcfs_debug_binary;
-extern char libcfs_debug_file_path_arr[PATH_MAX];
-
-int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
-int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
-
-/* Has there been an LBUG? */
-extern unsigned int libcfs_catastrophe;
-extern unsigned int libcfs_panic_on_lbug;
-
-/* Enable debug-checks on stack size - except on x86_64 */
-#if !defined(__x86_64__)
-# ifdef __ia64__
-#  define CDEBUG_STACK() (THREAD_SIZE -				 \
-			  ((unsigned long)__builtin_dwarf_cfa() &       \
-			   (THREAD_SIZE - 1)))
-# else
-#  define CDEBUG_STACK() (THREAD_SIZE -				 \
-			  ((unsigned long)__builtin_frame_address(0) &  \
-			   (THREAD_SIZE - 1)))
-# endif /* __ia64__ */
-
-#define __CHECK_STACK(msgdata, mask, cdls)			      \
-do {								    \
-	if (unlikely(CDEBUG_STACK() > libcfs_stack)) {		  \
-		LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL);   \
-		libcfs_stack = CDEBUG_STACK();			  \
-		libcfs_debug_msg(msgdata,			       \
-				 "maximum lustre stack %lu\n",	  \
-				 CDEBUG_STACK());		       \
-		(msgdata)->msg_mask = mask;			     \
-		(msgdata)->msg_cdls = cdls;			     \
-		dump_stack();					   \
-	      /*panic("LBUG");*/					\
-	}							       \
-} while (0)
-#define CFS_CHECK_STACK(msgdata, mask, cdls)  __CHECK_STACK(msgdata, mask, cdls)
-#else /* __x86_64__ */
-#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while (0)
-#define CDEBUG_STACK() (0L)
-#endif /* __x86_64__ */
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#define CDEBUG_DEFAULT_MAX_DELAY (600 * HZ)	 /* jiffies */
-#define CDEBUG_DEFAULT_MIN_DELAY ((HZ + 1) / 2) /* jiffies */
-#define CDEBUG_DEFAULT_BACKOFF   2
-struct cfs_debug_limit_state {
-	unsigned long   cdls_next;
-	unsigned int cdls_delay;
-	int	     cdls_count;
-};
-
-struct libcfs_debug_msg_data {
-	const char *msg_file;
-	const char *msg_fn;
-	int	    msg_subsys;
-	int	    msg_line;
-	int	    msg_mask;
-	struct cfs_debug_limit_state *msg_cdls;
-};
-
-#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls)		\
-do {								\
-	(data)->msg_subsys = DEBUG_SUBSYSTEM;			\
-	(data)->msg_file   = __FILE__;				\
-	(data)->msg_fn     = __func__;				\
-	(data)->msg_line   = __LINE__;				\
-	(data)->msg_cdls   = (cdls);				\
-	(data)->msg_mask   = (mask);				\
-} while (0)
-
-#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls)	\
-	static struct libcfs_debug_msg_data dataname = {	\
-	       .msg_subsys = DEBUG_SUBSYSTEM,			\
-	       .msg_file   = __FILE__,				\
-	       .msg_fn     = __func__,				\
-	       .msg_line   = __LINE__,				\
-	       .msg_cdls   = (cdls)	 };			\
-	dataname.msg_mask   = (mask)
-
-/**
- * Filters out logging messages based on mask and subsystem.
- */
-static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
-{
-	return mask & D_CANTMASK ||
-		((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem));
-}
-
-#define __CDEBUG(cdls, mask, format, ...)				\
-do {									\
-	static struct libcfs_debug_msg_data msgdata;			\
-									\
-	CFS_CHECK_STACK(&msgdata, mask, cdls);				\
-									\
-	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {			\
-		LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls);	\
-		libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__);	\
-	}								\
-} while (0)
-
-#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__)
-
-#define CDEBUG_LIMIT(mask, format, ...)					\
-do {									\
-	static struct cfs_debug_limit_state cdls;			\
-									\
-	__CDEBUG(&cdls, mask, format, ## __VA_ARGS__);			\
-} while (0)
-
-/*
- * Lustre Error Checksum: calculates checksum
- * of Hex number by XORing the nybbles.
- */
-#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
-			   ((hexnum) >> 8 & 0xf))
-
-#define CWARN(format, ...)	CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__)
-#define CERROR(format, ...)	CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__)
-#define CNETERR(format, a...)	CDEBUG_LIMIT(D_NETERROR, format, ## a)
-#define CEMERG(format, ...)	CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
-
-#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
-#define LCONSOLE_INFO(format, ...)  CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
-#define LCONSOLE_WARN(format, ...)  CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
-#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
-			   "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__)
-#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__)
-
-#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
-		     const char *format1, ...)
-	__printf(2, 3);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
-		       const char *format1,
-		       va_list args, const char *format2, ...)
-	__printf(4, 5);
-
-/* other external symbols that tracefile provides: */
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-			    const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-			     const char *knl_buffer, char *append);
-
-#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
-
-#endif	/* __LIBCFS_DEBUG_H__ */
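
[Editor's note: cfs_cdebug_show() above is a two-level filter: a message
passes if its mask cannot be masked out, or if both the global debug mask
and the subsystem mask enable it.  A standalone sketch with made-up bit
values, illustrative only:]

    #include <stdio.h>

    #define D_ERROR    0x1
    #define D_WARNING  0x2
    #define D_CANTMASK D_ERROR      /* illustrative: errors always shown */

    static unsigned int debug_mask  = D_WARNING;
    static unsigned int subsys_mask = 0x10;

    static int debug_show(unsigned int mask, unsigned int subsystem)
    {
            return (mask & D_CANTMASK) ||
                   ((debug_mask & mask) && (subsys_mask & subsystem));
    }

    int main(void)
    {
            printf("warning, enabled subsys: %d\n", debug_show(D_WARNING, 0x10));
            printf("warning, disabled subsys: %d\n", debug_show(D_WARNING, 0x20));
            return 0;
    }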

+ 0 - 194
drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h

@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#ifndef _LIBCFS_FAIL_H
-#define _LIBCFS_FAIL_H
-
-#include <linux/sched.h>
-#include <linux/wait.h>
-
-extern unsigned long cfs_fail_loc;
-extern unsigned int cfs_fail_val;
-extern int cfs_fail_err;
-
-extern wait_queue_head_t cfs_race_waitq;
-extern int cfs_race_state;
-
-int __cfs_fail_check_set(u32 id, u32 value, int set);
-int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set);
-
-enum {
-	CFS_FAIL_LOC_NOSET      = 0,
-	CFS_FAIL_LOC_ORSET      = 1,
-	CFS_FAIL_LOC_RESET      = 2,
-	CFS_FAIL_LOC_VALUE      = 3
-};
-
-/* Failure injection control */
-#define CFS_FAIL_MASK_SYS    0x0000FF00
-#define CFS_FAIL_MASK_LOC   (0x000000FF | CFS_FAIL_MASK_SYS)
-
-#define CFS_FAILED_BIT       30
-/* CFS_FAILED is 0x40000000 */
-#define CFS_FAILED		BIT(CFS_FAILED_BIT)
-
-#define CFS_FAIL_ONCE_BIT    31
-/* CFS_FAIL_ONCE is 0x80000000 */
-#define CFS_FAIL_ONCE		BIT(CFS_FAIL_ONCE_BIT)
-
-/* The following flags aren't made to be combined */
-#define CFS_FAIL_SKIP	0x20000000 /* skip N times then fail */
-#define CFS_FAIL_SOME	0x10000000 /* only fail N times */
-#define CFS_FAIL_RAND	0x08000000 /* fail 1/N of the times */
-#define CFS_FAIL_USR1	0x04000000 /* user flag */
-
-#define CFS_FAULT	0x02000000 /* match any CFS_FAULT_CHECK */
-
-static inline bool CFS_FAIL_PRECHECK(u32 id)
-{
-	return cfs_fail_loc &&
-	       ((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) ||
-		(cfs_fail_loc & id & CFS_FAULT));
-}
-
-static inline int cfs_fail_check_set(u32 id, u32 value,
-				     int set, int quiet)
-{
-	int ret = 0;
-
-	if (unlikely(CFS_FAIL_PRECHECK(id))) {
-		ret = __cfs_fail_check_set(id, value, set);
-		if (ret) {
-			if (quiet) {
-				CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n",
-				       id, value);
-			} else {
-				LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n",
-					      id, value);
-			}
-		}
-	}
-
-	return ret;
-}
-
-/* If id hit cfs_fail_loc, return 1, otherwise return 0 */
-#define CFS_FAIL_CHECK(id) \
-	cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0)
-#define CFS_FAIL_CHECK_QUIET(id) \
-	cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1)
-
-/*
- * If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1,
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_VALUE(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0)
-#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1)
-
-/*
- * If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1,
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_ORSET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0)
-#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1)
-
-/*
- * If id hit cfs_fail_loc, cfs_fail_loc = value and return 1,
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_RESET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0)
-#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1)
-
-static inline int cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
-{
-	if (unlikely(CFS_FAIL_PRECHECK(id)))
-		return __cfs_fail_timeout_set(id, value, ms, set);
-	return 0;
-}
-
-/* If id hit cfs_fail_loc, sleep for seconds or milliseconds */
-#define CFS_FAIL_TIMEOUT(id, secs) \
-	cfs_fail_timeout_set(id, 0, (secs) * 1000, CFS_FAIL_LOC_NOSET)
-
-#define CFS_FAIL_TIMEOUT_MS(id, ms) \
-	cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET)
-
-/*
- * If id hit cfs_fail_loc, cfs_fail_loc |= value and
- * sleep seconds or milliseconds
- */
-#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
-	cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_ORSET)
-
-#define CFS_FAIL_TIMEOUT_RESET(id, value, secs) \
-	cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_RESET)
-
-#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
-	cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
-
-#define CFS_FAULT_CHECK(id)			\
-	CFS_FAIL_CHECK(CFS_FAULT | (id))
-
-/*
- * The idea here is to synchronise two threads to force a race. The
- * first thread that calls this with a matching fail_loc is put to
- * sleep. The next thread that calls with the same fail_loc wakes up
- * the first and continues.
- */
-static inline void cfs_race(u32 id)
-{
-	if (CFS_FAIL_PRECHECK(id)) {
-		if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) {
-			int rc;
-
-			cfs_race_state = 0;
-			CERROR("cfs_race id %x sleeping\n", id);
-			rc = wait_event_interruptible(cfs_race_waitq,
-						      !!cfs_race_state);
-			CERROR("cfs_race id %x awake, rc=%d\n", id, rc);
-		} else {
-			CERROR("cfs_race id %x waking\n", id);
-			cfs_race_state = 1;
-			wake_up(&cfs_race_waitq);
-		}
-	}
-}
-
-#define CFS_RACE(id) cfs_race(id)
-
-#endif /* _LIBCFS_FAIL_H */
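
To force a specific interleaving with CFS_RACE(), both threads call it with the same id while that id is armed in cfs_fail_loc; whichever thread arrives first sleeps on cfs_race_waitq until the other catches up. A minimal sketch with a hypothetical id:

#define DEMO_RACE_LOC	0x2345	/* hypothetical race id */

static void demo_critical_step(void)
{
	/* The first thread to get here sleeps; the second wakes it,
	 * so both proceed through the window together and the race
	 * being hunted becomes reproducible. */
	CFS_RACE(DEMO_RACE_LOC);

	/* ... the code whose concurrency we want to exercise ... */
}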

+ 0 - 869
drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h

@@ -1,869 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_hash.h
- *
- * Hashing routines
- *
- */
-
-#ifndef __LIBCFS_HASH_H__
-#define __LIBCFS_HASH_H__
-
-#include <linux/hash.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/libcfs/libcfs.h>
-
-/*
- * Knuth recommends primes in approximately golden ratio to the maximum
- * integer representable by a machine word for multiplicative hashing.
- * Chuck Lever verified the effectiveness of this technique:
- * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
- *
- * These primes are chosen to be bit-sparse, that is operations on
- * them can use shifts and additions instead of multiplications for
- * machines where multiplications are slow.
- */
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
-/*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
-
-/** disable debug */
-#define CFS_HASH_DEBUG_NONE	0
-/*
- * record hash depth and report it to the console when it gets too deep;
- * the computation overhead is low but it consumes more memory
- */
-#define CFS_HASH_DEBUG_1	1
-/** expensive: additionally validates keys */
-#define CFS_HASH_DEBUG_2	2
-
-#define CFS_HASH_DEBUG_LEVEL	CFS_HASH_DEBUG_NONE
-
-struct cfs_hash_ops;
-struct cfs_hash_lock_ops;
-struct cfs_hash_hlist_ops;
-
-union cfs_hash_lock {
-	rwlock_t		rw;		/**< rwlock */
-	spinlock_t		spin;		/**< spinlock */
-};
-
-/**
- * cfs_hash_bucket is a container of:
- * - lock, counter ...
- * - array of hash-heads starting at hsb_head[0]; a hash-head can be one of
- *   . struct cfs_hash_head
- *   . struct cfs_hash_head_dep
- *   . struct cfs_hash_dhead
- *   . struct cfs_hash_dhead_dep
- *   depending on the user's requirements
- * - some extra bytes (the caller can request them when creating the hash)
- */
-struct cfs_hash_bucket {
-	union cfs_hash_lock	hsb_lock;	/**< bucket lock */
-	u32			hsb_count;	/**< current entries */
-	u32			hsb_version;	/**< change version */
-	unsigned int		hsb_index;	/**< index of bucket */
-	int			hsb_depmax;	/**< max depth on bucket */
-	long			hsb_head[0];	/**< hash-head array */
-};
-
-/**
- * cfs_hash bucket descriptor; it normally lives on the caller's stack
- */
-struct cfs_hash_bd {
-	/* address of bucket */
-	struct cfs_hash_bucket	*bd_bucket;
-	/* offset in bucket */
-	unsigned int		 bd_offset;
-};
-
-#define CFS_HASH_NAME_LEN	16	/**< default name length */
-#define CFS_HASH_BIGNAME_LEN	64	/**< bigname for param tree */
-
-#define CFS_HASH_BKT_BITS	3	/**< default bits of bucket */
-#define CFS_HASH_BITS_MAX	30	/**< max bits of bucket */
-#define CFS_HASH_BITS_MIN	CFS_HASH_BKT_BITS
-
-/**
- * common hash attributes.
- */
-enum cfs_hash_tag {
-	/**
-	 * don't need any lock; the caller will protect operations with its
-	 * own lock. With this flag:
-	 *  . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
-	 *    will be ignored.
-	 *  . Some functions will be disabled with this flag, e.g.:
-	 *    cfs_hash_for_each_empty, cfs_hash_rehash
-	 */
-	CFS_HASH_NO_LOCK	= BIT(0),
-	/** no bucket lock, use one spinlock to protect the whole hash */
-	CFS_HASH_NO_BKTLOCK	= BIT(1),
-	/** rwlock to protect bucket */
-	CFS_HASH_RW_BKTLOCK	= BIT(2),
-	/** spinlock to protect bucket */
-	CFS_HASH_SPIN_BKTLOCK	= BIT(3),
-	/** always add new item to tail */
-	CFS_HASH_ADD_TAIL	= BIT(4),
-	/** hash-table doesn't have refcount on item */
-	CFS_HASH_NO_ITEMREF	= BIT(5),
-	/** big name for param-tree */
-	CFS_HASH_BIGNAME	= BIT(6),
-	/** track global count */
-	CFS_HASH_COUNTER	= BIT(7),
-	/** rehash item by new key */
-	CFS_HASH_REHASH_KEY	= BIT(8),
-	/** Enable dynamic hash resizing */
-	CFS_HASH_REHASH		= BIT(9),
-	/** can shrink hash-size */
-	CFS_HASH_SHRINK		= BIT(10),
-	/** assert hash is empty on exit */
-	CFS_HASH_ASSERT_EMPTY	= BIT(11),
-	/** record hlist depth */
-	CFS_HASH_DEPTH		= BIT(12),
-	/**
-	 * rehash is always scheduled in a separate thread, so a
-	 * concurrent change to the hash table is non-blocking
-	 */
-	CFS_HASH_NBLK_CHANGE	= BIT(13),
-	/**
-	 * NB: hs_flags is typed as u16; change it if you
-	 * need 16 or more flags
-	 */
-};
-
-/** most used attributes */
-#define CFS_HASH_DEFAULT	(CFS_HASH_RW_BKTLOCK | \
-				 CFS_HASH_COUNTER | CFS_HASH_REHASH)
-
-/**
- * cfs_hash is a general-purpose hash-table implementation; it supports:
- *    . two refcount modes
- *      hash-table with & without refcount
- *    . four lock modes
- *      nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
- *    . general operations
- *      lookup, add(add_tail or add_head), delete
- *    . rehash
- *      grow or shrink
- *    . iteration
- *      locked iteration and unlocked iteration
- *    . bigname
- *      support for long hash names
- *    . debug
- *      trace max searching depth
- *
- * Rehash:
- * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
- * is spawned to handle the rehash in the background. Other processes can
- * concurrently perform additions, deletions, and lookups without being
- * blocked on rehash completion, because the rehash releases the global
- * wrlock for each bucket.
- *
- * Rehash and iteration can't run at the same time because it's too tricky
- * to keep both of them safe and correct. As they are relatively rare
- * operations:
- *   . if iteration is in progress when we try to launch a rehash, the
- *     rehash simply gives up; the iterator will launch it at the end.
- *   . if a rehash is in progress when we try to iterate the hash table,
- *     we just wait (it shouldn't take very long); nobody should expect
- *     iteration of the whole hash-table to be non-blocking anyway.
- *
- * During rehashing, a (key,object) pair may be in one of two buckets,
- * depending on whether the worker task has yet to transfer the object
- * to its new location in the table. Lookups and deletions need to search both
- * locations; additions must take care to only insert into the new bucket.
- */
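
Wiring a user of this API together looks roughly like the sketch below. Everything here is hypothetical (object type, callbacks, table geometry); the ops shown are a representative subset of struct cfs_hash_ops, defined later in this header:

/* Hypothetical object stored in the table, keyed by a u64 id. */
struct demo_obj {
	u64			do_id;
	atomic_t		do_ref;
	struct hlist_node	do_hnode;
};

static unsigned int demo_hash(struct cfs_hash *hs, const void *key,
			      unsigned int mask)
{
	return cfs_hash_u64_hash(*(const u64 *)key, mask);
}

static void *demo_key(struct hlist_node *hnode)
{
	return &hlist_entry(hnode, struct demo_obj, do_hnode)->do_id;
}

static int demo_keycmp(const void *key, struct hlist_node *hnode)
{
	return *(const u64 *)key ==
	       hlist_entry(hnode, struct demo_obj, do_hnode)->do_id;
}

static void *demo_object(struct hlist_node *hnode)
{
	return hlist_entry(hnode, struct demo_obj, do_hnode);
}

static void demo_get(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_inc(&hlist_entry(hnode, struct demo_obj, do_hnode)->do_ref);
}

static void demo_put(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_dec(&hlist_entry(hnode, struct demo_obj, do_hnode)->do_ref);
}

static struct cfs_hash_ops demo_hash_ops = {
	.hs_hash	= demo_hash,
	.hs_key		= demo_key,
	.hs_keycmp	= demo_keycmp,
	.hs_object	= demo_object,
	.hs_get		= demo_get,
	.hs_put		= demo_put,
	.hs_put_locked	= demo_put,
};

static struct cfs_hash *demo_table_create(void)
{
	/* 2^7 hlist heads initially, growable to 2^16; the default
	 * flags enable per-bucket rwlocks, a global counter and rehash. */
	return cfs_hash_create("demo", 7, 16, CFS_HASH_BKT_BITS, 0,
			       CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
			       &demo_hash_ops, CFS_HASH_DEFAULT);
}
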
-
-struct cfs_hash {
-	/**
-	 * serialize with rehash, or serialize all operations if
-	 * the hash-table has CFS_HASH_NO_BKTLOCK
-	 */
-	union cfs_hash_lock		hs_lock;
-	/** hash operations */
-	struct cfs_hash_ops		*hs_ops;
-	/** hash lock operations */
-	struct cfs_hash_lock_ops	*hs_lops;
-	/** hash list operations */
-	struct cfs_hash_hlist_ops	*hs_hops;
-	/** hash buckets-table */
-	struct cfs_hash_bucket		**hs_buckets;
-	/** total number of items on this hash-table */
-	atomic_t			hs_count;
-	/** hash flags, see cfs_hash_tag for detail */
-	u16				hs_flags;
-	/** # of extra bytes per bucket, for user-stored extended attributes */
-	u16				hs_extra_bytes;
-	/** wants to iterate */
-	u8				hs_iterating;
-	/** hash-table is dying */
-	u8				hs_exiting;
-	/** current hash bits */
-	u8				hs_cur_bits;
-	/** min hash bits */
-	u8				hs_min_bits;
-	/** max hash bits */
-	u8				hs_max_bits;
-	/** bits for rehash */
-	u8				hs_rehash_bits;
-	/** bits for each bucket */
-	u8				hs_bkt_bits;
-	/** resize min threshold */
-	u16				hs_min_theta;
-	/** resize max threshold */
-	u16				hs_max_theta;
-	/** resize count */
-	u32				hs_rehash_count;
-	/** # of iterators (callers of cfs_hash_for_each_*) */
-	u32				hs_iterators;
-	/** rehash workitem */
-	struct work_struct		hs_rehash_work;
-	/** refcount on this hash table */
-	atomic_t			hs_refcount;
-	/** rehash buckets-table */
-	struct cfs_hash_bucket		**hs_rehash_buckets;
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-	/** serialize debug members */
-	spinlock_t			hs_dep_lock;
-	/** max depth */
-	unsigned int			hs_dep_max;
-	/** id of the deepest bucket */
-	unsigned int			hs_dep_bkt;
-	/** offset in the deepest bucket */
-	unsigned int			hs_dep_off;
-	/** bits when we found the max depth */
-	unsigned int			hs_dep_bits;
-	/** workitem to output max depth */
-	struct work_struct		hs_dep_work;
-#endif
-	/** name of htable */
-	char				hs_name[0];
-};
-
-struct cfs_hash_lock_ops {
-	/** lock the hash table */
-	void    (*hs_lock)(union cfs_hash_lock *lock, int exclusive);
-	/** unlock the hash table */
-	void    (*hs_unlock)(union cfs_hash_lock *lock, int exclusive);
-	/** lock the hash bucket */
-	void    (*hs_bkt_lock)(union cfs_hash_lock *lock, int exclusive);
-	/** unlock the hash bucket */
-	void    (*hs_bkt_unlock)(union cfs_hash_lock *lock, int exclusive);
-};
-
-struct cfs_hash_hlist_ops {
-	/** return hlist_head of hash-head of @bd */
-	struct hlist_head *(*hop_hhead)(struct cfs_hash *hs,
-					struct cfs_hash_bd *bd);
-	/** return hash-head size */
-	int (*hop_hhead_size)(struct cfs_hash *hs);
-	/** add @hnode to hash-head of @bd */
-	int (*hop_hnode_add)(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			     struct hlist_node *hnode);
-	/** remove @hnode from hash-head of @bd */
-	int (*hop_hnode_del)(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			     struct hlist_node *hnode);
-};
-
-struct cfs_hash_ops {
-	/** return hashed value from @key */
-	unsigned int (*hs_hash)(struct cfs_hash *hs, const void *key,
-				unsigned int mask);
-	/** return key address of @hnode */
-	void *   (*hs_key)(struct hlist_node *hnode);
-	/** copy key from @hnode to @key */
-	void     (*hs_keycpy)(struct hlist_node *hnode, void *key);
-	/**
-	 *  compare @key with key of @hnode
-	 *  returns 1 on a match
-	 */
-	int      (*hs_keycmp)(const void *key, struct hlist_node *hnode);
-	/** return object address of @hnode, i.e: container_of(...hnode) */
-	void *   (*hs_object)(struct hlist_node *hnode);
-	/** get refcount of item, always called while holding bucket-lock */
-	void     (*hs_get)(struct cfs_hash *hs, struct hlist_node *hnode);
-	/** release refcount of item */
-	void     (*hs_put)(struct cfs_hash *hs, struct hlist_node *hnode);
-	/** release refcount of item, always called while holding bucket-lock */
-	void     (*hs_put_locked)(struct cfs_hash *hs,
-				  struct hlist_node *hnode);
-	/** called just before @hnode is removed */
-	void     (*hs_exit)(struct cfs_hash *hs, struct hlist_node *hnode);
-};
-
-/** total number of buckets in @hs */
-#define CFS_HASH_NBKT(hs)	\
-	BIT((hs)->hs_cur_bits - (hs)->hs_bkt_bits)
-
-/** total number of buckets in @hs while rehashing */
-#define CFS_HASH_RH_NBKT(hs)	\
-	BIT((hs)->hs_rehash_bits - (hs)->hs_bkt_bits)
-
-/** number of hlists in a bucket */
-#define CFS_HASH_BKT_NHLIST(hs)	BIT((hs)->hs_bkt_bits)
-
-/** total number of hlist in @hs */
-#define CFS_HASH_NHLIST(hs)	BIT((hs)->hs_cur_bits)
-
-/** total number of hlist in @hs while rehashing */
-#define CFS_HASH_RH_NHLIST(hs)	BIT((hs)->hs_rehash_bits)
-
-static inline int
-cfs_hash_with_no_lock(struct cfs_hash *hs)
-{
-	/* caller will serialize all operations for this hash-table */
-	return hs->hs_flags & CFS_HASH_NO_LOCK;
-}
-
-static inline int
-cfs_hash_with_no_bktlock(struct cfs_hash *hs)
-{
-	/* no bucket lock, one single lock to protect the hash-table */
-	return hs->hs_flags & CFS_HASH_NO_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_rw_bktlock(struct cfs_hash *hs)
-{
-	/* rwlock to protect hash bucket */
-	return hs->hs_flags & CFS_HASH_RW_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_spin_bktlock(struct cfs_hash *hs)
-{
-	/* spinlock to protect hash bucket */
-	return hs->hs_flags & CFS_HASH_SPIN_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_add_tail(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_ADD_TAIL;
-}
-
-static inline int
-cfs_hash_with_no_itemref(struct cfs_hash *hs)
-{
-	/*
-	 * the hash-table doesn't keep a refcount on items; an item
-	 * can't be removed from the hash unless its refcount is zero
-	 */
-	return hs->hs_flags & CFS_HASH_NO_ITEMREF;
-}
-
-static inline int
-cfs_hash_with_bigname(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_BIGNAME;
-}
-
-static inline int
-cfs_hash_with_counter(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_COUNTER;
-}
-
-static inline int
-cfs_hash_with_rehash(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_REHASH;
-}
-
-static inline int
-cfs_hash_with_rehash_key(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_REHASH_KEY;
-}
-
-static inline int
-cfs_hash_with_shrink(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_SHRINK;
-}
-
-static inline int
-cfs_hash_with_assert_empty(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_ASSERT_EMPTY;
-}
-
-static inline int
-cfs_hash_with_depth(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_DEPTH;
-}
-
-static inline int
-cfs_hash_with_nblk_change(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_NBLK_CHANGE;
-}
-
-static inline int
-cfs_hash_is_exiting(struct cfs_hash *hs)
-{
-	/* cfs_hash_destroy is called */
-	return hs->hs_exiting;
-}
-
-static inline int
-cfs_hash_is_rehashing(struct cfs_hash *hs)
-{
-	/* rehash is launched */
-	return !!hs->hs_rehash_bits;
-}
-
-static inline int
-cfs_hash_is_iterating(struct cfs_hash *hs)
-{
-	/* someone is calling cfs_hash_for_each_* */
-	return hs->hs_iterating || hs->hs_iterators;
-}
-
-static inline int
-cfs_hash_bkt_size(struct cfs_hash *hs)
-{
-	return offsetof(struct cfs_hash_bucket, hsb_head[0]) +
-	       hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
-	       hs->hs_extra_bytes;
-}
-
-static inline unsigned
-cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned int mask)
-{
-	return hs->hs_ops->hs_hash(hs, key, mask);
-}
-
-static inline void *
-cfs_hash_key(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	return hs->hs_ops->hs_key(hnode);
-}
-
-static inline void
-cfs_hash_keycpy(struct cfs_hash *hs, struct hlist_node *hnode, void *key)
-{
-	if (hs->hs_ops->hs_keycpy)
-		hs->hs_ops->hs_keycpy(hnode, key);
-}
-
-/**
- * Returns 1 on a match.
- */
-static inline int
-cfs_hash_keycmp(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-	return hs->hs_ops->hs_keycmp(key, hnode);
-}
-
-static inline void *
-cfs_hash_object(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	return hs->hs_ops->hs_object(hnode);
-}
-
-static inline void
-cfs_hash_get(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	hs->hs_ops->hs_get(hs, hnode);
-}
-
-static inline void
-cfs_hash_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	hs->hs_ops->hs_put_locked(hs, hnode);
-}
-
-static inline void
-cfs_hash_put(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	hs->hs_ops->hs_put(hs, hnode);
-}
-
-static inline void
-cfs_hash_exit(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	if (hs->hs_ops->hs_exit)
-		hs->hs_ops->hs_exit(hs, hnode);
-}
-
-static inline void cfs_hash_lock(struct cfs_hash *hs, int excl)
-{
-	hs->hs_lops->hs_lock(&hs->hs_lock, excl);
-}
-
-static inline void cfs_hash_unlock(struct cfs_hash *hs, int excl)
-{
-	hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
-}
-
-static inline int cfs_hash_dec_and_lock(struct cfs_hash *hs,
-					atomic_t *condition)
-{
-	LASSERT(cfs_hash_with_no_bktlock(hs));
-	return atomic_dec_and_lock(condition, &hs->hs_lock.spin);
-}
-
-static inline void cfs_hash_bd_lock(struct cfs_hash *hs,
-				    struct cfs_hash_bd *bd, int excl)
-{
-	hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
-}
-
-static inline void cfs_hash_bd_unlock(struct cfs_hash *hs,
-				      struct cfs_hash_bd *bd, int excl)
-{
-	hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
-}
-
-/**
- * operations on a cfs_hash bucket (bd: bucket descriptor);
- * they are normally for hash-tables without rehash
- */
-void cfs_hash_bd_get(struct cfs_hash *hs, const void *key,
-		     struct cfs_hash_bd *bd);
-
-static inline void
-cfs_hash_bd_get_and_lock(struct cfs_hash *hs, const void *key,
-			 struct cfs_hash_bd *bd, int excl)
-{
-	cfs_hash_bd_get(hs, key, bd);
-	cfs_hash_bd_lock(hs, bd, excl);
-}
-
-static inline unsigned
-cfs_hash_bd_index_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits);
-}
-
-static inline void
-cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned int index,
-		      struct cfs_hash_bd *bd)
-{
-	bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
-	bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U);
-}
-
-static inline void *
-cfs_hash_bd_extra_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	return (void *)bd->bd_bucket +
-	       cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
-}
-
-static inline u32
-cfs_hash_bd_version_get(struct cfs_hash_bd *bd)
-{
-	/* caller must hold cfs_hash_bd_lock */
-	return bd->bd_bucket->hsb_version;
-}
-
-static inline u32
-cfs_hash_bd_count_get(struct cfs_hash_bd *bd)
-{
-	/* caller must hold cfs_hash_bd_lock */
-	return bd->bd_bucket->hsb_count;
-}
-
-static inline int
-cfs_hash_bd_depmax_get(struct cfs_hash_bd *bd)
-{
-	return bd->bd_bucket->hsb_depmax;
-}
-
-static inline int
-cfs_hash_bd_compare(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
-	if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
-		return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;
-
-	if (bd1->bd_offset != bd2->bd_offset)
-		return bd1->bd_offset - bd2->bd_offset;
-
-	return 0;
-}
-
-void cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			    struct hlist_node *hnode);
-void cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			    struct hlist_node *hnode);
-void cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
-			     struct cfs_hash_bd *bd_new,
-			     struct hlist_node *hnode);
-
-static inline int
-cfs_hash_bd_dec_and_lock(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 atomic_t *condition)
-{
-	LASSERT(cfs_hash_with_spin_bktlock(hs));
-	return atomic_dec_and_lock(condition, &bd->bd_bucket->hsb_lock.spin);
-}
-
-static inline struct hlist_head *
-cfs_hash_bd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	return hs->hs_hops->hop_hhead(hs, bd);
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			  const void *key);
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			const void *key);
-
-/**
- * operations on a cfs_hash bucket (bd: bucket descriptor);
- * they are safe for hash-tables with rehash
- */
-void cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
-			  struct cfs_hash_bd *bds);
-void cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			   int excl);
-void cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			     int excl);
-
-static inline void
-cfs_hash_dual_bd_get_and_lock(struct cfs_hash *hs, const void *key,
-			      struct cfs_hash_bd *bds, int excl)
-{
-	cfs_hash_dual_bd_get(hs, key, bds);
-	cfs_hash_dual_bd_lock(hs, bds, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			       const void *key);
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode,
-				int insist_add);
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode);
-
-/* Hash init/cleanup functions */
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
-		unsigned int bkt_bits, unsigned int extra_bytes,
-		unsigned int min_theta, unsigned int max_theta,
-		struct cfs_hash_ops *ops, unsigned int flags);
-
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs);
-void cfs_hash_putref(struct cfs_hash *hs);
-
-/* Hash addition functions */
-void cfs_hash_add(struct cfs_hash *hs, const void *key,
-		  struct hlist_node *hnode);
-int cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
-			struct hlist_node *hnode);
-void *cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
-			      struct hlist_node *hnode);
-
-/* Hash deletion functions */
-void *cfs_hash_del(struct cfs_hash *hs, const void *key,
-		   struct hlist_node *hnode);
-void *cfs_hash_del_key(struct cfs_hash *hs, const void *key);
-
-/* Hash lookup/for_each functions */
-#define CFS_HASH_LOOP_HOG       1024
-
-typedef int (*cfs_hash_for_each_cb_t)(struct cfs_hash *hs,
-				      struct cfs_hash_bd *bd,
-				      struct hlist_node *node,
-				      void *data);
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key);
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, void *data);
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
-		       void *data);
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
-			 void *data, int start);
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
-			void *data);
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
-		      cfs_hash_for_each_cb_t cb, void *data);
-typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t cb, void *data);
-
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
-			cfs_hash_for_each_cb_t cb, void *data);
-int  cfs_hash_is_empty(struct cfs_hash *hs);
-u64 cfs_hash_size_get(struct cfs_hash *hs);
-
-/*
- * Rehash - Theta is calculated to be the average chained
- * hash depth assuming a perfectly uniform hash function.
- */
-void cfs_hash_rehash_cancel_locked(struct cfs_hash *hs);
-void cfs_hash_rehash_cancel(struct cfs_hash *hs);
-void cfs_hash_rehash(struct cfs_hash *hs, int do_rehash);
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
-			 void *new_key, struct hlist_node *hnode);
-
-#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
-/* Validate hnode references the correct key */
-static inline void
-cfs_hash_key_validate(struct cfs_hash *hs, const void *key,
-		      struct hlist_node *hnode)
-{
-	LASSERT(cfs_hash_keycmp(hs, key, hnode));
-}
-
-/* Validate hnode is in the correct bucket */
-static inline void
-cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 struct hlist_node *hnode)
-{
-	struct cfs_hash_bd bds[2];
-
-	cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
-	LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
-		bds[1].bd_bucket == bd->bd_bucket);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */
-
-static inline void
-cfs_hash_key_validate(struct cfs_hash *hs, const void *key,
-		      struct hlist_node *hnode) {}
-
-static inline void
-cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 struct hlist_node *hnode) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL */
-
-#define CFS_HASH_THETA_BITS	10
-#define CFS_HASH_MIN_THETA	BIT(CFS_HASH_THETA_BITS - 1)
-#define CFS_HASH_MAX_THETA	BIT(CFS_HASH_THETA_BITS + 1)
-
-/* Return integer component of theta */
-static inline int __cfs_hash_theta_int(int theta)
-{
-	return (theta >> CFS_HASH_THETA_BITS);
-}
-
-/* Return a fractional value between 0 and 999 */
-static inline int __cfs_hash_theta_frac(int theta)
-{
-	return ((theta * 1000) >> CFS_HASH_THETA_BITS) -
-	       (__cfs_hash_theta_int(theta) * 1000);
-}
-
-static inline int __cfs_hash_theta(struct cfs_hash *hs)
-{
-	return (atomic_read(&hs->hs_count) <<
-		CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
-}
-
-static inline void
-__cfs_hash_set_theta(struct cfs_hash *hs, int min, int max)
-{
-	LASSERT(min < max);
-	hs->hs_min_theta = (u16)min;
-	hs->hs_max_theta = (u16)max;
-}
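
Concretely, theta is a fixed-point value with CFS_HASH_THETA_BITS (10) fractional bits. A table holding 4096 items at hs_cur_bits = 10 (1024 hlist heads) gives theta = (4096 << 10) >> 10 = 4096, whose integer part is 4096 >> 10 = 4 and whose fractional part is 0: an average chain depth of 4.000. The defaults CFS_HASH_MIN_THETA (512) and CFS_HASH_MAX_THETA (2048) thus correspond to average depths of 0.5 and 2.0, the band outside which the table is considered for shrinking or growing.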
-
-/* Generic debug formatting routines mainly for proc handler */
-struct seq_file;
-void cfs_hash_debug_header(struct seq_file *m);
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m);
-
-/*
- * Generic djb2 hash algorithm for character arrays.
- */
-static inline unsigned
-cfs_hash_djb2_hash(const void *key, size_t size, unsigned int mask)
-{
-	unsigned int i, hash = 5381;
-
-	LASSERT(key);
-
-	for (i = 0; i < size; i++)
-		hash = hash * 33 + ((char *)key)[i];
-
-	return (hash & mask);
-}
-
-/*
- * Generic u32 hash algorithm.
- */
-static inline unsigned
-cfs_hash_u32_hash(const u32 key, unsigned int mask)
-{
-	return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
-}
-
-/*
- * Generic u64 hash algorithm.
- */
-static inline unsigned
-cfs_hash_u64_hash(const u64 key, unsigned int mask)
-{
-	return ((unsigned int)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
-}
-
-/** iterate over all buckets in @bds (array of struct cfs_hash_bd) */
-#define cfs_hash_for_each_bd(bds, n, i)	\
-	for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)
-
-/** iterate over all buckets of @hs */
-#define cfs_hash_for_each_bucket(hs, bd, pos)			\
-	for (pos = 0;						\
-	     pos < CFS_HASH_NBKT(hs) &&				\
-	     ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)
-
-/** iterate over all hlist of bucket @bd */
-#define cfs_hash_bd_for_each_hlist(hs, bd, hlist)		\
-	for ((bd)->bd_offset = 0;				\
-	     (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) &&	\
-	     (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL;	\
-	     (bd)->bd_offset++)
-
-/* !__LIBCFS__HASH_H__ */
-#endif
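
The callback shape for the iterators is easiest to see in a sketch. The names are hypothetical, and cfs_hash_size_get() already returns the item count; this is purely illustrative:

static int demo_count_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			 struct hlist_node *hnode, void *data)
{
	int *count = data;

	(*count)++;	/* called once per object in the table */
	return 0;
}

static int demo_count(struct cfs_hash *hs)
{
	int count = 0;

	cfs_hash_for_each(hs, demo_count_cb, &count);
	return count;
}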

+ 0 - 200
drivers/staging/lustre/include/linux/libcfs/libcfs_private.h

@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_private.h
- *
- * Various defines for libcfs.
- *
- */
-
-#ifndef __LIBCFS_PRIVATE_H__
-#define __LIBCFS_PRIVATE_H__
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#define LASSERTF(cond, fmt, ...)					\
-do {									\
-	if (unlikely(!(cond))) {					\
-		LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL);	\
-		libcfs_debug_msg(&__msg_data,				\
-				 "ASSERTION( %s ) failed: " fmt, #cond,	\
-				 ## __VA_ARGS__);			\
-		lbug_with_loc(&__msg_data);				\
-	}								\
-} while (0)
-
-#define LASSERT(cond) LASSERTF(cond, "\n")
-
-#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
-/**
- * This is for more expensive checks that one doesn't want enabled all
- * the time. LINVRNT() has to be explicitly enabled by the
- * CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK option.
- */
-# define LINVRNT(exp) LASSERT(exp)
-#else
-# define LINVRNT(exp) ((void)sizeof !!(exp))
-#endif
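
At a call site the assertion macros read like printk-style checks; a minimal hypothetical sketch:

static void demo_check(int nob)
{
	/* Plain invariant check; triggers LBUG handling on failure. */
	LASSERT(nob >= 0);

	/* The formatted variant records context when it trips. */
	LASSERTF((nob & 7) == 0, "nob: %d is not 8-byte aligned\n", nob);
}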
-
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msg);
-
-#define LBUG()							  \
-do {								    \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL);	     \
-	lbug_with_loc(&msgdata);					\
-} while (0)
-
-/*
- * Use #define rather than inline, as lnet_cpt_table() might
- * not be defined yet
- */
-#define kmalloc_cpt(size, flags, cpt) \
-	kmalloc_node(size, flags,  cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kzalloc_cpt(size, flags, cpt) \
-	kmalloc_node(size, flags | __GFP_ZERO,				\
-		     cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kvmalloc_cpt(size, flags, cpt) \
-	kvmalloc_node(size, flags,					\
-		      cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kvzalloc_cpt(size, flags, cpt) \
-	kvmalloc_node(size, flags | __GFP_ZERO,				\
-		      cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
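A hedged sketch of the calling pattern, with a hypothetical per-CPT object; the allocation lands on the NUMA node that cfs_cpt_spread_node() picks for the given CPT:

struct demo_tx {
	struct list_head	dt_list;
	int			dt_cpt;
};

static struct demo_tx *demo_tx_alloc(int cpt)
{
	struct demo_tx *tx;

	/* Zeroed allocation placed close to the CPUs serving @cpt. */
	tx = kzalloc_cpt(sizeof(*tx), GFP_NOFS, cpt);
	if (tx)
		tx->dt_cpt = cpt;
	return tx;
}
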
-/******************************************************************************/
-
-void libcfs_debug_dumplog(void);
-int libcfs_debug_init(unsigned long bufsize);
-int libcfs_debug_cleanup(void);
-int libcfs_debug_clear_buffer(void);
-int libcfs_debug_mark_buffer(const char *text);
-
-/*
- * Allocate a variable-size array; the returned value is an array of
- * pointers. The caller specifies the length of the array via @count.
- */
-void *cfs_array_alloc(int count, unsigned int size);
-void  cfs_array_free(void *vars);
-
-#define LASSERT_ATOMIC_ENABLED	  (1)
-
-#if LASSERT_ATOMIC_ENABLED
-
-/** assert value of @a is equal to @v */
-#define LASSERT_ATOMIC_EQ(a, v)			\
-	LASSERTF(atomic_read(a) == v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is not equal to @v */
-#define LASSERT_ATOMIC_NE(a, v)		\
-	LASSERTF(atomic_read(a) != v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is less than @v */
-#define LASSERT_ATOMIC_LT(a, v)		\
-	LASSERTF(atomic_read(a) < v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is less than or equal to @v */
-#define LASSERT_ATOMIC_LE(a, v)		\
-	LASSERTF(atomic_read(a) <= v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than @v */
-#define LASSERT_ATOMIC_GT(a, v)		\
-	LASSERTF(atomic_read(a) > v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than or equal to @v */
-#define LASSERT_ATOMIC_GE(a, v)		\
-	LASSERTF(atomic_read(a) >= v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than @v1 and less than @v2 */
-#define LASSERT_ATOMIC_GT_LT(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v > v1 && __v < v2, "value: %d\n", __v);     \
-} while (0)
-
-/** assert value of @a is greater than @v1 and less than or equal to @v2 */
-#define LASSERT_ATOMIC_GT_LE(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v > v1 && __v <= v2, "value: %d\n", __v);    \
-} while (0)
-
-/** assert value of @a is greater than or equal to @v1 and less than @v2 */
-#define LASSERT_ATOMIC_GE_LT(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v >= v1 && __v < v2, "value: %d\n", __v);    \
-} while (0)
-
-/** assert value of @a is greater than or equal to @v1 and less than or equal to @v2 */
-#define LASSERT_ATOMIC_GE_LE(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v >= v1 && __v <= v2, "value: %d\n", __v);   \
-} while (0)
-
-#else /* !LASSERT_ATOMIC_ENABLED */
-
-#define LASSERT_ATOMIC_EQ(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_NE(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_LT(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_LE(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_GT(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_GE(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_GT_LT(a, v1, v2)	 do {} while (0)
-#define LASSERT_ATOMIC_GT_LE(a, v1, v2)	 do {} while (0)
-#define LASSERT_ATOMIC_GE_LT(a, v1, v2)	 do {} while (0)
-#define LASSERT_ATOMIC_GE_LE(a, v1, v2)	 do {} while (0)
-
-#endif /* LASSERT_ATOMIC_ENABLED */
-
-#define LASSERT_ATOMIC_ZERO(a)		  LASSERT_ATOMIC_EQ(a, 0)
-#define LASSERT_ATOMIC_POS(a)		   LASSERT_ATOMIC_GT(a, 0)
-
-/* implication */
-#define ergo(a, b) (!(a) || (b))
-/* logical equivalence */
-#define equi(a, b) (!!(a) == !!(b))
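
Both read naturally inside assertions; a minimal hypothetical sketch:

static void demo_invariants(int busy, int refs)
{
	/* Implication: if the object is busy, it must hold a ref. */
	LASSERT(ergo(busy, refs > 0));

	/* Equivalence: the two conditions hold (or fail) together. */
	LASSERT(equi(busy, refs > 0));
}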
-
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline size_t cfs_size_round(int val)
-{
-	return round_up(val, 8);
-}
-
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
-#endif

+ 0 - 102
drivers/staging/lustre/include/linux/libcfs/libcfs_string.h

@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_string.h
- *
- * Generic string manipulation functions.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#ifndef __LIBCFS_STRING_H__
-#define __LIBCFS_STRING_H__
-
-#include <linux/mm.h>
-
-/* libcfs_string.c */
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
-		 int *oldmask, int minmask, int allmask);
-/* trim leading and trailing space characters */
-char *cfs_firststr(char *str, size_t size);
-
-/**
- * Structure to represent length-delimited strings (not necessarily NUL-terminated).
- */
-struct cfs_lstr {
-	char		*ls_str;
-	int		ls_len;
-};
-
-/*
- * Structure to represent a \<range_expr\> token of the syntax.
- */
-struct cfs_range_expr {
-	/*
-	 * Link to cfs_expr_list::el_exprs.
-	 */
-	struct list_head	re_link;
-	u32		re_lo;
-	u32		re_hi;
-	u32		re_stride;
-};
-
-struct cfs_expr_list {
-	struct list_head	el_link;
-	struct list_head	el_exprs;
-};
-
-int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
-int cfs_str2num_check(char *str, int nob, unsigned int *num,
-		      unsigned int min, unsigned int max);
-int cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list);
-int cfs_expr_list_print(char *buffer, int count,
-			struct cfs_expr_list *expr_list);
-int cfs_expr_list_values(struct cfs_expr_list *expr_list,
-			 int max, u32 **values);
-static inline void
-cfs_expr_list_values_free(u32 *values, int num)
-{
-	/*
-	 * This array is allocated by kvmalloc(), so it shouldn't be freed
-	 * with OBD_FREE() by modules other than libcfs & LNet;
-	 * otherwise we would see a spurious memory leak
-	 */
-	kvfree(values);
-}
-
-void cfs_expr_list_free(struct cfs_expr_list *expr_list);
-int cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
-			struct cfs_expr_list **elpp);
-void cfs_expr_list_free_list(struct list_head *list);
-
-#endif
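
A hedged sketch of the expression-list API, assuming the bracketed lo-hi/stride syntax used elsewhere in libcfs (so "[0-7/2]" expands to {0, 2, 4, 6}):

static int demo_expr(void)
{
	struct cfs_expr_list *el;
	char expr[] = "[0-7/2]";
	int rc;

	rc = cfs_expr_list_parse(expr, sizeof(expr) - 1, 0, 1023, &el);
	if (rc)
		return rc;

	/* 4 is in {0, 2, 4, 6}, so this should match. */
	rc = cfs_expr_list_match(4, el) ? 0 : -EINVAL;

	cfs_expr_list_free(el);
	return rc;
}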

+ 0 - 212
drivers/staging/lustre/include/linux/lnet/api.h

@@ -1,212 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-
-#ifndef __LNET_API_H__
-#define __LNET_API_H__
-
-/** \defgroup lnet LNet
- *
- * The Lustre Networking subsystem.
- *
- * LNet is an asynchronous message-passing API that provides an unreliable,
- * connectionless service with no ordering guarantees. It supports OFA IB,
- * TCP/IP, and Cray interconnects, and routes between heterogeneous networks.
- *
- * @{
- */
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/** \defgroup lnet_init_fini Initialization and cleanup
- * LNet must be properly initialized before any LNet calls can be made.
- * @{
- */
-int LNetNIInit(lnet_pid_t requested_pid);
-int LNetNIFini(void);
-/** @} lnet_init_fini */
-
-/** \defgroup lnet_addr LNet addressing and basic types
- *
- * Addressing scheme and basic data types of LNet.
- *
- * The LNet API is memory-oriented, so LNet must be able to address not only
- * end-points but also memory regions within a process address space.
- * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process
- * on a node. A portal represents an opening in the address space of a
- * process. Match bits are the criteria used to identify a region of memory
- * inside a portal, and an offset specifies a location within that region.
- *
- * LNet creates a table of portals for each process during initialization.
- * This table has MAX_PORTALS entries and its size can't be dynamically
- * changed. A portal stays empty until the owning process starts to add
- * memory regions to it. A portal is sometimes called an index because
- * it's an entry in the portals table of a process.
- *
- * \see LNetMEAttach
- * @{
- */
-int LNetGetId(unsigned int index, struct lnet_process_id *id);
-int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
-
-/** @} lnet_addr */
-
-/** \defgroup lnet_me Match entries
- *
- * A match entry (abbreviated as ME) describes a set of criteria to accept
- * incoming requests.
- *
- * A portal is essentially a match list plus a set of attributes. A match
- * list is a chain of MEs. Each ME includes a pointer to a memory descriptor
- * and a set of match criteria. The match criteria can be used to reject
- * incoming requests based on process ID or the match bits provided in the
- * request. MEs can be dynamically inserted into a match list by LNetMEAttach()
- * and LNetMEInsert(), and removed from its list by LNetMEUnlink().
- * @{
- */
-int LNetMEAttach(unsigned int      portal,
-		 struct lnet_process_id match_id_in,
-		 __u64		   match_bits_in,
-		 __u64		   ignore_bits_in,
-		 enum lnet_unlink unlink_in,
-		 enum lnet_ins_pos pos_in,
-		 struct lnet_handle_me *handle_out);
-
-int LNetMEInsert(struct lnet_handle_me current_in,
-		 struct lnet_process_id match_id_in,
-		 __u64		   match_bits_in,
-		 __u64		   ignore_bits_in,
-		 enum lnet_unlink unlink_in,
-		 enum lnet_ins_pos position_in,
-		 struct lnet_handle_me *handle_out);
-
-int LNetMEUnlink(struct lnet_handle_me current_in);
-/** @} lnet_me */
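
A hedged sketch of the attach call (LNET_NID_ANY, LNET_PID_ANY, LNET_UNLINK and LNET_INS_AFTER come from lnet-types.h; the wrapper itself is hypothetical):

static int demo_me_attach(unsigned int portal, __u64 match_bits,
			  struct lnet_handle_me *meh)
{
	struct lnet_process_id any = {
		.nid = LNET_NID_ANY,	/* accept from any node... */
		.pid = LNET_PID_ANY,	/* ...and any process */
	};

	/* Append an ME that matches @match_bits exactly (no ignore
	 * bits) and is unlinked automatically with its MD. */
	return LNetMEAttach(portal, any, match_bits, 0, LNET_UNLINK,
			    LNET_INS_AFTER, meh);
}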
-
-/** \defgroup lnet_md Memory descriptors
- *
- * A memory descriptor contains information about a region of a user's
- * memory (either in kernel or user space) and optionally points to an
- * event queue where information about the operations performed on the
- * memory descriptor is recorded. "Memory descriptor" is abbreviated as
- * MD and is used interchangeably with the memory region it describes.
- *
- * The LNet API provides two operations to create MDs: LNetMDAttach()
- * and LNetMDBind(); one operation to unlink and release the resources
- * associated with an MD: LNetMDUnlink().
- * @{
- */
-int LNetMDAttach(struct lnet_handle_me current_in,
-		 struct lnet_md md_in,
-		 enum lnet_unlink unlink_in,
-		 struct lnet_handle_md *md_handle_out);
-
-int LNetMDBind(struct lnet_md md_in,
-	       enum lnet_unlink unlink_in,
-	       struct lnet_handle_md *md_handle_out);
-
-int LNetMDUnlink(struct lnet_handle_md md_in);
-/** @} lnet_md */
-
-/** \defgroup lnet_eq Events and event queues
- *
- * Event queues (abbreviated as EQ) are used to log operations performed on
- * local MDs. In particular, they signal the completion of a data transmission
- * into or out of an MD. They can also be used to hold acknowledgments for
- * completed PUT operations and indicate when an MD has been unlinked. Multiple
- * MDs can share a single EQ. An EQ may have an optional event handler
- * associated with it. If an event handler exists, it will be run for each
- * event that is deposited into the EQ.
- *
- * In addition to the lnet_handle_eq, the LNet API defines two types
- * associated with events: ::lnet_event_kind defines the kinds of events
- * that can be stored in an EQ, and lnet_event is a structure that
- * holds the information associated with an event.
- *
- * There are five functions for dealing with EQs: LNetEQAlloc() is used to
- * create an EQ and allocate the resources needed, while LNetEQFree()
- * releases these resources and free the EQ. LNetEQGet() retrieves the next
- * event from an EQ, and LNetEQWait() can be used to block a process until
- * an EQ has at least one event. LNetEQPoll() can be used to test or wait
- * on multiple EQs.
- * @{
- */
-int LNetEQAlloc(unsigned int       count_in,
-		lnet_eq_handler_t  handler,
-		struct lnet_handle_eq *handle_out);
-
-int LNetEQFree(struct lnet_handle_eq eventq_in);
-
-int LNetEQPoll(struct lnet_handle_eq *eventqs_in,
-	       int		 neq_in,
-	       int		 timeout_ms,
-	       int		 interruptible,
-	       struct lnet_event *event_out,
-	       int		*which_eq_out);
-/** @} lnet_eq */
-
-/** \defgroup lnet_data Data movement operations
- *
- * The LNet API provides two data movement operations: LNetPut()
- * and LNetGet().
- * @{
- */
-int LNetPut(lnet_nid_t	      self,
-	    struct lnet_handle_md md_in,
-	    enum lnet_ack_req ack_req_in,
-	    struct lnet_process_id target_in,
-	    unsigned int      portal_in,
-	    __u64	      match_bits_in,
-	    unsigned int      offset_in,
-	    __u64	      hdr_data_in);
-
-int LNetGet(lnet_nid_t	      self,
-	    struct lnet_handle_md md_in,
-	    struct lnet_process_id target_in,
-	    unsigned int      portal_in,
-	    __u64	      match_bits_in,
-	    unsigned int      offset_in);
-/** @} lnet_data */
-
-/** \defgroup lnet_misc Miscellaneous operations.
- * Miscellaneous operations.
- * @{
- */
-int LNetSetLazyPortal(int portal);
-int LNetClearLazyPortal(int portal);
-int LNetCtl(unsigned int cmd, void *arg);
-void LNetDebugPeer(struct lnet_process_id id);
-
-/** @} lnet_misc */
-
-/** @} lnet */
-#endif
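
Putting the send side together: bind an MD over a local buffer, then PUT it at a target. A hedged sketch; the EQ and target are assumed to be set up elsewhere, and the portal and match bits are arbitrary demo values:

static int demo_put(lnet_nid_t self, struct lnet_process_id target,
		    struct lnet_handle_eq eqh, void *buf, unsigned int len)
{
	struct lnet_md md = {
		.start		= buf,
		.length		= len,
		.threshold	= 1,	/* one operation, then auto-unlink */
		.options	= 0,
		.user_ptr	= NULL,
		.eq_handle	= eqh,
	};
	struct lnet_handle_md mdh;
	int rc;

	rc = LNetMDBind(md, LNET_UNLINK, &mdh);
	if (rc)
		return rc;

	return LNetPut(self, mdh, LNET_ACK_REQ, target,
		       4 /* portal */, 0x17 /* match bits */, 0, 0);
}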

+ 0 - 652
drivers/staging/lustre/include/linux/lnet/lib-lnet.h

@@ -1,652 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lib-lnet.h
- */
-
-#ifndef __LNET_LIB_LNET_H__
-#define __LNET_LIB_LNET_H__
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <net/sock.h>
-
-#include <linux/lnet/api.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/lnetctl.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-extern struct lnet the_lnet;	/* THE network */
-
-#if (BITS_PER_LONG == 32)
-/* 2 CPTs; allowing more CPTs could put us under memory pressure */
-#define LNET_CPT_MAX_BITS	1
-
-#else /* 64-bit system */
-/*
- * 256 CPTs for thousands of CPUs; allowing more CPTs would risk
- * exhausting the lh_cookie space.
- */
-#define LNET_CPT_MAX_BITS	8
-#endif /* BITS_PER_LONG == 32 */
-
-/* max allowed CPT number */
-#define LNET_CPT_MAX		(1 << LNET_CPT_MAX_BITS)
-
-#define LNET_CPT_NUMBER		(the_lnet.ln_cpt_number)
-#define LNET_CPT_BITS		(the_lnet.ln_cpt_bits)
-#define LNET_CPT_MASK		((1ULL << LNET_CPT_BITS) - 1)
-
-/** exclusive lock */
-#define LNET_LOCK_EX		CFS_PERCPT_LOCK_EX
-
-/* need both kernel and user-land acceptor */
-#define LNET_ACCEPTOR_MIN_RESERVED_PORT    512
-#define LNET_ACCEPTOR_MAX_RESERVED_PORT    1023
-
-static inline int lnet_is_route_alive(struct lnet_route *route)
-{
-	/* gateway is down */
-	if (!route->lr_gateway->lp_alive)
-		return 0;
-	/* no NI status, assume it's alive */
-	if ((route->lr_gateway->lp_ping_feats &
-	     LNET_PING_FEAT_NI_STATUS) == 0)
-		return 1;
-	/* has NI status, check # down NIs */
-	return route->lr_downis == 0;
-}
-
-static inline int lnet_is_wire_handle_none(struct lnet_handle_wire *wh)
-{
-	return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE &&
-		wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE);
-}
-
-static inline int lnet_md_exhausted(struct lnet_libmd *md)
-{
-	return (!md->md_threshold ||
-		((md->md_options & LNET_MD_MAX_SIZE) &&
-		 md->md_offset + md->md_max_size > md->md_length));
-}
-
-static inline int lnet_md_unlinkable(struct lnet_libmd *md)
-{
-	/*
-	 * Should unlink md when its refcount is 0 and either:
-	 *  - md has been flagged for deletion (by auto unlink or
-	 *    LNetM[DE]Unlink, in the latter case md may not be exhausted).
-	 *  - auto unlink is on and md is exhausted.
-	 */
-	if (md->md_refcount)
-		return 0;
-
-	if (md->md_flags & LNET_MD_FLAG_ZOMBIE)
-		return 1;
-
-	return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) &&
-		lnet_md_exhausted(md));
-}
-
-#define lnet_cpt_table()	(the_lnet.ln_cpt_table)
-#define lnet_cpt_current()	cfs_cpt_current(the_lnet.ln_cpt_table, 1)
-
-static inline int
-lnet_cpt_of_cookie(__u64 cookie)
-{
-	unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK;
-
-	/*
-	 * LNET_CPT_NUMBER doesn't have to be a power of 2, so an
-	 * invalid cookie can yield an out-of-range cpt
-	 */
-	return cpt < LNET_CPT_NUMBER ? cpt : cpt % LNET_CPT_NUMBER;
-}
-
-static inline void
-lnet_res_lock(int cpt)
-{
-	cfs_percpt_lock(the_lnet.ln_res_lock, cpt);
-}
-
-static inline void
-lnet_res_unlock(int cpt)
-{
-	cfs_percpt_unlock(the_lnet.ln_res_lock, cpt);
-}
-
-static inline int
-lnet_res_lock_current(void)
-{
-	int cpt = lnet_cpt_current();
-
-	lnet_res_lock(cpt);
-	return cpt;
-}
-
-static inline void
-lnet_net_lock(int cpt)
-{
-	cfs_percpt_lock(the_lnet.ln_net_lock, cpt);
-}
-
-static inline void
-lnet_net_unlock(int cpt)
-{
-	cfs_percpt_unlock(the_lnet.ln_net_lock, cpt);
-}
-
-static inline int
-lnet_net_lock_current(void)
-{
-	int cpt = lnet_cpt_current();
-
-	lnet_net_lock(cpt);
-	return cpt;
-}
-
-#define LNET_LOCK()		lnet_net_lock(LNET_LOCK_EX)
-#define LNET_UNLOCK()		lnet_net_unlock(LNET_LOCK_EX)
-
-#define lnet_ptl_lock(ptl)	spin_lock(&(ptl)->ptl_lock)
-#define lnet_ptl_unlock(ptl)	spin_unlock(&(ptl)->ptl_lock)
-#define lnet_eq_wait_lock()	spin_lock(&the_lnet.ln_eq_wait_lock)
-#define lnet_eq_wait_unlock()	spin_unlock(&the_lnet.ln_eq_wait_lock)
-#define lnet_ni_lock(ni)	spin_lock(&(ni)->ni_lock)
-#define lnet_ni_unlock(ni)	spin_unlock(&(ni)->ni_lock)
-
-#define MAX_PORTALS		64
-
-static inline struct lnet_libmd *
-lnet_md_alloc(struct lnet_md *umd)
-{
-	struct lnet_libmd *md;
-	unsigned int size;
-	unsigned int niov;
-
-	if (umd->options & LNET_MD_KIOV) {
-		niov = umd->length;
-		size = offsetof(struct lnet_libmd, md_iov.kiov[niov]);
-	} else {
-		niov = umd->options & LNET_MD_IOVEC ? umd->length : 1;
-		size = offsetof(struct lnet_libmd, md_iov.iov[niov]);
-	}
-
-	md = kzalloc(size, GFP_NOFS);
-
-	if (md) {
-		/* Set here in case of early free */
-		md->md_options = umd->options;
-		md->md_niov = niov;
-		INIT_LIST_HEAD(&md->md_list);
-	}
-
-	return md;
-}
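
The sizing above is the usual trailing-array idiom: the allocation covers the fixed header plus exactly @niov iov/kiov entries. A generic hedged sketch of the same pattern (the structure is hypothetical):

struct demo_vec {
	int		dv_niov;
	struct bio_vec	dv_kiov[0];	/* trailing array, sized at alloc */
};

static struct demo_vec *demo_vec_alloc(int niov)
{
	struct demo_vec *v;

	/* Header plus @niov trailing entries, as in lnet_md_alloc(). */
	v = kzalloc(offsetof(struct demo_vec, dv_kiov[niov]), GFP_NOFS);
	if (v)
		v->dv_niov = niov;
	return v;
}
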
-
-struct lnet_libhandle *lnet_res_lh_lookup(struct lnet_res_container *rec,
-					  __u64 cookie);
-void lnet_res_lh_initialize(struct lnet_res_container *rec,
-			    struct lnet_libhandle *lh);
-static inline void
-lnet_res_lh_invalidate(struct lnet_libhandle *lh)
-{
-	/* NB: cookie is still useful, don't reset it */
-	list_del(&lh->lh_hash_chain);
-}
-
-static inline void
-lnet_eq2handle(struct lnet_handle_eq *handle, struct lnet_eq *eq)
-{
-	if (!eq) {
-		LNetInvalidateEQHandle(handle);
-		return;
-	}
-
-	handle->cookie = eq->eq_lh.lh_cookie;
-}
-
-static inline struct lnet_eq *
-lnet_handle2eq(struct lnet_handle_eq *handle)
-{
-	struct lnet_libhandle *lh;
-
-	lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_eq, eq_lh);
-}
-
-static inline void
-lnet_md2handle(struct lnet_handle_md *handle, struct lnet_libmd *md)
-{
-	handle->cookie = md->md_lh.lh_cookie;
-}
-
-static inline struct lnet_libmd *
-lnet_handle2md(struct lnet_handle_md *handle)
-{
-	/* ALWAYS called with resource lock held */
-	struct lnet_libhandle *lh;
-	int cpt;
-
-	cpt = lnet_cpt_of_cookie(handle->cookie);
-	lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
-				handle->cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_libmd, md_lh);
-}
-
-static inline struct lnet_libmd *
-lnet_wire_handle2md(struct lnet_handle_wire *wh)
-{
-	/* ALWAYS called with resource lock held */
-	struct lnet_libhandle *lh;
-	int cpt;
-
-	if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie)
-		return NULL;
-
-	cpt = lnet_cpt_of_cookie(wh->wh_object_cookie);
-	lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
-				wh->wh_object_cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_libmd, md_lh);
-}
-
-static inline void
-lnet_me2handle(struct lnet_handle_me *handle, struct lnet_me *me)
-{
-	handle->cookie = me->me_lh.lh_cookie;
-}
-
-static inline struct lnet_me *
-lnet_handle2me(struct lnet_handle_me *handle)
-{
-	/* ALWAYS called with resource lock held */
-	struct lnet_libhandle *lh;
-	int cpt;
-
-	cpt = lnet_cpt_of_cookie(handle->cookie);
-	lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt],
-				handle->cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_me, me_lh);
-}
-
-static inline void
-lnet_peer_addref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	lp->lp_refcount++;
-}
-
-void lnet_destroy_peer_locked(struct lnet_peer *lp);
-
-static inline void
-lnet_peer_decref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	lp->lp_refcount--;
-	if (!lp->lp_refcount)
-		lnet_destroy_peer_locked(lp);
-}
-
-static inline int
-lnet_isrouter(struct lnet_peer *lp)
-{
-	return lp->lp_rtr_refcount ? 1 : 0;
-}
-
-static inline void
-lnet_ni_addref_locked(struct lnet_ni *ni, int cpt)
-{
-	LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
-	LASSERT(*ni->ni_refs[cpt] >= 0);
-
-	(*ni->ni_refs[cpt])++;
-}
-
-static inline void
-lnet_ni_addref(struct lnet_ni *ni)
-{
-	lnet_net_lock(0);
-	lnet_ni_addref_locked(ni, 0);
-	lnet_net_unlock(0);
-}
-
-static inline void
-lnet_ni_decref_locked(struct lnet_ni *ni, int cpt)
-{
-	LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
-	LASSERT(*ni->ni_refs[cpt] > 0);
-
-	(*ni->ni_refs[cpt])--;
-}
-
-static inline void
-lnet_ni_decref(struct lnet_ni *ni)
-{
-	lnet_net_lock(0);
-	lnet_ni_decref_locked(ni, 0);
-	lnet_net_unlock(0);
-}
-
-void lnet_ni_free(struct lnet_ni *ni);
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist);
-
-static inline int
-lnet_nid2peerhash(lnet_nid_t nid)
-{
-	return hash_long(nid, LNET_PEER_HASH_BITS);
-}
-
-static inline struct list_head *
-lnet_net2rnethash(__u32 net)
-{
-	return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) +
-		LNET_NETTYP(net)) &
-		((1U << the_lnet.ln_remote_nets_hbits) - 1)];
-}
-
-extern struct lnet_lnd the_lolnd;
-extern int avoid_asym_router_failure;
-
-int lnet_cpt_of_nid_locked(lnet_nid_t nid);
-int lnet_cpt_of_nid(lnet_nid_t nid);
-struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
-struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
-struct lnet_ni *lnet_net2ni(__u32 net);
-
-extern int portal_rotor;
-
-int lnet_lib_init(void);
-void lnet_lib_exit(void);
-
-int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, int alive,
-		unsigned long when);
-void lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
-			unsigned long when);
-int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid,
-		   unsigned int priority);
-int lnet_check_routes(void);
-int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
-void lnet_destroy_routes(void);
-int lnet_get_route(int idx, __u32 *net, __u32 *hops,
-		   lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
-
-void lnet_router_debugfs_init(void);
-void lnet_router_debugfs_fini(void);
-int  lnet_rtrpools_alloc(int im_a_router);
-void lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages);
-int lnet_rtrpools_adjust(int tiny, int small, int large);
-int lnet_rtrpools_enable(void);
-void lnet_rtrpools_disable(void);
-void lnet_rtrpools_free(int keep_pools);
-struct lnet_remotenet *lnet_find_net_locked(__u32 net);
-int lnet_dyn_add_ni(lnet_pid_t requested_pid,
-		    struct lnet_ioctl_config_data *conf);
-int lnet_dyn_del_ni(__u32 net);
-int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
-
-int lnet_islocalnid(lnet_nid_t nid);
-int lnet_islocalnet(__u32 net);
-
-void lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
-			unsigned int offset, unsigned int mlen);
-void lnet_msg_detach_md(struct lnet_msg *msg, int status);
-void lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev);
-void lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type);
-void lnet_msg_commit(struct lnet_msg *msg, int cpt);
-void lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status);
-
-void lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev);
-void lnet_prep_send(struct lnet_msg *msg, int type,
-		    struct lnet_process_id target, unsigned int offset,
-		    unsigned int len);
-int lnet_send(lnet_nid_t nid, struct lnet_msg *msg, lnet_nid_t rtr_nid);
-void lnet_return_tx_credits_locked(struct lnet_msg *msg);
-void lnet_return_rx_credits_locked(struct lnet_msg *msg);
-void lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp);
-void lnet_drop_routed_msgs_locked(struct list_head *list, int cpt);
-
-/* portals functions */
-/* portals attributes */
-static inline int
-lnet_ptl_is_lazy(struct lnet_portal *ptl)
-{
-	return !!(ptl->ptl_options & LNET_PTL_LAZY);
-}
-
-static inline int
-lnet_ptl_is_unique(struct lnet_portal *ptl)
-{
-	return !!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE);
-}
-
-static inline int
-lnet_ptl_is_wildcard(struct lnet_portal *ptl)
-{
-	return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD);
-}
-
-static inline void
-lnet_ptl_setopt(struct lnet_portal *ptl, int opt)
-{
-	ptl->ptl_options |= opt;
-}
-
-static inline void
-lnet_ptl_unsetopt(struct lnet_portal *ptl, int opt)
-{
-	ptl->ptl_options &= ~opt;
-}
-
-/* match-table functions */
-struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable,
-				     struct lnet_process_id id, __u64 mbits);
-struct lnet_match_table *lnet_mt_of_attach(unsigned int index,
-					   struct lnet_process_id id,
-					   __u64 mbits, __u64 ignore_bits,
-					   enum lnet_ins_pos pos);
-int lnet_mt_match_md(struct lnet_match_table *mtable,
-		     struct lnet_match_info *info, struct lnet_msg *msg);
-
-/* portals match/attach functions */
-void lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
-			struct list_head *matches, struct list_head *drops);
-void lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md);
-int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg);
-
-/* initialize and finalize portals */
-int lnet_portals_create(void);
-void lnet_portals_destroy(void);
-
-/* message functions */
-int lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr,
-	       lnet_nid_t fromnid, void *private, int rdma_req);
-int lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg);
-int lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg);
-
-void lnet_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-	       int delayed, unsigned int offset, unsigned int mlen,
-	       unsigned int rlen);
-void lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-		  int delayed, unsigned int offset,
-		  unsigned int mlen, unsigned int rlen);
-
-struct lnet_msg *lnet_create_reply_msg(struct lnet_ni *ni,
-				       struct lnet_msg *get_msg);
-void lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *msg,
-			    unsigned int len);
-
-void lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int rc);
-
-void lnet_drop_message(struct lnet_ni *ni, int cpt, void *private,
-		       unsigned int nob);
-void lnet_drop_delayed_msg_list(struct list_head *head, char *reason);
-void lnet_recv_delayed_msg_list(struct list_head *head);
-
-int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt);
-void lnet_msg_container_cleanup(struct lnet_msg_container *container);
-void lnet_msg_containers_destroy(void);
-int lnet_msg_containers_create(void);
-
-char *lnet_msgtyp2str(int type);
-void lnet_print_hdr(struct lnet_hdr *hdr);
-int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
-
-/** \addtogroup lnet_fault_simulation @{ */
-
-int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data);
-int lnet_fault_init(void);
-void lnet_fault_fini(void);
-
-bool lnet_drop_rule_match(struct lnet_hdr *hdr);
-
-int lnet_delay_rule_add(struct lnet_fault_attr *attr);
-int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown);
-int lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
-			 struct lnet_fault_stat *stat);
-void lnet_delay_rule_reset(void);
-void lnet_delay_rule_check(void);
-bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg);
-
-/** @} lnet_fault_simulation */
-
-void lnet_counters_get(struct lnet_counters *counters);
-void lnet_counters_reset(void);
-
-unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov);
-int lnet_extract_iov(int dst_niov, struct kvec *dst,
-		     int src_niov, const struct kvec *src,
-		      unsigned int offset, unsigned int len);
-
-unsigned int lnet_kiov_nob(unsigned int niov, struct bio_vec *iov);
-int lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
-		      int src_niov, const struct bio_vec *src,
-		      unsigned int offset, unsigned int len);
-
-void lnet_copy_iov2iter(struct iov_iter *to,
-			unsigned int nsiov, const struct kvec *siov,
-			unsigned int soffset, unsigned int nob);
-void lnet_copy_kiov2iter(struct iov_iter *to,
-			 unsigned int nkiov, const struct bio_vec *kiov,
-			 unsigned int kiovoffset, unsigned int nob);
-
-void lnet_me_unlink(struct lnet_me *me);
-
-void lnet_md_unlink(struct lnet_libmd *md);
-void lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd);
-
-void lnet_register_lnd(struct lnet_lnd *lnd);
-void lnet_unregister_lnd(struct lnet_lnd *lnd);
-
-int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-		 __u32 local_ip, __u32 peer_ip, int peer_port);
-void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
-				__u32 peer_ip, int port);
-int lnet_count_acceptor_nis(void);
-int lnet_acceptor_timeout(void);
-int lnet_acceptor_port(void);
-
-int lnet_acceptor_start(void);
-void lnet_acceptor_stop(void);
-
-int lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
-int lnet_ipif_enumerate(char ***names);
-void lnet_ipif_free_enumeration(char **names, int n);
-int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
-int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
-int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port);
-int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
-int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
-
-int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog);
-int lnet_sock_accept(struct socket **newsockp, struct socket *sock);
-int lnet_sock_connect(struct socket **sockp, int *fatal,
-		      __u32 local_ip, int local_port,
-		      __u32 peer_ip, int peer_port);
-void libcfs_sock_release(struct socket *sock);
-
-int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(struct lnet_ni *ni);
-
-int lnet_router_checker_start(void);
-void lnet_router_checker_stop(void);
-void lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net);
-void lnet_swap_pinginfo(struct lnet_ping_info *info);
-
-int lnet_parse_ip2nets(char **networksp, char *ip2nets);
-int lnet_parse_routes(char *route_str, int *im_a_router);
-int lnet_parse_networks(struct list_head *nilist, char *networks);
-int lnet_net_unique(__u32 net, struct list_head *nilist);
-
-int lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt);
-struct lnet_peer *lnet_find_peer_locked(struct lnet_peer_table *ptable,
-					lnet_nid_t nid);
-void lnet_peer_tables_cleanup(struct lnet_ni *ni);
-void lnet_peer_tables_destroy(void);
-int lnet_peer_tables_create(void);
-void lnet_debug_peer(lnet_nid_t nid);
-int lnet_get_peer_info(__u32 peer_index, __u64 *nid,
-		       char aliveness[LNET_MAX_STR_LEN],
-		       __u32 *cpt_iter, __u32 *refcount,
-		       __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
-		       __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
-		       __u32 *peer_tx_qnob);
-
-static inline void
-lnet_peer_set_alive(struct lnet_peer *lp)
-{
-	lp->lp_last_query = jiffies;
-	lp->lp_last_alive = jiffies;
-	if (!lp->lp_alive)
-		lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-}
-
-#endif

+ 0 - 666
drivers/staging/lustre/include/linux/lnet/lib-types.h

@@ -1,666 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lib-types.h
- */
-
-#ifndef __LNET_LIB_TYPES_H__
-#define __LNET_LIB_TYPES_H__
-
-#include <linux/kthread.h>
-#include <linux/uio.h>
-#include <linux/types.h>
-#include <linux/completion.h>
-
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-/* Max payload size */
-#define LNET_MAX_PAYLOAD      CONFIG_LNET_MAX_PAYLOAD
-#if (LNET_MAX_PAYLOAD < LNET_MTU)
-# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
-#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
-# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
-#endif
-
-/* forward refs */
-struct lnet_libmd;
-
-struct lnet_msg {
-	struct list_head	msg_activelist;
-	struct list_head	msg_list;	   /* Q for credits/MD */
-
-	struct lnet_process_id	msg_target;
-	/* where it is from; only used for building events */
-	lnet_nid_t		msg_from;
-	__u32			msg_type;
-
-	/* committed for sending */
-	unsigned int		msg_tx_committed:1;
-	/* CPT # this message committed for sending */
-	unsigned int		msg_tx_cpt:15;
-	/* committed for receiving */
-	unsigned int		msg_rx_committed:1;
-	/* CPT # this message committed for receiving */
-	unsigned int		msg_rx_cpt:15;
-	/* queued for tx credit */
-	unsigned int		msg_tx_delayed:1;
-	/* queued for RX buffer */
-	unsigned int		msg_rx_delayed:1;
-	/* ready for pending on RX delay list */
-	unsigned int		msg_rx_ready_delay:1;
-
-	unsigned int	msg_vmflush:1;		/* VM trying to free memory */
-	unsigned int	msg_target_is_router:1; /* sending to a router */
-	unsigned int	msg_routing:1;		/* being forwarded */
-	unsigned int	msg_ack:1;		/* ack on finalize (PUT) */
-	unsigned int	msg_sending:1;		/* outgoing message */
-	unsigned int	msg_receiving:1;	/* being received */
-	unsigned int	msg_txcredit:1;		/* taken an NI send credit */
-	unsigned int	msg_peertxcredit:1;	/* taken a peer send credit */
-	unsigned int	msg_rtrcredit:1;	/* taken a global router credit */
-	unsigned int	msg_peerrtrcredit:1;	/* taken a peer router credit */
-	unsigned int	msg_onactivelist:1;	/* on the activelist */
-	unsigned int	msg_rdma_get:1;
-
-	struct lnet_peer	*msg_txpeer;	 /* peer I'm sending to */
-	struct lnet_peer	*msg_rxpeer;	 /* peer I received from */
-
-	void			*msg_private;
-	struct lnet_libmd	*msg_md;
-
-	unsigned int		 msg_len;
-	unsigned int		 msg_wanted;
-	unsigned int		 msg_offset;
-	unsigned int		 msg_niov;
-	struct kvec		*msg_iov;
-	struct bio_vec		*msg_kiov;
-
-	struct lnet_event	 msg_ev;
-	struct lnet_hdr		 msg_hdr;
-};
-
-struct lnet_libhandle {
-	struct list_head	lh_hash_chain;
-	__u64			lh_cookie;
-};
-
-#define lh_entry(ptr, type, member) \
-	((type *)((char *)(ptr) - (char *)(&((type *)0)->member)))
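
lh_entry() is an open-coded container_of(): given a pointer to a member
embedded in an object, it recovers the enclosing object. A minimal sketch,
using a hypothetical wrapper type:

/* Hypothetical: recover the object wrapping a struct lnet_libhandle */
struct example_obj {
	int			eo_data;
	struct lnet_libhandle	eo_lh;
};

static inline struct example_obj *example_from_lh(struct lnet_libhandle *lh)
{
	return lh_entry(lh, struct example_obj, eo_lh);
}
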
-
-struct lnet_eq {
-	struct list_head	  eq_list;
-	struct lnet_libhandle	  eq_lh;
-	unsigned long		  eq_enq_seq;
-	unsigned long		  eq_deq_seq;
-	unsigned int		  eq_size;
-	lnet_eq_handler_t	  eq_callback;
-	struct lnet_event	 *eq_events;
-	int			**eq_refs;	/* percpt refcount for EQ */
-};
-
-struct lnet_me {
-	struct list_head	 me_list;
-	struct lnet_libhandle	 me_lh;
-	struct lnet_process_id	 me_match_id;
-	unsigned int		 me_portal;
-	unsigned int		 me_pos;	/* hash offset in mt_hash */
-	__u64			 me_match_bits;
-	__u64			 me_ignore_bits;
-	enum lnet_unlink	 me_unlink;
-	struct lnet_libmd	*me_md;
-};
-
-struct lnet_libmd {
-	struct list_head	 md_list;
-	struct lnet_libhandle	 md_lh;
-	struct lnet_me		*md_me;
-	char			*md_start;
-	unsigned int		 md_offset;
-	unsigned int		 md_length;
-	unsigned int		 md_max_size;
-	int			 md_threshold;
-	int			 md_refcount;
-	unsigned int		 md_options;
-	unsigned int		 md_flags;
-	void			*md_user_ptr;
-	struct lnet_eq		*md_eq;
-	unsigned int		 md_niov;	/* # frags */
-	union {
-		struct kvec	iov[LNET_MAX_IOV];
-		struct bio_vec	kiov[LNET_MAX_IOV];
-	} md_iov;
-};
-
-#define LNET_MD_FLAG_ZOMBIE		BIT(0)
-#define LNET_MD_FLAG_AUTO_UNLINK	BIT(1)
-#define LNET_MD_FLAG_ABORTED		BIT(2)
-
-struct lnet_test_peer {
-	/* info about peers we are trying to fail */
-	struct list_head	tp_list;	/* ln_test_peers */
-	lnet_nid_t		tp_nid;		/* matching nid */
-	unsigned int		tp_threshold;	/* # failures to simulate */
-};
-
-#define LNET_COOKIE_TYPE_MD	1
-#define LNET_COOKIE_TYPE_ME	2
-#define LNET_COOKIE_TYPE_EQ	3
-#define LNET_COOKIE_TYPE_BITS	2
-#define LNET_COOKIE_MASK	((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL)
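
The low LNET_COOKIE_TYPE_BITS of a handle cookie carry the object type, so
the type can be recovered with the mask. An illustrative decoder (the helper
name is an assumption, not in-tree code):

/* Illustrative: decode the object type packed into a cookie's low bits */
static inline int example_cookie_type(__u64 cookie)
{
	return (int)(cookie & LNET_COOKIE_MASK);	/* MD, ME or EQ */
}
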
-
-struct lnet_ni;			/* forward ref */
-
-struct lnet_lnd {
-	/* fields managed by portals */
-	struct list_head	lnd_list;	/* stash in the LND table */
-	int			lnd_refcount;	/* # active instances */
-
-	/* fields initialised by the LND */
-	__u32			lnd_type;
-
-	int  (*lnd_startup)(struct lnet_ni *ni);
-	void (*lnd_shutdown)(struct lnet_ni *ni);
-	int  (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
-
-	/*
-	 * In the data movement APIs below, payload buffers are described as
-	 * a set of 'niov' fragments which are...
-	 * EITHER
-	 *    in virtual memory (struct kvec *iov != NULL)
-	 * OR
-	 *    in pages (kernel only: struct bio_vec *kiov != NULL).
-	 * The LND may NOT overwrite these fragment descriptors.
-	 * An 'offset' may specify a byte offset within the set of fragments
-	 * to start from.
-	 */
-
-	/*
-	 * Start sending a preformatted message.  'private' is NULL for PUT and
-	 * GET messages; otherwise this is a response to an incoming message
-	 * and 'private' is the 'private' passed to lnet_parse().  Return
-	 * non-zero for immediate failure, otherwise complete later with
-	 * lnet_finalize()
-	 */
-	int (*lnd_send)(struct lnet_ni *ni, void *private,
-			struct lnet_msg *msg);
-
-	/*
-	 * Start receiving 'mlen' bytes of payload data, skipping the following
-	 * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
-	 * lnet_parse().  Return non-zero for immediate failure, otherwise
-	 * complete later with lnet_finalize().  This also gives back a receive
-	 * credit if the LND does flow control.
-	 */
-	int (*lnd_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-			int delayed, struct iov_iter *to, unsigned int rlen);
-
-	/*
-	 * lnet_parse() has had to delay processing of this message
-	 * (e.g. waiting for a forwarding buffer or send credits).  Give the
-	 * LND a chance to free urgently needed resources.  If called, return 0
-	 * for success and do NOT give back a receive credit; that has to wait
-	 * until lnd_recv() gets called.  On failure return < 0 and
-	 * release resources; lnd_recv() will not be called.
-	 */
-	int (*lnd_eager_recv)(struct lnet_ni *ni, void *private,
-			      struct lnet_msg *msg, void **new_privatep);
-
-	/* notification of peer health */
-	void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
-
-	/* query of peer aliveness */
-	void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer,
-			  unsigned long *when);
-
-	/* accept a new connection */
-	int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock);
-};
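
To make the lnd_send() contract above concrete, here is a minimal
hypothetical sketch (not the in-tree loopback LND): the callback either
fails the send immediately with a non-zero return, or arranges for
lnet_finalize() to run once the message is done.

/*
 * Hypothetical sketch of the lnd_send() contract.  A real LND would queue
 * the message, move the payload fragments, and call lnet_finalize() from
 * its completion path rather than inline.
 */
static int example_lnd_send(struct lnet_ni *ni, void *private,
			    struct lnet_msg *msg)
{
	if (!msg)
		return -EINVAL;		/* immediate failure */

	/* "deliver" instantly; completion is reported via lnet_finalize() */
	lnet_finalize(ni, msg, 0);
	return 0;
}
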
-
-struct lnet_tx_queue {
-	int			tq_credits;	/* # tx credits free */
-	int			tq_credits_min;	/* lowest it's been */
-	int			tq_credits_max;	/* total # tx credits */
-	struct list_head	tq_delayed;	/* delayed TXs */
-};
-
-struct lnet_ni {
-	spinlock_t		  ni_lock;
-	struct list_head	  ni_list;	/* chain on ln_nis */
-	struct list_head	  ni_cptlist;	/* chain on ln_nis_cpt */
-	int			  ni_maxtxcredits; /* # tx credits  */
-	/* # per-peer send credits */
-	int			  ni_peertxcredits;
-	/* # per-peer router buffer credits */
-	int			  ni_peerrtrcredits;
-	/* seconds to consider peer dead */
-	int			  ni_peertimeout;
-	int			  ni_ncpts;	/* number of CPTs */
-	__u32			 *ni_cpts;	/* CPTs this NI is bound on */
-	lnet_nid_t		  ni_nid;	/* interface's NID */
-	void			 *ni_data;	/* instance-specific data */
-	struct lnet_lnd		 *ni_lnd;	/* procedural interface */
-	struct lnet_tx_queue	**ni_tx_queues;	/* percpt TX queues */
-	int			**ni_refs;	/* percpt reference count */
-	time64_t		  ni_last_alive;/* when I was last alive */
-	struct lnet_ni_status	 *ni_status;	/* my health status */
-	/* per NI LND tunables */
-	struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
-	/* equivalent interfaces to use */
-	char			 *ni_interfaces[LNET_MAX_INTERFACES];
-	/* original net namespace */
-	struct net		 *ni_net_ns;
-};
-
-#define LNET_PROTO_PING_MATCHBITS	0x8000000000000000LL
-
-/*
- * NB: the values of these features equal LNET_PROTO_PING_VERSION_x of
- * old LNet, so there shouldn't be any compatibility issue
- */
-#define LNET_PING_FEAT_INVAL		(0)		/* no feature */
-#define LNET_PING_FEAT_BASE		BIT(0)	/* just a ping */
-#define LNET_PING_FEAT_NI_STATUS	BIT(1)	/* return NI status */
-#define LNET_PING_FEAT_RTE_DISABLED	BIT(2)	/* routing is disabled */
-
-#define LNET_PING_FEAT_MASK		(LNET_PING_FEAT_BASE | \
-					 LNET_PING_FEAT_NI_STATUS)
-
-/* router checker data, per router */
-#define LNET_MAX_RTR_NIS   16
-#define LNET_PINGINFO_SIZE offsetof(struct lnet_ping_info, pi_ni[LNET_MAX_RTR_NIS])
-struct lnet_rc_data {
-	/* chain on the_lnet.ln_rcd_zombie or ln_rcd_deathrow */
-	struct list_head	 rcd_list;
-	struct lnet_handle_md	 rcd_mdh;	/* ping buffer MD */
-	struct lnet_peer	*rcd_gateway;	/* reference to gateway */
-	struct lnet_ping_info	*rcd_pinginfo;	/* ping buffer */
-};
-
-struct lnet_peer {
-	struct list_head	 lp_hashlist;	/* chain on peer hash */
-	struct list_head	 lp_txq;	/* messages blocking for
-						 * tx credits
-						 */
-	struct list_head	 lp_rtrq;	/* messages blocking for
-						 * router credits
-						 */
-	struct list_head	 lp_rtr_list;	/* chain on router list */
-	int			 lp_txcredits;	/* # tx credits available */
-	int			 lp_mintxcredits;  /* low water mark */
-	int			 lp_rtrcredits;	   /* # router credits */
-	int			 lp_minrtrcredits; /* low water mark */
-	unsigned int		 lp_alive:1;	   /* alive/dead? */
-	unsigned int		 lp_notify:1;	/* notification outstanding? */
-	unsigned int		 lp_notifylnd:1;/* outstanding notification
-						 * for LND?
-						 */
-	unsigned int		 lp_notifying:1; /* some thread is handling
-						  * notification
-						  */
-	unsigned int		 lp_ping_notsent;/* SEND event outstanding
-						  * from ping
-						  */
-	int			 lp_alive_count; /* # times router went
-						  * dead<->alive
-						  */
-	long			 lp_txqnob;	 /* bytes queued for sending */
-	unsigned long		 lp_timestamp;	 /* time of last aliveness
-						  * news
-						  */
-	unsigned long		 lp_ping_timestamp;/* time of last ping
-						    * attempt
-						    */
-	unsigned long		 lp_ping_deadline; /* != 0 if ping reply
-						    * expected
-						    */
-	unsigned long		 lp_last_alive;	/* when I was last alive */
-	unsigned long		 lp_last_query;	/* when lp_ni was queried
-						 * last time
-						 */
-	struct lnet_ni		*lp_ni;		/* interface peer is on */
-	lnet_nid_t		 lp_nid;	/* peer's NID */
-	int			 lp_refcount;	/* # refs */
-	int			 lp_cpt;	/* CPT this peer attached on */
-	/* # refs from lnet_route::lr_gateway */
-	int			 lp_rtr_refcount;
-	/* returned RC ping features */
-	unsigned int		 lp_ping_feats;
-	struct list_head	 lp_routes;	/* routers on this peer */
-	struct lnet_rc_data	*lp_rcd;	/* router checker state */
-};
-
-/* peer hash size */
-#define LNET_PEER_HASH_BITS	9
-#define LNET_PEER_HASH_SIZE	(1 << LNET_PEER_HASH_BITS)
-
-/* peer hash table */
-struct lnet_peer_table {
-	int			 pt_version;	/* /proc validity stamp */
-	int			 pt_number;	/* # peers extant */
-	/* # zombies to go to deathrow (and not there yet) */
-	int			 pt_zombies;
-	struct list_head	 pt_deathrow;	/* zombie peers */
-	struct list_head	*pt_hash;	/* NID->peer hash */
-};
-
-/*
- * peer aliveness is enabled only on routers for peers in a network where the
- * lnet_ni::ni_peertimeout has been set to a positive value
- */
-#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \
-					 (lp)->lp_ni->ni_peertimeout > 0)
-
-struct lnet_route {
-	struct list_head	 lr_list;	/* chain on net */
-	struct list_head	 lr_gwlist;	/* chain on gateway */
-	struct lnet_peer	*lr_gateway;	/* router node */
-	__u32			 lr_net;	/* remote network number */
-	int			 lr_seq;	/* sequence for round-robin */
-	unsigned int		 lr_downis;	/* number of down NIs */
-	__u32			 lr_hops;	/* how far I am */
-	unsigned int             lr_priority;	/* route priority */
-};
-
-#define LNET_REMOTE_NETS_HASH_DEFAULT	(1U << 7)
-#define LNET_REMOTE_NETS_HASH_MAX	(1U << 16)
-#define LNET_REMOTE_NETS_HASH_SIZE	(1 << the_lnet.ln_remote_nets_hbits)
-
-struct lnet_remotenet {
-	struct list_head	lrn_list;	/* chain on
-						 * ln_remote_nets_hash
-						 */
-	struct list_head	lrn_routes;	/* routes to me */
-	__u32			lrn_net;	/* my net number */
-};
-
-/** lnet message has credit and can be submitted to lnd for send/receive */
-#define LNET_CREDIT_OK		0
-/** lnet message is waiting for credit */
-#define LNET_CREDIT_WAIT	1
-
-struct lnet_rtrbufpool {
-	struct list_head	rbp_bufs;	/* my free buffer pool */
-	struct list_head	rbp_msgs;	/* messages blocking
-						 * for a buffer
-						 */
-	int			rbp_npages;	/* # pages in each buffer */
-	/* requested number of buffers */
-	int			rbp_req_nbuffers;
-	/* # buffers actually allocated */
-	int			rbp_nbuffers;
-	int			rbp_credits;	/* # free buffers /
-						 * blocked messages
-						 */
-	int			rbp_mincredits;	/* low water mark */
-};
-
-struct lnet_rtrbuf {
-	struct list_head	 rb_list;	/* chain on rbp_bufs */
-	struct lnet_rtrbufpool	*rb_pool;	/* owning pool */
-	struct bio_vec		 rb_kiov[0];	/* the buffer space */
-};
-
-#define LNET_PEER_HASHSIZE	503	/* prime! */
-
-#define LNET_TINY_BUF_IDX	0
-#define LNET_SMALL_BUF_IDX	1
-#define LNET_LARGE_BUF_IDX	2
-
-/* # different router buffer pools */
-#define LNET_NRBPOOLS		(LNET_LARGE_BUF_IDX + 1)
-
-enum lnet_match_flags {
-	/* Didn't match anything */
-	LNET_MATCHMD_NONE	= BIT(0),
-	/* Matched OK */
-	LNET_MATCHMD_OK		= BIT(1),
-	/* Must be discarded */
-	LNET_MATCHMD_DROP	= BIT(2),
-	/* match and buffer is exhausted */
-	LNET_MATCHMD_EXHAUSTED	= BIT(3),
-	/* match or drop */
-	LNET_MATCHMD_FINISH	= (LNET_MATCHMD_OK | LNET_MATCHMD_DROP),
-};
-
-/* Options for lnet_portal::ptl_options */
-#define LNET_PTL_LAZY		BIT(0)
-#define LNET_PTL_MATCH_UNIQUE	BIT(1)	/* unique match, for RDMA */
-#define LNET_PTL_MATCH_WILDCARD	BIT(2)	/* wildcard match, request portal */
-
-/* parameter for matching operations (GET, PUT) */
-struct lnet_match_info {
-	__u64			mi_mbits;
-	struct lnet_process_id	mi_id;
-	unsigned int		mi_opc;
-	unsigned int		mi_portal;
-	unsigned int		mi_rlength;
-	unsigned int		mi_roffset;
-};
-
-/* ME hash of RDMA portal */
-#define LNET_MT_HASH_BITS		8
-#define LNET_MT_HASH_SIZE		(1 << LNET_MT_HASH_BITS)
-#define LNET_MT_HASH_MASK		(LNET_MT_HASH_SIZE - 1)
-/*
- * we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash,
- * the last entry is reserved for MEs with ignore-bits
- */
-#define LNET_MT_HASH_IGNORE		LNET_MT_HASH_SIZE
-/*
- * __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which
- * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the
- * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE]
- */
-#define LNET_MT_BITS_U64		6	/* 2^6 bits */
-#define LNET_MT_EXHAUSTED_BITS		(LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
-#define LNET_MT_EXHAUSTED_BMAP		((1 << LNET_MT_EXHAUSTED_BITS) + 1)
-
-/* portal match table */
-struct lnet_match_table {
-	/* reserved for upcoming patches, CPU partition ID */
-	unsigned int		 mt_cpt;
-	unsigned int		 mt_portal;	/* portal index */
-	/*
-	 * A match table is set as "enabled" if there is a non-exhausted MD
-	 * attached to mt_mhash; only valid for wildcard portals
-	 */
-	unsigned int		 mt_enabled;
-	/* bitmap to flag whether MEs on mt_hash are exhausted or not */
-	__u64			 mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
-	struct list_head	*mt_mhash;	/* matching hash */
-};
-
-/* these are only useful for wildcard portal */
-/* Turn off message rotor for wildcard portals */
-#define	LNET_PTL_ROTOR_OFF	0
-/* round-robin dispatch all PUT messages for wildcard portals */
-#define	LNET_PTL_ROTOR_ON	1
-/* round-robin dispatch routed PUT message for wildcard portals */
-#define	LNET_PTL_ROTOR_RR_RT	2
-/* dispatch routed PUT message by hashing source NID for wildcard portals */
-#define	LNET_PTL_ROTOR_HASH_RT	3
-
-struct lnet_portal {
-	spinlock_t		  ptl_lock;
-	unsigned int		  ptl_index;	/* portal ID, reserved */
-	/* flags on this portal: lazy, unique... */
-	unsigned int		  ptl_options;
-	/* list of messages which are stealing buffer */
-	struct list_head	  ptl_msg_stealing;
-	/* messages blocking for MD */
-	struct list_head	  ptl_msg_delayed;
-	/* Match table for each CPT */
-	struct lnet_match_table	**ptl_mtables;
-	/* spread rotor of incoming "PUT" */
-	unsigned int		  ptl_rotor;
-	/* # active entries for this portal */
-	int			  ptl_mt_nmaps;
-	/* array of active entries' cpu-partition-id */
-	int			  ptl_mt_maps[0];
-};
-
-#define LNET_LH_HASH_BITS	12
-#define LNET_LH_HASH_SIZE	(1ULL << LNET_LH_HASH_BITS)
-#define LNET_LH_HASH_MASK	(LNET_LH_HASH_SIZE - 1)
-
-/* resource container (ME, MD, EQ) */
-struct lnet_res_container {
-	unsigned int		 rec_type;	/* container type */
-	__u64			 rec_lh_cookie;	/* cookie generator */
-	struct list_head	 rec_active;	/* active resource list */
-	struct list_head	*rec_lh_hash;	/* handle hash */
-};
-
-/* message container */
-struct lnet_msg_container {
-	int			  msc_init;	/* initialized or not */
-	/* max # threads finalizing */
-	int			  msc_nfinalizers;
-	/* msgs waiting to complete finalizing */
-	struct list_head	  msc_finalizing;
-	struct list_head	  msc_active;	/* active message list */
-	/* threads doing finalization */
-	void			**msc_finalizers;
-};
-
-/* Router Checker states */
-#define LNET_RC_STATE_SHUTDOWN		0	/* not started */
-#define LNET_RC_STATE_RUNNING		1	/* started up OK */
-#define LNET_RC_STATE_STOPPING		2	/* telling thread to stop */
-
-struct lnet {
-	/* CPU partition table of LNet */
-	struct cfs_cpt_table		 *ln_cpt_table;
-	/* number of CPTs in ln_cpt_table */
-	unsigned int			  ln_cpt_number;
-	unsigned int			  ln_cpt_bits;
-
-	/* protect LNet resources (ME/MD/EQ) */
-	struct cfs_percpt_lock		 *ln_res_lock;
-	/* # portals */
-	int				  ln_nportals;
-	/* the vector of portals */
-	struct lnet_portal		**ln_portals;
-	/* percpt ME containers */
-	struct lnet_res_container	**ln_me_containers;
-	/* percpt MD container */
-	struct lnet_res_container	**ln_md_containers;
-
-	/* Event Queue container */
-	struct lnet_res_container	  ln_eq_container;
-	wait_queue_head_t		  ln_eq_waitq;
-	spinlock_t			  ln_eq_wait_lock;
-	unsigned int			  ln_remote_nets_hbits;
-
-	/* protect NI, peer table, credits, routers, rtrbuf... */
-	struct cfs_percpt_lock		 *ln_net_lock;
-	/* percpt message containers for active/finalizing/freed message */
-	struct lnet_msg_container	**ln_msg_containers;
-	struct lnet_counters		**ln_counters;
-	struct lnet_peer_table		**ln_peer_tables;
-	/* failure simulation */
-	struct list_head		  ln_test_peers;
-	struct list_head		  ln_drop_rules;
-	struct list_head		  ln_delay_rules;
-
-	struct list_head		  ln_nis;	/* LND instances */
-	/* NIs bound to specific CPT(s) */
-	struct list_head		  ln_nis_cpt;
-	/* dying LND instances */
-	struct list_head		  ln_nis_zombie;
-	struct lnet_ni			 *ln_loni;	/* the loopback NI */
-
-	/* remote networks with routes to them */
-	struct list_head		 *ln_remote_nets_hash;
-	/* validity stamp */
-	__u64				  ln_remote_nets_version;
-	/* list of all known routers */
-	struct list_head		  ln_routers;
-	/* validity stamp */
-	__u64				  ln_routers_version;
-	/* percpt router buffer pools */
-	struct lnet_rtrbufpool		**ln_rtrpools;
-
-	struct lnet_handle_md		  ln_ping_target_md;
-	struct lnet_handle_eq		  ln_ping_target_eq;
-	struct lnet_ping_info		 *ln_ping_info;
-
-	/* router checker startup/shutdown state */
-	int				  ln_rc_state;
-	/* router checker's event queue */
-	struct lnet_handle_eq		  ln_rc_eqh;
-	/* rcd still pending on net */
-	struct list_head		  ln_rcd_deathrow;
-	/* rcd ready for free */
-	struct list_head		  ln_rcd_zombie;
-	/* serialise startup/shutdown */
-	struct completion		  ln_rc_signal;
-
-	struct mutex			  ln_api_mutex;
-	struct mutex			  ln_lnd_mutex;
-	struct mutex			  ln_delay_mutex;
-	/* Have I called LNetNIInit myself? */
-	int				  ln_niinit_self;
-	/* LNetNIInit/LNetNIFini counter */
-	int				  ln_refcount;
-	/* shutdown in progress */
-	int				  ln_shutdown;
-
-	int				  ln_routing;	/* am I a router? */
-	lnet_pid_t			  ln_pid;	/* requested pid */
-	/* uniquely identifies this ni in this epoch */
-	__u64				  ln_interface_cookie;
-	/* registered LNDs */
-	struct list_head		  ln_lnds;
-
-	/* test protocol compatibility flags */
-	int				  ln_testprotocompat;
-
-	/*
-	 * 0 - load the NIs from the mod params
-	 * 1 - do not load the NIs from the mod params
-	 * Reverse logic to ensure that other calls to LNetNIInit
-	 * need no change
-	 */
-	bool				  ln_nis_from_mod_params;
-
-	/*
-	 * waitq for the router checker.  As long as there are no routes in
-	 * the list, the router checker will sleep on this queue.  When
-	 * routes are added the thread will wake up.
-	 */
-	wait_queue_head_t		  ln_rc_waitq;
-
-};
-
-#endif

+ 0 - 87
drivers/staging/lustre/include/linux/lnet/socklnd.h

@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/socklnd.h
- */
-#ifndef __LNET_LNET_SOCKLND_H__
-#define __LNET_LNET_SOCKLND_H__
-
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/socklnd.h>
-
-struct ksock_hello_msg {
-	__u32		kshm_magic;	/* magic number of socklnd message */
-	__u32		kshm_version;	/* version of socklnd message */
-	lnet_nid_t      kshm_src_nid;	/* sender's nid */
-	lnet_nid_t	kshm_dst_nid;	/* destination nid */
-	lnet_pid_t	kshm_src_pid;	/* sender's pid */
-	lnet_pid_t	kshm_dst_pid;	/* destination pid */
-	__u64		kshm_src_incarnation; /* sender's incarnation */
-	__u64		kshm_dst_incarnation; /* destination's incarnation */
-	__u32		kshm_ctype;	/* connection type */
-	__u32		kshm_nips;	/* # IP addrs */
-	__u32		kshm_ips[0];	/* IP addrs */
-} WIRE_ATTR;
-
-struct ksock_lnet_msg {
-	struct lnet_hdr	ksnm_hdr;	/* lnet hdr */
-
-	/*
-	 * ksnm_payload is removed because of the winnt compiler's limitation:
-	 * a zero-sized array can only be placed at the tail of [nested]
-	 * structure definitions. The lnet payload is stored just after
-	 * the body of struct ksock_lnet_msg
-	 */
-} WIRE_ATTR;
-
-struct ksock_msg {
-	__u32	ksm_type;		/* type of socklnd message */
-	__u32	ksm_csum;		/* checksum if != 0 */
-	__u64	ksm_zc_cookies[2];	/* Zero-Copy request/ACK cookie */
-	union {
-		struct ksock_lnet_msg lnetmsg; /* lnet message, it's empty if
-						* it's NOOP
-						*/
-	} WIRE_ATTR ksm_u;
-} WIRE_ATTR;
-
-#define KSOCK_MSG_NOOP	0xC0	/* ksm_u empty */
-#define KSOCK_MSG_LNET	0xC1	/* lnet msg */
-
-/*
- * We need to know this number to parse hello msg from ksocklnd in
- * other LND (usocklnd, for example)
- */
-#define KSOCK_PROTO_V2	2
-#define KSOCK_PROTO_V3	3
-
-#endif
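
Per the note above, another LND parsing a received socklnd hello would at
minimum check the magic and version fields. A hedged sketch, assuming the
caller has already handled byte-order conversion and supplies the expected
magic (e.g. LNET_PROTO_TCP_MAGIC from lnet-types.h):

/* Illustrative hello sanity check; endianness handling omitted */
static inline int example_hello_ok(const struct ksock_hello_msg *hello,
				   __u32 expected_magic)
{
	if (hello->kshm_magic != expected_magic)
		return 0;
	return hello->kshm_version == KSOCK_PROTO_V2 ||
	       hello->kshm_version == KSOCK_PROTO_V3;
}
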

+ 0 - 149
drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h

@@ -1,149 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_debug.h
- *
- * Debug messages and assertions
- *
- */
-
-#ifndef __UAPI_LIBCFS_DEBUG_H__
-#define __UAPI_LIBCFS_DEBUG_H__
-
-/**
- * Format for debug message headers
- */
-struct ptldebug_header {
-	__u32 ph_len;
-	__u32 ph_flags;
-	__u32 ph_subsys;
-	__u32 ph_mask;
-	__u16 ph_cpu_id;
-	__u16 ph_type;
-	/* time_t overflow in 2106 */
-	__u32 ph_sec;
-	__u64 ph_usec;
-	__u32 ph_stack;
-	__u32 ph_pid;
-	__u32 ph_extern_pid;
-	__u32 ph_line_num;
-} __attribute__((packed));
-
-#define PH_FLAG_FIRST_RECORD	1
-
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED     0x00000001
-#define S_MDC           0x00000002
-#define S_MDS           0x00000004
-#define S_OSC           0x00000008
-#define S_OST           0x00000010
-#define S_CLASS         0x00000020
-#define S_LOG           0x00000040
-#define S_LLITE         0x00000080
-#define S_RPC           0x00000100
-#define S_MGMT          0x00000200
-#define S_LNET          0x00000400
-#define S_LND           0x00000800 /* ALL LNDs */
-#define S_PINGER        0x00001000
-#define S_FILTER        0x00002000
-#define S_LIBCFS        0x00004000
-#define S_ECHO          0x00008000
-#define S_LDLM          0x00010000
-#define S_LOV           0x00020000
-#define S_LQUOTA        0x00040000
-#define S_OSD           0x00080000
-#define S_LFSCK         0x00100000
-#define S_SNAPSHOT      0x00200000
-/* unused */
-#define S_LMV           0x00800000 /* b_new_cmd */
-/* unused */
-#define S_SEC           0x02000000 /* upcall cache */
-#define S_GSS           0x04000000 /* b_new_cmd */
-/* unused */
-#define S_MGC           0x10000000
-#define S_MGS           0x20000000
-#define S_FID           0x40000000 /* b_new_cmd */
-#define S_FLD           0x80000000 /* b_new_cmd */
-
-#define LIBCFS_DEBUG_SUBSYS_NAMES {					\
-	"undefined", "mdc", "mds", "osc", "ost", "class", "log",	\
-	"llite", "rpc", "mgmt", "lnet", "lnd", "pinger", "filter",	\
-	"libcfs", "echo", "ldlm", "lov", "lquota", "osd", "lfsck",	\
-	"snapshot", "", "lmv", "", "sec", "gss", "", "mgc", "mgs",	\
-	"fid", "fld", NULL }
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE         0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE         0x00000002
-#define D_SUPER         0x00000004
-#define D_EXT2          0x00000008 /* anything from ext2_debug */
-#define D_MALLOC        0x00000010 /* print malloc, free information */
-#define D_CACHE         0x00000020 /* cache-related items */
-#define D_INFO          0x00000040 /* general information */
-#define D_IOCTL         0x00000080 /* ioctl related information */
-#define D_NETERROR      0x00000100 /* network errors */
-#define D_NET           0x00000200 /* network communications */
-#define D_WARNING       0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS         0x00000800
-#define D_OTHER         0x00001000
-#define D_DENTRY        0x00002000
-#define D_NETTRACE      0x00004000
-#define D_PAGE          0x00008000 /* bulk page handling */
-#define D_DLMTRACE      0x00010000
-#define D_ERROR         0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG         0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA            0x00080000 /* recovery and failover */
-#define D_RPCTRACE      0x00100000 /* for distributed debugging */
-#define D_VFSTRACE      0x00200000
-#define D_READA         0x00400000 /* read-ahead */
-#define D_MMAP          0x00800000
-#define D_CONFIG        0x01000000
-#define D_CONSOLE       0x02000000
-#define D_QUOTA         0x04000000
-#define D_SEC           0x08000000
-#define D_LFSCK         0x10000000 /* For both OI scrub and LFSCK */
-#define D_HSM           0x20000000
-#define D_SNAPSHOT      0x40000000 /* snapshot */
-#define D_LAYOUT        0x80000000
-
-#define LIBCFS_DEBUG_MASKS_NAMES {					\
-	"trace", "inode", "super", "ext2", "malloc", "cache", "info",	\
-	"ioctl", "neterror", "net", "warning", "buffs", "other",	\
-	"dentry", "nettrace", "page", "dlmtrace", "error", "emerg",	\
-	"ha", "rpctrace", "vfstrace", "reada", "mmap", "config",	\
-	"console", "quota", "sec", "lfsck", "hsm", "snapshot", "layout",\
-	NULL }
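
The mask bits and the name table line up by index, so translating a mask
into names is a simple scan. A userspace sketch (printf-based, illustrative
only):

#include <stdio.h>

/* Illustrative: print the names of the debug bits set in a mask */
static void example_print_debug_mask(unsigned int mask)
{
	static const char *names[] = LIBCFS_DEBUG_MASKS_NAMES;
	int i;

	for (i = 0; names[i]; i++)
		if (mask & (1U << i))
			printf("%s ", names[i]);
	printf("\n");
}
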
-
-#define D_CANTMASK   (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
-
-#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
-
-#endif	/* __UAPI_LIBCFS_DEBUG_H__ */

+ 0 - 141
drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h

@@ -1,141 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_ioctl.h
- *
- * Low-level ioctl data structures. Kernel ioctl functions are declared
- * here; user space functions are in libcfs/util/ioctl.h.
- *
- */
-
-#ifndef __LIBCFS_IOCTL_H__
-#define __LIBCFS_IOCTL_H__
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define LIBCFS_IOCTL_VERSION	0x0001000a
-#define LIBCFS_IOCTL_VERSION2	0x0001000b
-
-struct libcfs_ioctl_hdr {
-	__u32 ioc_len;
-	__u32 ioc_version;
-};
-
-/** max size to copy from userspace */
-#define LIBCFS_IOC_DATA_MAX	(128 * 1024)
-
-struct libcfs_ioctl_data {
-	struct libcfs_ioctl_hdr ioc_hdr;
-
-	__u64 ioc_nid;
-	__u64 ioc_u64[1];
-
-	__u32 ioc_flags;
-	__u32 ioc_count;
-	__u32 ioc_net;
-	__u32 ioc_u32[7];
-
-	__u32 ioc_inllen1;
-	char *ioc_inlbuf1;
-	__u32 ioc_inllen2;
-	char *ioc_inlbuf2;
-
-	__u32 ioc_plen1; /* buffers in userspace */
-	void __user *ioc_pbuf1;
-	__u32 ioc_plen2; /* buffers in userspace */
-	void __user *ioc_pbuf2;
-
-	char ioc_bulk[0];
-};
-
-struct libcfs_debug_ioctl_data {
-	struct libcfs_ioctl_hdr hdr;
-	unsigned int subs;
-	unsigned int debug;
-};
-
-/* 'f' ioctls are defined in lustre_ioctl.h and lustre_user.h except for: */
-#define LIBCFS_IOC_DEBUG_MASK		   _IOWR('f', 250, long)
-#define IOCTL_LIBCFS_TYPE		   long
-
-#define IOC_LIBCFS_TYPE			   ('e')
-#define IOC_LIBCFS_MIN_NR		   30
-/* libcfs ioctls */
-/* IOC_LIBCFS_PANIC obsolete in 2.8.0, was _IOWR('e', 30, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_CLEAR_DEBUG		   _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_MARK_DEBUG		   _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
-/* IOC_LIBCFS_MEMHOG obsolete in 2.8.0, was _IOWR('e', 36, IOCTL_LIBCFS_TYPE) */
-/* lnet ioctls */
-#define IOC_LIBCFS_GET_NI		   _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_FAIL_NID		   _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_NOTIFY_ROUTER	   _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_UNCONFIGURE		   _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
-/*	 IOC_LIBCFS_PORTALS_COMPATIBILITY  _IOWR('e', 57, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_LNET_DIST		   _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CONFIGURE		   _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_TESTPROTOCOMPAT	   _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PING			   _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
-/*	IOC_LIBCFS_DEBUG_PEER		   _IOWR('e', 62, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_LNETST		   _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LNET_FAULT		   _IOWR('e', 64, IOCTL_LIBCFS_TYPE)
-/* lnd ioctls */
-#define IOC_LIBCFS_REGISTER_MYNID	   _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CLOSE_CONNECTION	   _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PUSH_CONNECTION	   _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_CONN		   _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_PEER		   _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_ADD_PEER		   _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_PEER		   _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
-/* ioctl 77 is free for use */
-#define IOC_LIBCFS_ADD_INTERFACE	   _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_INTERFACE	   _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_INTERFACE	   _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
-
-/*
- * DLC Specific IOCTL numbers.
- * In order to maintain backward compatibility with any possible external
- * tools which might be accessing the IOCTL numbers, a new group of IOCTL
- * numbers has been allocated.
- */
-#define IOCTL_CONFIG_SIZE		struct lnet_ioctl_config_data
-#define IOC_LIBCFS_ADD_ROUTE		_IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_ROUTE		_IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_ROUTE		_IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_NET		_IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_NET		_IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_NET		_IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_CONFIG_RTR		_IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_BUF		_IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_BUF		_IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_PEER_INFO	_IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_LNET_STATS	_IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR		91
-
-#endif /* __LIBCFS_IOCTL_H__ */
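
These ioctls are issued against the libcfs character device with a properly
sized and versioned header. A hedged userspace sketch of a ping; the
"/dev/lnet" path follows common lctl usage and is an assumption here, as is
leaving the remaining fields zeroed:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Hedged sketch: ping a NID via IOC_LIBCFS_PING; device path assumed */
static int example_ping(__u64 nid)
{
	struct libcfs_ioctl_data data;
	int fd, rc;

	fd = open("/dev/lnet", O_RDWR);
	if (fd < 0)
		return -1;

	memset(&data, 0, sizeof(data));
	data.ioc_hdr.ioc_len = sizeof(data);
	data.ioc_hdr.ioc_version = LIBCFS_IOCTL_VERSION;
	data.ioc_nid = nid;

	rc = ioctl(fd, IOC_LIBCFS_PING, &data);
	close(fd);
	return rc;
}
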

+ 0 - 150
drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h

@@ -1,150 +0,0 @@
-/*
- * LGPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library.
- *
- * LGPL HEADER END
- *
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * Author: Amir Shehata <amir.shehata@intel.com>
- */
-
-#ifndef LNET_DLC_H
-#define LNET_DLC_H
-
-#include <uapi/linux/lnet/libcfs_ioctl.h>
-#include <uapi/linux/lnet/lnet-types.h>
-
-#define MAX_NUM_SHOW_ENTRIES	32
-#define LNET_MAX_STR_LEN	128
-#define LNET_MAX_SHOW_NUM_CPT	128
-#define LNET_UNDEFINED_HOPS	((__u32)(-1))
-
-struct lnet_ioctl_config_lnd_cmn_tunables {
-	__u32 lct_version;
-	__u32 lct_peer_timeout;
-	__u32 lct_peer_tx_credits;
-	__u32 lct_peer_rtr_credits;
-	__u32 lct_max_tx_credits;
-};
-
-struct lnet_ioctl_config_o2iblnd_tunables {
-	__u32 lnd_version;
-	__u32 lnd_peercredits_hiw;
-	__u32 lnd_map_on_demand;
-	__u32 lnd_concurrent_sends;
-	__u32 lnd_fmr_pool_size;
-	__u32 lnd_fmr_flush_trigger;
-	__u32 lnd_fmr_cache;
-	__u16 lnd_conns_per_peer;
-	__u16 pad;
-};
-
-struct lnet_ioctl_config_lnd_tunables {
-	struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
-	union {
-		struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
-	} lt_tun_u;
-};
-
-struct lnet_ioctl_net_config {
-	char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN];
-	__u32 ni_status;
-	__u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT];
-	char cfg_bulk[0];
-};
-
-#define LNET_TINY_BUF_IDX	0
-#define LNET_SMALL_BUF_IDX	1
-#define LNET_LARGE_BUF_IDX	2
-
-/* # different router buffer pools */
-#define LNET_NRBPOOLS		(LNET_LARGE_BUF_IDX + 1)
-
-struct lnet_ioctl_pool_cfg {
-	struct {
-		__u32 pl_npages;
-		__u32 pl_nbuffers;
-		__u32 pl_credits;
-		__u32 pl_mincredits;
-	} pl_pools[LNET_NRBPOOLS];
-	__u32 pl_routing;
-};
-
-struct lnet_ioctl_config_data {
-	struct libcfs_ioctl_hdr cfg_hdr;
-
-	__u32 cfg_net;
-	__u32 cfg_count;
-	__u64 cfg_nid;
-	__u32 cfg_ncpts;
-
-	union {
-		struct {
-			__u32 rtr_hop;
-			__u32 rtr_priority;
-			__u32 rtr_flags;
-		} cfg_route;
-		struct {
-			char net_intf[LNET_MAX_STR_LEN];
-			__s32 net_peer_timeout;
-			__s32 net_peer_tx_credits;
-			__s32 net_peer_rtr_credits;
-			__s32 net_max_tx_credits;
-			__u32 net_cksum_algo;
-			__u32 net_interface_count;
-		} cfg_net;
-		struct {
-			__u32 buf_enable;
-			__s32 buf_tiny;
-			__s32 buf_small;
-			__s32 buf_large;
-		} cfg_buffers;
-	} cfg_config_u;
-
-	char cfg_bulk[0];
-};
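
The union is interpreted according to the ioctl it accompanies. For
example, a hypothetical helper filling the cfg_route arm for
IOC_LIBCFS_ADD_ROUTE might look like this (the one-hop default is an
assumption for illustration):

#include <string.h>

/* Illustrative: describe a one-hop route to remote_net via gw_nid */
static inline void example_fill_route(struct lnet_ioctl_config_data *data,
				      __u32 remote_net, __u64 gw_nid)
{
	memset(data, 0, sizeof(*data));
	data->cfg_hdr.ioc_len = sizeof(*data);
	data->cfg_hdr.ioc_version = LIBCFS_IOCTL_VERSION;
	data->cfg_net = remote_net;
	data->cfg_nid = gw_nid;
	data->cfg_config_u.cfg_route.rtr_hop = 1;
	data->cfg_config_u.cfg_route.rtr_priority = 0;
}
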
-
-struct lnet_ioctl_peer {
-	struct libcfs_ioctl_hdr pr_hdr;
-	__u32 pr_count;
-	__u32 pr_pad;
-	__u64 pr_nid;
-
-	union {
-		struct {
-			char cr_aliveness[LNET_MAX_STR_LEN];
-			__u32 cr_refcount;
-			__u32 cr_ni_peer_tx_credits;
-			__u32 cr_peer_tx_credits;
-			__u32 cr_peer_rtr_credits;
-			__u32 cr_peer_min_rtr_credits;
-			__u32 cr_peer_tx_qnob;
-			__u32 cr_ncpt;
-		} pr_peer_credits;
-	} pr_lnd_u;
-};
-
-struct lnet_ioctl_lnet_stats {
-	struct libcfs_ioctl_hdr st_hdr;
-	struct lnet_counters st_cntrs;
-};
-
-#endif /* LNET_DLC_H */

+ 0 - 669
drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h

@@ -1,669 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-
-#ifndef __LNET_TYPES_H__
-#define __LNET_TYPES_H__
-
-#include <linux/types.h>
-#include <linux/bvec.h>
-
-/** \addtogroup lnet
- * @{
- */
-
-#define LNET_VERSION		"0.6.0"
-
-/** \addtogroup lnet_addr
- * @{
- */
-
-/** Portal reserved for LNet's own use.
- * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments.
- */
-#define LNET_RESERVED_PORTAL	0
-
-/**
- * Address of an end-point in an LNet network.
- *
- * A node can have multiple end-points and hence multiple addresses.
- * An LNet network can be a simple network (e.g. tcp0) or a network of
- * LNet networks connected by LNet routers. Therefore an end-point address
- * has two parts: network ID, and address within a network.
- *
- * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID.
- */
-typedef __u64 lnet_nid_t;
-/**
- * ID of a process in a node. Shortened as PID to distinguish from
- * lnet_process_id, the global process ID.
- */
-typedef __u32 lnet_pid_t;
-
-/** wildcard NID that matches any end-point address */
-#define LNET_NID_ANY	((lnet_nid_t)(-1))
-/** wildcard PID that matches any lnet_pid_t */
-#define LNET_PID_ANY	((lnet_pid_t)(-1))
-
-#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
-#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
-#define LNET_PID_LUSTRE	  12345
-
-#define LNET_TIME_FOREVER (-1)
-
-/* how an LNET NID encodes net:address */
-/** extract the address part of an lnet_nid_t */
-static inline __u32 LNET_NIDADDR(lnet_nid_t nid)
-{
-	return nid & 0xffffffff;
-}
-
-static inline __u32 LNET_NIDNET(lnet_nid_t nid)
-{
-	return (nid >> 32) & 0xffffffff;
-}
-
-static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr)
-{
-	return (((__u64)net) << 32) | addr;
-}
-
-static inline __u32 LNET_NETNUM(__u32 net)
-{
-	return net & 0xffff;
-}
-
-static inline __u32 LNET_NETTYP(__u32 net)
-{
-	return (net >> 16) & 0xffff;
-}
-
-static inline __u32 LNET_MKNET(__u32 type, __u32 num)
-{
-	return (type << 16) | num;
-}
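
These helpers compose and decompose NIDs with pure bit arithmetic, no
lookups involved. A round-trip sketch; the network type and address values
below are placeholders, not real LND type numbers:

/* Illustrative round trip through the NID packing helpers */
static inline void example_nid_roundtrip(void)
{
	__u32 net = LNET_MKNET(2, 0);	/* type 2, network number 0 */
	lnet_nid_t nid = LNET_MKNID(net, 0x0a000001);

	/* LNET_NIDNET(nid) == net; LNET_NIDADDR(nid) == 0x0a000001 */
	(void)nid;
}
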
-
-#define WIRE_ATTR	__packed
-
-/* Packed version of lnet_process_id to transfer via network */
-struct lnet_process_id_packed {
-	/* node id / process id */
-	lnet_nid_t	nid;
-	lnet_pid_t	pid;
-} WIRE_ATTR;
-
-/*
- * The wire handle's interface cookie only matches one network interface in
- * one epoch (i.e. new cookie when the interface restarts or the node
- * reboots).  The object cookie only matches one object on that interface
- * during that object's lifetime (i.e. no cookie re-use).
- */
-struct lnet_handle_wire {
-	__u64	wh_interface_cookie;
-	__u64	wh_object_cookie;
-} WIRE_ATTR;
-
-enum lnet_msg_type {
-	LNET_MSG_ACK = 0,
-	LNET_MSG_PUT,
-	LNET_MSG_GET,
-	LNET_MSG_REPLY,
-	LNET_MSG_HELLO,
-};
-
-/*
- * The variant fields of the portals message header are aligned on an 8
- * byte boundary in the message header.  Note that all types used in these
- * wire structs MUST be fixed size and the smaller types are placed at the
- * end.
- */
-struct lnet_ack {
-	struct lnet_handle_wire	dst_wmd;
-	__u64			match_bits;
-	__u32			mlength;
-} WIRE_ATTR;
-
-struct lnet_put {
-	struct lnet_handle_wire	ack_wmd;
-	__u64			match_bits;
-	__u64			hdr_data;
-	__u32			ptl_index;
-	__u32			offset;
-} WIRE_ATTR;
-
-struct lnet_get {
-	struct lnet_handle_wire	return_wmd;
-	__u64			match_bits;
-	__u32			ptl_index;
-	__u32			src_offset;
-	__u32			sink_length;
-} WIRE_ATTR;
-
-struct lnet_reply {
-	struct lnet_handle_wire	dst_wmd;
-} WIRE_ATTR;
-
-struct lnet_hello {
-	__u64			incarnation;
-	__u32			type;
-} WIRE_ATTR;
-
-struct lnet_hdr {
-	lnet_nid_t	dest_nid;
-	lnet_nid_t	src_nid;
-	lnet_pid_t	dest_pid;
-	lnet_pid_t	src_pid;
-	__u32		type;		/* enum lnet_msg_type */
-	__u32		payload_length;	/* payload data to follow */
-	/*<------__u64 aligned------->*/
-	union {
-		struct lnet_ack		ack;
-		struct lnet_put		put;
-		struct lnet_get		get;
-		struct lnet_reply	reply;
-		struct lnet_hello	hello;
-	} msg;
-} WIRE_ATTR;
-
-/*
- * A HELLO message contains a magic number and protocol version
- * code in the header's dest_nid, the peer's NID in the src_nid, and
- * LNET_MSG_HELLO in the type field.  All other common fields are zero
- * (including payload_size; i.e. no payload).
- * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
- * running the same protocol and to find out its NID. These LNDs should
- * exchange HELLO messages when a connection is first established.  Individual
- * LNDs can put whatever else they fancy in struct lnet_hdr::msg.
- */
-struct lnet_magicversion {
-	__u32	magic;		/* LNET_PROTO_TCP_MAGIC */
-	__u16	version_major;	/* increment on incompatible change */
-	__u16	version_minor;	/* increment on compatible change */
-} WIRE_ATTR;
-
-/* PROTO MAGIC for LNDs */
-#define LNET_PROTO_IB_MAGIC		0x0be91b91
-#define LNET_PROTO_GNI_MAGIC		0xb00fbabe /* ask Kim */
-#define LNET_PROTO_TCP_MAGIC		0xeebc0ded
-#define LNET_PROTO_ACCEPTOR_MAGIC	0xacce7100
-#define LNET_PROTO_PING_MAGIC		0x70696E67 /* 'ping' */
-
-/* Placeholder for a future "unified" protocol across all LNDs */
-/*
- * Current LNDs that receive a request with this magic will respond with a
- * "stub" reply using their current protocol
- */
-#define LNET_PROTO_MAGIC		0x45726963 /* 'Eric' */
-
-#define LNET_PROTO_TCP_VERSION_MAJOR	1
-#define LNET_PROTO_TCP_VERSION_MINOR	0
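
Following the HELLO description above, a byte-stream LND could stamp an
outgoing hello header as in the sketch below. The helper name is
illustrative and endianness conversion is deliberately omitted:

#include <string.h>

/* Illustrative: pack magic/version into dest_nid per the HELLO scheme */
static inline void example_pack_hello(struct lnet_hdr *hdr, lnet_nid_t my_nid)
{
	struct lnet_magicversion *hmv;

	memset(hdr, 0, sizeof(*hdr));
	hmv = (struct lnet_magicversion *)&hdr->dest_nid;
	hmv->magic = LNET_PROTO_TCP_MAGIC;
	hmv->version_major = LNET_PROTO_TCP_VERSION_MAJOR;
	hmv->version_minor = LNET_PROTO_TCP_VERSION_MINOR;
	hdr->src_nid = my_nid;
	hdr->type = LNET_MSG_HELLO;	/* no payload: payload_length stays 0 */
}
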
-
-/* Acceptor connection request */
-struct lnet_acceptor_connreq {
-	__u32	acr_magic;		/* PTL_ACCEPTOR_PROTO_MAGIC */
-	__u32	acr_version;		/* protocol version */
-	__u64	acr_nid;		/* target NID */
-} WIRE_ATTR;
-
-#define LNET_PROTO_ACCEPTOR_VERSION	1
-
-struct lnet_ni_status {
-	lnet_nid_t	ns_nid;
-	__u32		ns_status;
-	__u32		ns_unused;
-} WIRE_ATTR;
-
-struct lnet_ping_info {
-	__u32			pi_magic;
-	__u32			pi_features;
-	lnet_pid_t		pi_pid;
-	__u32			pi_nnis;
-	struct lnet_ni_status	pi_ni[0];
-} WIRE_ATTR;
-
-struct lnet_counters {
-	__u32	msgs_alloc;
-	__u32	msgs_max;
-	__u32	errors;
-	__u32	send_count;
-	__u32	recv_count;
-	__u32	route_count;
-	__u32	drop_count;
-	__u64	send_length;
-	__u64	recv_length;
-	__u64	route_length;
-	__u64	drop_length;
-} WIRE_ATTR;
-
-#define LNET_NI_STATUS_UP      0x15aac0de
-#define LNET_NI_STATUS_DOWN    0xdeadface
-#define LNET_NI_STATUS_INVALID 0x00000000
-
-#define LNET_MAX_INTERFACES    16
-
-/**
- * Objects maintained by the LNet are accessed through handles. Handle types
- * have names of the form lnet_handle_xx, where xx is one of the two letter
- * object type codes ('eq' for event queue, 'md' for memory descriptor, and
- * 'me' for match entry). Each type of object is given a unique handle type
- * to enhance type checking.
- */
-#define LNET_WIRE_HANDLE_COOKIE_NONE   (-1)
-
-struct lnet_handle_eq {
-	u64	cookie;
-};
-
-/**
- * Invalidate eq handle @h.
- */
-static inline void LNetInvalidateEQHandle(struct lnet_handle_eq *h)
-{
-	h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
-}
-
-/**
- * Check whether eq handle @h is invalid.
- *
- * @return 1 if handle is invalid, 0 if valid.
- */
-static inline int LNetEQHandleIsInvalid(struct lnet_handle_eq h)
-{
-	return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
-}
-
-struct lnet_handle_md {
-	u64	cookie;
-};
-
-/**
- * Invalidate md handle @h.
- */
-static inline void LNetInvalidateMDHandle(struct lnet_handle_md *h)
-{
-	h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
-}
-
-/**
- * Check whether md handle @h is invalid.
- *
- * @return 1 if handle is invalid, 0 if valid.
- */
-static inline int LNetMDHandleIsInvalid(struct lnet_handle_md h)
-{
-	return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
-}
-
-struct lnet_handle_me {
-	u64	cookie;
-};
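
Handles start out in an undefined state, so the invalidate/check helpers
above are typically paired, as in this small sketch:

/* Illustrative: initialize an MD handle to the invalid state */
static inline void example_md_handle_init(struct lnet_handle_md *mdh)
{
	LNetInvalidateMDHandle(mdh);
	/* LNetMDHandleIsInvalid(*mdh) now returns 1 */
}
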
-
-/**
- * Global process ID.
- */
-struct lnet_process_id {
-	/** node id */
-	lnet_nid_t nid;
-	/** process id */
-	lnet_pid_t pid;
-};
-/** @} lnet_addr */
-
-/** \addtogroup lnet_me
- * @{
- */
-
-/**
- * Specifies whether the match entry or memory descriptor should be unlinked
- * automatically (LNET_UNLINK) or not (LNET_RETAIN).
- */
-enum lnet_unlink {
-	LNET_RETAIN = 0,
-	LNET_UNLINK
-};
-
-/**
- * Values of the type lnet_ins_pos are used to control where a new match
- * entry is inserted. The value LNET_INS_BEFORE is used to insert the new
- * entry before the current entry or before the head of the list. The value
- * LNET_INS_AFTER is used to insert the new entry after the current entry
- * or after the last item in the list.
- */
-enum lnet_ins_pos {
-	/** insert ME before current position or head of the list */
-	LNET_INS_BEFORE,
-	/** insert ME after current position or tail of the list */
-	LNET_INS_AFTER,
-	/** attach ME at tail of local CPU partition ME list */
-	LNET_INS_LOCAL
-};
-
-/** @} lnet_me */
-
-/** \addtogroup lnet_md
- * @{
- */
-
-/**
- * Defines the visible parts of a memory descriptor. Values of this type
- * are used to initialize memory descriptors.
- */
-struct lnet_md {
-	/**
-	 * Specify the memory region associated with the memory descriptor.
-	 * If the options field has:
-	 * - LNET_MD_KIOV bit set: The start field points to the starting
-	 * address of an array of struct bio_vec and the length field specifies
-	 * the number of entries in the array. The length can't be bigger
-	 * than LNET_MAX_IOV. The struct bio_vec is used to describe page-based
-	 * fragments that are not necessarily mapped in virtual memory.
-	 * - LNET_MD_IOVEC bit set: The start field points to the starting
-	 * address of an array of struct iovec and the length field specifies
-	 * the number of entries in the array. The length can't be bigger
-	 * than LNET_MAX_IOV. The struct iovec is used to describe fragments
-	 * that have virtual addresses.
-	 * - Otherwise: The memory region is contiguous. The start field
-	 * specifies the starting address for the memory region and the
-	 * length field specifies its length.
-	 *
-	 * When the memory region is fragmented, all fragments but the first
-	 * one must start on page boundary, and all but the last must end on
-	 * page boundary.
-	 */
-	void		*start;
-	unsigned int	 length;
-	/**
-	 * Specifies the maximum number of operations that can be performed
-	 * on the memory descriptor. An operation is any action that could
-	 * possibly generate an event. In the usual case, the threshold value
-	 * is decremented for each operation on the MD. When the threshold
-	 * drops to zero, the MD becomes inactive and does not respond to
-	 * operations. A threshold value of LNET_MD_THRESH_INF indicates that
-	 * there is no bound on the number of operations that may be applied
-	 * to a MD.
-	 */
-	int		 threshold;
-	/**
-	 * Specifies the largest incoming request that the memory descriptor
-	 * should respond to. When the unused portion of a MD (length -
-	 * local offset) falls below this value, the MD becomes inactive and
-	 * does not respond to further operations. This value is only used
-	 * if the LNET_MD_MAX_SIZE option is set.
-	 */
-	int		 max_size;
-	/**
-	 * Specifies the behavior of the memory descriptor. A bitwise OR
-	 * of the following values can be used:
-	 * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD.
-	 * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD.
-	 * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory
-	 *   region is provided by the incoming request. By default, the
-	 *   offset is maintained locally. When maintained locally, the
-	 *   offset is incremented by the length of the request so that
-	 *   the next operation (PUT or GET) will access the next part of
-	 *   the memory region. Note that only one offset variable exists
-	 *   per memory descriptor. If both PUT and GET operations are
-	 *   performed on a memory descriptor, the offset is updated each time.
-	 * - LNET_MD_TRUNCATE: The length provided in the incoming request can
-	 *   be reduced to match the memory available in the region (determined
-	 *   by subtracting the offset from the length of the memory region).
-	 *   By default, if the length in the incoming operation is greater
-	 *   than the amount of memory available, the operation is rejected.
-	 * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for
-	 *   incoming PUT operations, even if requested. By default,
-	 *   acknowledgments are sent for PUT operations that request an
-	 *   acknowledgment. Acknowledgments are never sent for GET operations.
-	 *   The data sent in the REPLY serves as an implicit acknowledgment.
-	 * - LNET_MD_KIOV: The start and length fields specify an array of
-	 *   struct bio_vec.
-	 * - LNET_MD_IOVEC: The start and length fields specify an array of
-	 *   struct iovec.
-	 * - LNET_MD_MAX_SIZE: The max_size field is valid.
-	 *
-	 * Note:
-	 * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather
-	 *   capability for memory descriptors. They can't be both set.
-	 * - When LNET_MD_MAX_SIZE is set, the total length of the memory
-	 *   region (i.e. sum of all fragment lengths) must not be less than
-	 *   \a max_size.
-	 */
-	unsigned int	 options;
-	/**
-	 * A user-specified value that is associated with the memory
-	 * descriptor. The value does not need to be a pointer, but must fit
-	 * in the space used by a pointer. This value is recorded in events
-	 * associated with operations on this MD.
-	 */
-	void		*user_ptr;
-	/**
-	 * A handle for the event queue used to log the operations performed on
-	 * the memory region. If this argument is a NULL handle (i.e. nullified
- * by LNetInvalidateEQHandle()), operations performed on this memory
-	 * descriptor are not logged.
-	 */
-	struct lnet_handle_eq eq_handle;
-};
-
-/*
- * Max Transfer Unit (minimum supported everywhere).
- * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
- * these limits are system wide and not interface-local.
- */
-#define LNET_MTU_BITS	20
-#define LNET_MTU	(1 << LNET_MTU_BITS)
-
-/** limit on the number of fragments in discontiguous MDs */
-#define LNET_MAX_IOV	256
-
-/**
- * Options for the MD structure. See lnet_md::options.
- */
-#define LNET_MD_OP_PUT		(1 << 0)
-/** See lnet_md::options. */
-#define LNET_MD_OP_GET		(1 << 1)
-/** See lnet_md::options. */
-#define LNET_MD_MANAGE_REMOTE	(1 << 2)
-/* unused			(1 << 3) */
-/** See lnet_md::options. */
-#define LNET_MD_TRUNCATE	(1 << 4)
-/** See lnet_md::options. */
-#define LNET_MD_ACK_DISABLE	(1 << 5)
-/** See lnet_md::options. */
-#define LNET_MD_IOVEC		(1 << 6)
-/** See lnet_md::options. */
-#define LNET_MD_MAX_SIZE	(1 << 7)
-/** See lnet_md::options. */
-#define LNET_MD_KIOV		(1 << 8)
-
-/* For compatibility with Cray Portals */
-#define LNET_MD_PHYS		0
-
-/** Infinite threshold on MD operations. See lnet_md::threshold */
-#define LNET_MD_THRESH_INF	(-1)
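/*
 * Editor's sketch (not part of the original header): initializing a
 * struct lnet_md for a plain contiguous buffer that accepts exactly one
 * incoming PUT.  "buf", "len" and "eqh" are hypothetical caller values.
 */
static inline void example_md_init(struct lnet_md *md, void *buf,
				   unsigned int len,
				   struct lnet_handle_eq eqh)
{
	md->start     = buf;		/* contiguous region, no KIOV/IOVEC */
	md->length    = len;
	md->threshold = 1;		/* one operation, then inactive */
	md->max_size  = 0;		/* ignored: LNET_MD_MAX_SIZE not set */
	md->options   = LNET_MD_OP_PUT;	/* only PUTs may land here */
	md->user_ptr  = NULL;		/* nothing extra to carry in events */
	md->eq_handle = eqh;		/* log operations to this EQ */
}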
-
-/** @} lnet_md */
-
-/** \addtogroup lnet_eq
- * @{
- */
-
-/**
- * Six types of events can be logged in an event queue.
- */
-enum lnet_event_kind {
-	/** An incoming GET operation has completed on the MD. */
-	LNET_EVENT_GET		= 1,
-	/**
-	 * An incoming PUT operation has completed on the MD. The
-	 * underlying layers will not alter the memory (on behalf of this
-	 * operation) once this event has been logged.
-	 */
-	LNET_EVENT_PUT,
-	/**
-	 * A REPLY operation has completed. This event is logged after the
-	 * data (if any) from the REPLY has been written into the MD.
-	 */
-	LNET_EVENT_REPLY,
-	/** An acknowledgment has been received. */
-	LNET_EVENT_ACK,
-	/**
-	 * An outgoing send (PUT or GET) operation has completed. This event
-	 * is logged after the entire buffer has been sent and it is safe for
-	 * the caller to reuse the buffer.
-	 *
-	 * Note:
-	 * - LNET_EVENT_SEND doesn't guarantee message delivery: it can
-	 *   fire even before the message has been put out on the wire.
-	 * - It's unsafe to assume that in an outgoing GET operation
-	 *   the LNET_EVENT_SEND event would happen before the
-	 *   LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
-	 *   LNET_EVENT_ACK events in an outgoing PUT operation.
-	 */
-	LNET_EVENT_SEND,
-	/**
-	 * A MD has been unlinked. Note that LNetMDUnlink() does not
-	 * necessarily trigger an LNET_EVENT_UNLINK event.
-	 * \see LNetMDUnlink
-	 */
-	LNET_EVENT_UNLINK,
-};
-
-#define LNET_SEQ_GT(a, b)      (((signed long)((a) - (b))) > 0)
-
-/**
- * Information about an event on a MD.
- */
-struct lnet_event {
-	/** The identifier (nid, pid) of the target. */
-	struct lnet_process_id	target;
-	/** The identifier (nid, pid) of the initiator. */
-	struct lnet_process_id	initiator;
-	/**
-	 * The NID of the immediate sender. If the request has been forwarded
-	 * by routers, this is the NID of the last hop; otherwise it's the
-	 * same as the initiator.
-	 */
-	lnet_nid_t		sender;
-	/** Indicates the type of the event. */
-	enum lnet_event_kind	type;
-	/** The portal table index specified in the request */
-	unsigned int		pt_index;
-	/** A copy of the match bits specified in the request. */
-	__u64			match_bits;
-	/** The length (in bytes) specified in the request. */
-	unsigned int		rlength;
-	/**
-	 * The length (in bytes) of the data that was manipulated by the
-	 * operation. For truncated operations, the manipulated length will be
-	 * the number of bytes specified by the MD (possibly with an offset,
-	 * see lnet_md). For all other operations, the manipulated length
-	 * will be the length of the requested operation, i.e. rlength.
-	 */
-	unsigned int		mlength;
-	/**
-	 * The handle to the MD associated with the event. The handle may be
-	 * invalid if the MD has been unlinked.
-	 */
-	struct lnet_handle_md	md_handle;
-	/**
-	 * A snapshot of the state of the MD immediately after the event has
-	 * been processed. In particular, the threshold field in md will
-	 * reflect the value of the threshold after the operation occurred.
-	 */
-	struct lnet_md		md;
-	/**
-	 * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
-	 * \see LNetPut
-	 */
-	__u64			hdr_data;
-	/**
-	 * Indicates the completion status of the operation. It's 0 for
-	 * successful operations, otherwise it's an error code.
-	 */
-	int			status;
-	/**
-	 * Indicates whether the MD has been unlinked. Note that:
-	 * - An event with unlinked set is the last event on the MD.
-	 * - This field is also set for an explicit LNET_EVENT_UNLINK event.
-	 * \see LNetMDUnlink
-	 */
-	int			unlinked;
-	/**
-	 * The displacement (in bytes) into the memory region that the
-	 * operation used. The offset can be determined by the operation for
-	 * a remotely managed MD or by the local MD.
-	 * \see lnet_md::options
-	 */
-	unsigned int		offset;
-	/**
-	 * The sequence number for this event. Sequence numbers are unique
-	 * to each event.
-	 */
-	volatile unsigned long	sequence;
-};
-
-/**
- * Event queue handler function type.
- *
- * The EQ handler runs for each event that is deposited into the EQ. The
- * handler is supplied with a pointer to the event that triggered the
- * handler invocation.
- *
- * The handler must not block, must be reentrant, and must not call any LNet
- * API functions. It should return as quickly as possible.
- */
-typedef void (*lnet_eq_handler_t)(struct lnet_event *event);
-#define LNET_EQ_HANDLER_NONE NULL
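/*
 * Editor's sketch of a handler matching the type above: it must not block,
 * must be reentrant and may not call back into the LNet API, so it only
 * inspects the event.  The "example_" name is hypothetical.
 */
static void example_eq_handler(struct lnet_event *event)
{
	if (event->status != 0)
		return;			/* the operation failed */

	switch (event->type) {
	case LNET_EVENT_PUT:
		/* event->mlength bytes landed at event->offset */
		break;
	case LNET_EVENT_SEND:
		/* buffer is reusable, but delivery is not guaranteed yet */
		break;
	default:
		break;
	}

	if (event->unlinked) {
		/* last event on this MD; its resources can be reclaimed */
	}
}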
-/** @} lnet_eq */
-
-/** \addtogroup lnet_data
- * @{
- */
-
-/**
- * Specifies whether an acknowledgment should be sent by the target when the
- * PUT
- * operation completes (i.e., when the data has been written to a MD of the
- * target process).
- *
- * \see lnet_md::options for the discussion on LNET_MD_ACK_DISABLE by which
- * acknowledgments can be disabled for a MD.
- */
-enum lnet_ack_req {
-	/** Request an acknowledgment */
-	LNET_ACK_REQ,
-	/** Request that no acknowledgment should be generated. */
-	LNET_NOACK_REQ
-};
-/** @} lnet_data */
-
-/** @} lnet */
-#endif

+ 0 - 123
drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h

@@ -1,123 +0,0 @@
-/*
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- * header for lnet ioctl
- */
-#ifndef _LNETCTL_H_
-#define _LNETCTL_H_
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/** \addtogroup lnet_fault_simulation
- * @{
- */
-
-enum {
-	LNET_CTL_DROP_ADD,
-	LNET_CTL_DROP_DEL,
-	LNET_CTL_DROP_RESET,
-	LNET_CTL_DROP_LIST,
-	LNET_CTL_DELAY_ADD,
-	LNET_CTL_DELAY_DEL,
-	LNET_CTL_DELAY_RESET,
-	LNET_CTL_DELAY_LIST,
-};
-
-#define LNET_ACK_BIT		(1 << 0)
-#define LNET_PUT_BIT		(1 << 1)
-#define LNET_GET_BIT		(1 << 2)
-#define LNET_REPLY_BIT		(1 << 3)
-
-/** ioctl parameter for LNet fault simulation */
-struct lnet_fault_attr {
-	/**
-	 * source NID of drop rule
-	 * LNET_NID_ANY is wildcard for all sources
-	 * 255.255.255.255@net is wildcard for all addresses from @net
-	 */
-	lnet_nid_t			fa_src;
-	/** destination NID of drop rule, see \a fa_src for details */
-	lnet_nid_t			fa_dst;
-	/**
-	 * Portal mask to drop, -1 means all portals, for example:
-	 * fa_ptl_mask = (1 << LDLM_CB_REQUEST_PORTAL) |
-	 *		 (1 << LDLM_CANCEL_REQUEST_PORTAL)
-	 *
-	 * If it is non-zero then only PUT and GET will be filtered, otherwise
-	 * there is no portal filter, all matched messages will be checked.
-	 */
-	__u64				fa_ptl_mask;
-	/**
-	 * message types to drop, for example:
-	 * fa_msg_mask = LNET_ACK_BIT | LNET_PUT_BIT
-	 *
-	 * If it is non-zero then only specified message types are filtered,
-	 * otherwise all message types will be checked.
-	 */
-	__u32				fa_msg_mask;
-	union {
-		/** message drop simulation */
-		struct {
-			/** drop rate of this rule */
-			__u32			da_rate;
-			/**
-			 * time interval of message drop, it is exclusive
-			 * with da_rate
-			 */
-			__u32			da_interval;
-		} drop;
-		/** message latency simulation */
-		struct {
-			__u32			la_rate;
-			/**
-			 * time interval of message delay, it is exclusive
-			 * with la_rate
-			 */
-			__u32			la_interval;
-			/** latency to delay */
-			__u32			la_latency;
-		} delay;
-		__u64			space[8];
-	} u;
-};
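/*
 * Editor's sketch (assumes memset() from <linux/string.h> and LNET_NID_ANY
 * from lnet-types.h): composing a rule that drops roughly 1 in 10 PUTs and
 * GETs between any pair of NIDs.  Actually submitting it via
 * LNET_CTL_DROP_ADD is outside this header's scope.
 */
static inline void example_drop_rule(struct lnet_fault_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->fa_src = LNET_NID_ANY;		/* wildcard source */
	attr->fa_dst = LNET_NID_ANY;		/* wildcard destination */
	attr->fa_ptl_mask = 0;			/* no portal filtering */
	attr->fa_msg_mask = LNET_PUT_BIT | LNET_GET_BIT;
	attr->u.drop.da_rate = 10;		/* drop 1 message in 10 */
	attr->u.drop.da_interval = 0;		/* exclusive with da_rate */
}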
-
-/** fault simulation stats */
-struct lnet_fault_stat {
-	/** total # matched messages */
-	__u64				fs_count;
-	/** # dropped LNET_MSG_PUT by this rule */
-	__u64				fs_put;
-	/** # dropped LNET_MSG_ACK by this rule */
-	__u64				fs_ack;
-	/** # dropped LNET_MSG_GET by this rule */
-	__u64				fs_get;
-	/** # dropped LNET_MSG_REPLY by this rule */
-	__u64				fs_reply;
-	union {
-		struct {
-			/** total # dropped messages */
-			__u64			ds_dropped;
-		} drop;
-		struct {
-			/** total # delayed messages */
-			__u64			ls_delayed;
-		} delay;
-		__u64			space[8];
-	} u;
-};
-
-/** @} lnet_fault_simulation */
-
-#define LNET_DEV_ID 0
-#define LNET_DEV_PATH "/dev/lnet"
-
-#endif

+ 0 - 556
drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h

@@ -1,556 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lnetst.h
- *
- * Author: Liang Zhen <liang.zhen@intel.com>
- */
-
-#ifndef __LNET_ST_H__
-#define __LNET_ST_H__
-
-#include <linux/types.h>
-
-#define LST_FEAT_NONE		(0)
-#define LST_FEAT_BULK_LEN	(1 << 0)	/* enable variable page size */
-
-#define LST_FEATS_EMPTY		(LST_FEAT_NONE)
-#define LST_FEATS_MASK		(LST_FEAT_NONE | LST_FEAT_BULK_LEN)
-
-#define LST_NAME_SIZE		32	/* max name buffer length */
-
-#define LSTIO_DEBUG		0xC00	/* debug */
-#define LSTIO_SESSION_NEW	0xC01	/* create session */
-#define LSTIO_SESSION_END	0xC02	/* end session */
-#define LSTIO_SESSION_INFO	0xC03	/* query session */
-#define LSTIO_GROUP_ADD		0xC10	/* add group */
-#define LSTIO_GROUP_LIST	0xC11	/* list all groups in session */
-#define LSTIO_GROUP_INFO	0xC12	/* query default information of
-					 * specified group
-					 */
-#define LSTIO_GROUP_DEL		0xC13	/* delete group */
-#define LSTIO_NODES_ADD		0xC14	/* add nodes to specified group */
-#define LSTIO_GROUP_UPDATE      0xC15	/* update group */
-#define LSTIO_BATCH_ADD		0xC20	/* add batch */
-#define LSTIO_BATCH_START	0xC21	/* start batch */
-#define LSTIO_BATCH_STOP	0xC22	/* stop batch */
-#define LSTIO_BATCH_DEL		0xC23	/* delete batch */
-#define LSTIO_BATCH_LIST	0xC24	/* show all batches in the session */
-#define LSTIO_BATCH_INFO	0xC25	/* show detail of specified batch */
-#define LSTIO_TEST_ADD		0xC26	/* add test (to batch) */
-#define LSTIO_BATCH_QUERY	0xC27	/* query batch status */
-#define LSTIO_STAT_QUERY	0xC30	/* get stats */
-
-struct lst_sid {
-	lnet_nid_t	ses_nid;	/* nid of console node */
-	__u64		ses_stamp;	/* time stamp */
-};					/*** session id */
-
-extern struct lst_sid LST_INVALID_SID;
-
-struct lst_bid {
-	__u64	bat_id;		/* unique id in session */
-};				/*** batch id (group of tests) */
-
-/* Status of test node */
-#define LST_NODE_ACTIVE		0x1	/* node in this session */
-#define LST_NODE_BUSY		0x2	/* node is taken by other session */
-#define LST_NODE_DOWN		0x4	/* node is down */
-#define LST_NODE_UNKNOWN	0x8	/* node not in session */
-
-struct lstcon_node_ent {
-	struct lnet_process_id	nde_id;		/* id of node */
-	int			nde_state;	/* state of node */
-};				/*** node entry, for list_group command */
-
-struct lstcon_ndlist_ent {
-	int	nle_nnode;	/* # of nodes */
-	int	nle_nactive;	/* # of active nodes */
-	int	nle_nbusy;	/* # of busy nodes */
-	int	nle_ndown;	/* # of down nodes */
-	int	nle_nunknown;	/* # of unknown nodes */
-};				/*** node_list entry, for list_batch command */
-
-struct lstcon_test_ent {
-	int	tse_type;       /* test type */
-	int	tse_loop;       /* loop count */
-	int	tse_concur;     /* concurrency of test */
-};				/* test summary entry, for
-				 * list_batch command
-				 */
-
-struct lstcon_batch_ent {
-	int	bae_state;	/* batch status */
-	int	bae_timeout;	/* batch timeout */
-	int	bae_ntest;	/* # of tests in the batch */
-};				/* batch summary entry, for
-				 * list_batch command
-				 */
-
-struct lstcon_test_batch_ent {
-	struct lstcon_ndlist_ent   tbe_cli_nle;	/* client (group) node_list
-						 * entry
-						 */
-	struct lstcon_ndlist_ent   tbe_srv_nle;	/* server (group) node_list
-						 * entry
-						 */
-	union {
-		struct lstcon_test_ent	tbe_test; /* test entry */
-		struct lstcon_batch_ent tbe_batch;/* batch entry */
-	} u;
-};				/* test/batch verbose information entry,
-				 * for list_batch command
-				 */
-
-struct lstcon_rpc_ent {
-	struct list_head	rpe_link;	/* link chain */
-	struct lnet_process_id	rpe_peer;	/* peer's id */
-	struct timeval		rpe_stamp;	/* time stamp of RPC */
-	int			rpe_state;	/* peer's state */
-	int			rpe_rpc_errno;	/* RPC errno */
-
-	struct lst_sid		rpe_sid;	/* peer's session id */
-	int			rpe_fwk_errno;	/* framework errno */
-	int			rpe_priv[4];	/* private data */
-	char			rpe_payload[0];	/* private reply payload */
-};
-
-struct lstcon_trans_stat {
-	int	trs_rpc_stat[4];	/* RPC stats (0: total, 1: success,
-					 * 2: failure, 3: reserved), matching
-					 * the accessors below
-					 */
-	int	trs_rpc_errno;		/* RPC errno */
-	int	trs_fwk_stat[8];	/* framework stat */
-	int	trs_fwk_errno;		/* errno of the first remote error */
-	void	*trs_fwk_private;	/* private framework stat */
-};
-
-static inline int
-lstcon_rpc_stat_total(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0];
-}
-
-static inline int
-lstcon_rpc_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1];
-}
-
-static inline int
-lstcon_rpc_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2];
-}
-
-static inline int
-lstcon_sesop_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesop_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_active(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesqry_stat_busy(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_unknown(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_tsbop_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbop_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_idle(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbqry_stat_run(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_statqry_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_statqry_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
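/*
 * Editor's note (sketch): each helper above doubles as counter and reader —
 * inc != 0 pre-increments the slot, inc == 0 just reads it back:
 */
static inline int example_trans_stat(struct lstcon_trans_stat *stat)
{
	lstcon_rpc_stat_total(stat, 1);		/* one more RPC issued... */
	lstcon_rpc_stat_failure(stat, 1);	/* ...and it failed */

	/* read-only queries: did anything succeed at all? */
	return lstcon_rpc_stat_success(stat, 0) > 0;
}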
-
-/* create a session */
-struct lstio_session_new_args {
-	int		 lstio_ses_key;		/* IN: local key */
-	int		 lstio_ses_timeout;	/* IN: session timeout */
-	int		 lstio_ses_force;	/* IN: force create? */
-	/** IN: session features */
-	unsigned int	 lstio_ses_feats;
-	struct lst_sid __user *lstio_ses_idp;	/* OUT: session id */
-	int		 lstio_ses_nmlen;	/* IN: name length */
-	char __user	 *lstio_ses_namep;	/* IN: session name */
-};
-
-/* query current session */
-struct lstio_session_info_args {
-	struct lst_sid __user	*lstio_ses_idp;		/* OUT: session id */
-	int __user		*lstio_ses_keyp;	/* OUT: local key */
-	/** OUT: session features */
-	unsigned int __user	*lstio_ses_featp;
-	struct lstcon_ndlist_ent __user *lstio_ses_ndinfo;/* OUT: */
-	int			 lstio_ses_nmlen;	/* IN: name length */
-	char __user		*lstio_ses_namep;	/* OUT: session name */
-};
-
-/* delete a session */
-struct lstio_session_end_args {
-	int			lstio_ses_key;	/* IN: session key */
-};
-
-#define LST_OPC_SESSION		1
-#define LST_OPC_GROUP		2
-#define LST_OPC_NODES		3
-#define LST_OPC_BATCHCLI	4
-#define LST_OPC_BATCHSRV	5
-
-struct lstio_debug_args {
-	int			 lstio_dbg_key;		/* IN: session key */
-	int			 lstio_dbg_type;	/* IN: debug
-							 * session|batch|
-							 * group|nodes list
-							 */
-	int			 lstio_dbg_flags;	/* IN: reserved debug
-							 * flags
-							 */
-	int			 lstio_dbg_timeout;	/* IN: timeout of
-							 * debug
-							 */
-	int			 lstio_dbg_nmlen;	/* IN: len of name */
-	char __user		*lstio_dbg_namep;	/* IN: name of
-							 * group|batch
-							 */
-	int			 lstio_dbg_count;	/* IN: # of test nodes
-							 * to debug
-							 */
-	struct lnet_process_id __user *lstio_dbg_idsp;	/* IN: id of test
-							 * nodes
-							 */
-	struct list_head __user	*lstio_dbg_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_group_add_args {
-	int		 lstio_grp_key;		/* IN: session key */
-	int		 lstio_grp_nmlen;	/* IN: name length */
-	char __user	*lstio_grp_namep;	/* IN: group name */
-};
-
-struct lstio_group_del_args {
-	int		 lstio_grp_key;		/* IN: session key */
-	int		 lstio_grp_nmlen;	/* IN: name length */
-	char __user	*lstio_grp_namep;	/* IN: group name */
-};
-
-#define LST_GROUP_CLEAN		1	/* remove inactive nodes in the group */
-#define LST_GROUP_REFRESH	2	/* refresh inactive nodes
-					 * in the group
-					 */
-#define LST_GROUP_RMND		3	/* delete nodes from the group */
-
-struct lstio_group_update_args {
-	int			 lstio_grp_key;		/* IN: session key */
-	int			 lstio_grp_opc;		/* IN: OPC */
-	int			 lstio_grp_args;	/* IN: arguments */
-	int			 lstio_grp_nmlen;	/* IN: name length */
-	char __user		*lstio_grp_namep;	/* IN: group name */
-	int			 lstio_grp_count;	/* IN: # of nodes id */
-	struct lnet_process_id __user *lstio_grp_idsp;	/* IN: array of nodes */
-	struct list_head __user	*lstio_grp_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_group_nodes_args {
-	int			 lstio_grp_key;		/* IN: session key */
-	int			 lstio_grp_nmlen;	/* IN: name length */
-	char __user		*lstio_grp_namep;	/* IN: group name */
-	int			 lstio_grp_count;	/* IN: # of nodes */
-	/** OUT: session features */
-	unsigned int __user	*lstio_grp_featp;
-	struct lnet_process_id __user *lstio_grp_idsp;	/* IN: nodes */
-	struct list_head __user	*lstio_grp_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_group_list_args {
-	int	 lstio_grp_key;		/* IN: session key */
-	int	 lstio_grp_idx;		/* IN: group idx */
-	int	 lstio_grp_nmlen;	/* IN: name len */
-	char __user *lstio_grp_namep;	/* OUT: name */
-};
-
-struct lstio_group_info_args {
-	int			 lstio_grp_key;		/* IN: session key */
-	int			 lstio_grp_nmlen;	/* IN: name len */
-	char __user		*lstio_grp_namep;	/* IN: name */
-	struct lstcon_ndlist_ent __user *lstio_grp_entp;/* OUT: description
-							 * of group
-							 */
-	int __user		*lstio_grp_idxp;	/* IN/OUT: node index */
-	int __user		*lstio_grp_ndentp;	/* IN/OUT: # of nodent */
-	struct lstcon_node_ent __user *lstio_grp_dentsp;/* OUT: nodent array */
-};
-
-#define LST_DEFAULT_BATCH	"batch"			/* default batch name */
-
-struct lstio_batch_add_args {
-	int	 lstio_bat_key;		/* IN: session key */
-	int	 lstio_bat_nmlen;	/* IN: name length */
-	char __user *lstio_bat_namep;	/* IN: batch name */
-};
-
-struct lstio_batch_del_args {
-	int	 lstio_bat_key;		/* IN: session key */
-	int	 lstio_bat_nmlen;	/* IN: name length */
-	char __user *lstio_bat_namep;	/* IN: batch name */
-};
-
-struct lstio_batch_run_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_timeout;	/* IN: timeout for
-							 * the batch
-							 */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: batch name */
-	struct list_head __user	*lstio_bat_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_batch_stop_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_force;	/* IN: abort unfinished
-							 * test RPC
-							 */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: batch name */
-	struct list_head __user	*lstio_bat_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_batch_query_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_testidx;	/* IN: test index */
-	int			 lstio_bat_client;	/* IN: are we testing
-							 * the client?
-							 */
-	int			 lstio_bat_timeout;	/* IN: timeout for
-							 * waiting
-							 */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: batch name */
-	struct list_head __user	*lstio_bat_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_batch_list_args {
-	int	 lstio_bat_key;		/* IN: session key */
-	int	 lstio_bat_idx;		/* IN: index */
-	int	 lstio_bat_nmlen;	/* IN: name length */
-	char __user *lstio_bat_namep;	/* IN: batch name */
-};
-
-struct lstio_batch_info_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: name */
-	int			 lstio_bat_server;	/* IN: query server
-							 * or not
-							 */
-	int			 lstio_bat_testidx;	/* IN: test index */
-	struct lstcon_test_batch_ent __user *lstio_bat_entp;/* OUT: batch ent */
-
-	int __user		*lstio_bat_idxp;	/* IN/OUT: index of node */
-	int __user		*lstio_bat_ndentp;	/* IN/OUT: # of nodent */
-	struct lstcon_node_ent __user *lstio_bat_dentsp;/* array of nodent */
-};
-
-/* add stat in session */
-struct lstio_stat_args {
-	int			 lstio_sta_key;		/* IN: session key */
-	int			 lstio_sta_timeout;	/* IN: timeout for
-							 * stat request
-							 */
-	int			 lstio_sta_nmlen;	/* IN: group name
-							 * length
-							 */
-	char __user		*lstio_sta_namep;	/* IN: group name */
-	int			 lstio_sta_count;	/* IN: # of pid */
-	struct lnet_process_id __user *lstio_sta_idsp;	/* IN: pid */
-	struct list_head __user	*lstio_sta_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-enum lst_test_type {
-	LST_TEST_BULK	= 1,
-	LST_TEST_PING	= 2
-};
-
-/* create a test in a batch */
-#define LST_MAX_CONCUR	1024	/* Max concurrency of test */
-
-struct lstio_test_args {
-	int		  lstio_tes_key;	/* IN: session key */
-	int		  lstio_tes_bat_nmlen;	/* IN: batch name len */
-	char __user	 *lstio_tes_bat_name;	/* IN: batch name */
-	int		  lstio_tes_type;	/* IN: test type */
-	int		  lstio_tes_oneside;	/* IN: one sided test */
-	int		  lstio_tes_loop;	/* IN: loop count */
-	int		  lstio_tes_concur;	/* IN: concurrency */
-
-	int		  lstio_tes_dist;	/* IN: node distribution in
-						 * destination groups
-						 */
-	int		  lstio_tes_span;	/* IN: node span in
-						 * destination groups
-						 */
-	int		  lstio_tes_sgrp_nmlen;	/* IN: source group
-						 * name length
-						 */
-	char __user	 *lstio_tes_sgrp_name;	/* IN: group name */
-	int		  lstio_tes_dgrp_nmlen;	/* IN: destination group
-						 * name length
-						 */
-	char __user	 *lstio_tes_dgrp_name;	/* IN: group name */
-
-	int		  lstio_tes_param_len;	/* IN: param buffer len */
-	void __user	 *lstio_tes_param;	/* IN: parameter for specified
-						 * test: lst_test_bulk_param,
-						 * lst_test_ping_param,
-						 * ... more
-						 */
-	int __user	 *lstio_tes_retp;	/* OUT: private returned
-						 * value
-						 */
-	struct list_head __user *lstio_tes_resultp;/* OUT: list head of
-						    * result buffer
-						    */
-};
-
-enum lst_brw_type {
-	LST_BRW_READ	= 1,
-	LST_BRW_WRITE	= 2
-};
-
-enum lst_brw_flags {
-	LST_BRW_CHECK_NONE	= 1,
-	LST_BRW_CHECK_SIMPLE	= 2,
-	LST_BRW_CHECK_FULL	= 3
-};
-
-struct lst_test_bulk_param {
-	int	blk_opc;	/* bulk operation code */
-	int	blk_size;       /* size (bytes) */
-	int	blk_time;       /* time of running the test */
-	int	blk_flags;      /* reserved flags */
-	int	blk_cli_off;	/* bulk offset on client */
-	int	blk_srv_off;	/* reserved: bulk offset on server */
-};
-
-struct lst_test_ping_param {
-	int	png_size;	/* size of ping message */
-	int	png_time;	/* time */
-	int	png_loop;	/* loop */
-	int	png_flags;	/* reserved flags */
-};
-
-struct srpc_counters {
-	__u32 errors;
-	__u32 rpcs_sent;
-	__u32 rpcs_rcvd;
-	__u32 rpcs_dropped;
-	__u32 rpcs_expired;
-	__u64 bulk_get;
-	__u64 bulk_put;
-} WIRE_ATTR;
-
-struct sfw_counters {
-	/** milliseconds since current session started */
-	__u32 running_ms;
-	__u32 active_batches;
-	__u32 zombie_sessions;
-	__u32 brw_errors;
-	__u32 ping_errors;
-} WIRE_ATTR;
-
-#endif

+ 0 - 119
drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h

@@ -1,119 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-#ifndef _LNET_NIDSTRINGS_H
-#define _LNET_NIDSTRINGS_H
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/**
- *  Lustre Network Driver types.
- */
-enum {
-	/*
-	 * Only add to these values (i.e. don't ever change or redefine them):
-	 * network addresses depend on them...
-	 */
-	QSWLND		= 1,
-	SOCKLND		= 2,
-	GMLND		= 3,
-	PTLLND		= 4,
-	O2IBLND		= 5,
-	CIBLND		= 6,
-	OPENIBLND	= 7,
-	IIBLND		= 8,
-	LOLND		= 9,
-	RALND		= 10,
-	VIBLND		= 11,
-	MXLND		= 12,
-	GNILND		= 13,
-	GNIIPLND	= 14,
-};
-
-struct list_head;
-
-#define LNET_NIDSTR_COUNT  1024    /* # of nidstrings */
-#define LNET_NIDSTR_SIZE   32      /* size of each one (see below for usage) */
-
-/* support decl needed by both kernel and user space */
-char *libcfs_next_nidstring(void);
-int libcfs_isknown_lnd(__u32 lnd);
-char *libcfs_lnd2modname(__u32 lnd);
-char *libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size);
-static inline char *libcfs_lnd2str(__u32 lnd)
-{
-	return libcfs_lnd2str_r(lnd, libcfs_next_nidstring(),
-				LNET_NIDSTR_SIZE);
-}
-
-int libcfs_str2lnd(const char *str);
-char *libcfs_net2str_r(__u32 net, char *buf, size_t buf_size);
-static inline char *libcfs_net2str(__u32 net)
-{
-	return libcfs_net2str_r(net, libcfs_next_nidstring(),
-				LNET_NIDSTR_SIZE);
-}
-
-char *libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size);
-static inline char *libcfs_nid2str(lnet_nid_t nid)
-{
-	return libcfs_nid2str_r(nid, libcfs_next_nidstring(),
-				LNET_NIDSTR_SIZE);
-}
-
-__u32 libcfs_str2net(const char *str);
-lnet_nid_t libcfs_str2nid(const char *str);
-int libcfs_str2anynid(lnet_nid_t *nid, const char *str);
-char *libcfs_id2str(struct lnet_process_id id);
-void cfs_free_nidlist(struct list_head *list);
-int cfs_parse_nidlist(char *str, int len, struct list_head *list);
-int cfs_print_nidlist(char *buffer, int count, struct list_head *list);
-int cfs_match_nid(lnet_nid_t nid, struct list_head *list);
-
-int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
-int cfs_ip_addr_match(__u32 addr, struct list_head *list);
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist);
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
-			       char *max_nid, size_t nidstr_length);
-
-struct netstrfns {
-	__u32	nf_type;
-	char	*nf_name;
-	char	*nf_modname;
-	void	(*nf_addr2str)(__u32 addr, char *str, size_t size);
-	int	(*nf_str2addr)(const char *str, int nob, __u32 *addr);
-	int	(*nf_parse_addrlist)(char *str, int len,
-				     struct list_head *list);
-	int	(*nf_print_addrlist)(char *buffer, int count,
-				     struct list_head *list);
-	int	(*nf_match_addr)(__u32 addr, struct list_head *list);
-	bool	(*nf_is_contiguous)(struct list_head *nidlist);
-	void	(*nf_min_max)(struct list_head *nidlist, __u32 *min_nid,
-			      __u32 *max_nid);
-};
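/*
 * Editor's sketch: the *_r variants above exist so callers can avoid the
 * rotating shared buffers behind the convenience wrappers; a
 * reentrancy-safe caller supplies its own storage.
 */
static inline void example_print_nid(lnet_nid_t nid)
{
	char buf[LNET_NIDSTR_SIZE];

	libcfs_nid2str_r(nid, buf, sizeof(buf));
	/* buf now holds a printable NID such as "192.168.0.1@tcp" */
}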
-
-#endif /* _LNET_NIDSTRINGS_H */

+ 0 - 44
drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h

@@ -1,44 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * #defines shared between socknal implementation and utilities
- */
-#ifndef __UAPI_LNET_SOCKLND_H__
-#define __UAPI_LNET_SOCKLND_H__
-
-#define SOCKLND_CONN_NONE     (-1)
-#define SOCKLND_CONN_ANY	0
-#define SOCKLND_CONN_CONTROL	1
-#define SOCKLND_CONN_BULK_IN	2
-#define SOCKLND_CONN_BULK_OUT	3
-#define SOCKLND_CONN_NTYPES	4
-
-#define SOCKLND_CONN_ACK	SOCKLND_CONN_BULK_IN
-
-#endif

+ 0 - 261
drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h

@@ -1,261 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _UAPI_LUSTRE_CFG_H_
-#define _UAPI_LUSTRE_CFG_H_
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <uapi/linux/lustre/lustre_user.h>
-
-/** \defgroup cfg cfg
- *
- * @{
- */
-
-/*
- * 1cf6
- * lcfG
- */
-#define LUSTRE_CFG_VERSION 0x1cf60001
-#define LUSTRE_CFG_MAX_BUFCOUNT 8
-
-#define LCFG_HDR_SIZE(count) \
-	__ALIGN_KERNEL(offsetof(struct lustre_cfg, lcfg_buflens[(count)]), 8)
-
-/** If the LCFG_REQUIRED bit is set in a configuration command,
- * then the client is required to understand this parameter
- * in order to mount the filesystem. If it does not understand
- * a REQUIRED command the client mount will fail.
- */
-#define LCFG_REQUIRED	0x0001000
-
-enum lcfg_command_type {
-	LCFG_ATTACH		  = 0x00cf001, /**< create a new obd instance */
-	LCFG_DETACH		  = 0x00cf002, /**< destroy obd instance */
-	LCFG_SETUP		  = 0x00cf003, /**< call type-specific setup */
-	LCFG_CLEANUP		  = 0x00cf004, /**< call type-specific cleanup
-						 */
-	LCFG_ADD_UUID		  = 0x00cf005, /**< add a nid to a niduuid */
-	LCFG_DEL_UUID		  = 0x00cf006, /**< remove a nid from
-						 *  a niduuid
-						 */
-	LCFG_MOUNTOPT		  = 0x00cf007, /**< create a profile
-						 * (mdc, osc)
-						 */
-	LCFG_DEL_MOUNTOPT	  = 0x00cf008, /**< destroy a profile */
-	LCFG_SET_TIMEOUT	  = 0x00cf009, /**< set obd_timeout */
-	LCFG_SET_UPCALL		  = 0x00cf00a, /**< deprecated */
-	LCFG_ADD_CONN		  = 0x00cf00b, /**< add a failover niduuid to
-						 *  an obd
-						 */
-	LCFG_DEL_CONN		  = 0x00cf00c, /**< remove a failover niduuid */
-	LCFG_LOV_ADD_OBD	  = 0x00cf00d, /**< add an osc to a lov */
-	LCFG_LOV_DEL_OBD	  = 0x00cf00e, /**< remove an osc from a lov */
-	LCFG_PARAM		  = 0x00cf00f, /**< set a proc parameter */
-	LCFG_MARKER		  = 0x00cf010, /**< metadata about next
-						 *  cfg rec
-						 */
-	LCFG_LOG_START		  = 0x00ce011, /**< mgc only, process a
-						 *  cfg log
-						 */
-	LCFG_LOG_END		  = 0x00ce012, /**< stop processing updates */
-	LCFG_LOV_ADD_INA	  = 0x00ce013, /**< like LOV_ADD_OBD,
-						 *  inactive
-						 */
-	LCFG_ADD_MDC		  = 0x00cf014, /**< add an mdc to a lmv */
-	LCFG_DEL_MDC		  = 0x00cf015, /**< remove an mdc from a lmv */
-	LCFG_SPTLRPC_CONF	  = 0x00ce016, /**< security */
-	LCFG_POOL_NEW		  = 0x00ce020, /**< create an ost pool name */
-	LCFG_POOL_ADD		  = 0x00ce021, /**< add an ost to a pool */
-	LCFG_POOL_REM		  = 0x00ce022, /**< remove an ost from a pool */
-	LCFG_POOL_DEL		  = 0x00ce023, /**< destroy an ost pool name */
-	LCFG_SET_LDLM_TIMEOUT	  = 0x00ce030, /**< set ldlm_timeout */
-	LCFG_PRE_CLEANUP	  = 0x00cf031, /**< call type-specific
-						 * pre-cleanup
-						 */
-	LCFG_SET_PARAM		  = 0x00ce032, /**< use set_param syntax to set
-						 * a proc parameter
-						 */
-};
-
-struct lustre_cfg_bufs {
-	void  *lcfg_buf[LUSTRE_CFG_MAX_BUFCOUNT];
-	__u32 lcfg_buflen[LUSTRE_CFG_MAX_BUFCOUNT];
-	__u32 lcfg_bufcount;
-};
-
-struct lustre_cfg {
-	__u32 lcfg_version;
-	__u32 lcfg_command;
-
-	__u32 lcfg_num;
-	__u32 lcfg_flags;
-	__u64 lcfg_nid;
-	__u32 lcfg_nal;		/* not used any more */
-
-	__u32 lcfg_bufcount;
-	__u32 lcfg_buflens[0];
-};
-
-enum cfg_record_type {
-	PORTALS_CFG_TYPE	= 1,
-	LUSTRE_CFG_TYPE		= 123,
-};
-
-#define LUSTRE_CFG_BUFLEN(lcfg, idx)					\
-	((lcfg)->lcfg_bufcount <= (idx) ? 0 : (lcfg)->lcfg_buflens[(idx)])
-
-static inline void lustre_cfg_bufs_set(struct lustre_cfg_bufs *bufs,
-				       __u32 index, void *buf, __u32 buflen)
-{
-	if (index >= LUSTRE_CFG_MAX_BUFCOUNT)
-		return;
-
-	if (!bufs)
-		return;
-
-	if (bufs->lcfg_bufcount <= index)
-		bufs->lcfg_bufcount = index + 1;
-
-	bufs->lcfg_buf[index] = buf;
-	bufs->lcfg_buflen[index] = buflen;
-}
-
-static inline void lustre_cfg_bufs_set_string(struct lustre_cfg_bufs *bufs,
-					      __u32 index, char *str)
-{
-	lustre_cfg_bufs_set(bufs, index, str, str ? strlen(str) + 1 : 0);
-}
-
-static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs,
-					 char *name)
-{
-	memset(bufs, 0, sizeof(*bufs));
-	if (name)
-		lustre_cfg_bufs_set_string(bufs, 0, name);
-}
-
-static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, __u32 index)
-{
-	__u32 i;
-	size_t offset;
-	__u32 bufcount;
-
-	if (!lcfg)
-		return NULL;
-
-	bufcount = lcfg->lcfg_bufcount;
-	if (index >= bufcount)
-		return NULL;
-
-	offset = LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
-	for (i = 0; i < index; i++)
-		offset += __ALIGN_KERNEL(lcfg->lcfg_buflens[i], 8);
-	return (char *)lcfg + offset;
-}
-
-static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
-					struct lustre_cfg *lcfg)
-{
-	__u32 i;
-
-	bufs->lcfg_bufcount = lcfg->lcfg_bufcount;
-	for (i = 0; i < bufs->lcfg_bufcount; i++) {
-		bufs->lcfg_buflen[i] = lcfg->lcfg_buflens[i];
-		bufs->lcfg_buf[i] = lustre_cfg_buf(lcfg, i);
-	}
-}
-
-static inline __u32 lustre_cfg_len(__u32 bufcount, __u32 *buflens)
-{
-	__u32 i;
-	__u32 len;
-
-	len = LCFG_HDR_SIZE(bufcount);
-	for (i = 0; i < bufcount; i++)
-		len += __ALIGN_KERNEL(buflens[i], 8);
-
-	return __ALIGN_KERNEL(len, 8);
-}
-
-static inline void lustre_cfg_init(struct lustre_cfg *lcfg, int cmd,
-				   struct lustre_cfg_bufs *bufs)
-{
-	char *ptr;
-	__u32 i;
-
-	lcfg->lcfg_version = LUSTRE_CFG_VERSION;
-	lcfg->lcfg_command = cmd;
-	lcfg->lcfg_bufcount = bufs->lcfg_bufcount;
-
-	ptr = (char *)lcfg + LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
-	for (i = 0; i < lcfg->lcfg_bufcount; i++) {
-		lcfg->lcfg_buflens[i] = bufs->lcfg_buflen[i];
-		if (bufs->lcfg_buf[i]) {
-			memcpy(ptr, bufs->lcfg_buf[i], bufs->lcfg_buflen[i]);
-			ptr += __ALIGN_KERNEL(bufs->lcfg_buflen[i], 8);
-		}
-	}
-}
-
-static inline int lustre_cfg_sanity_check(void *buf, size_t len)
-{
-	struct lustre_cfg *lcfg = (struct lustre_cfg *)buf;
-
-	if (!lcfg)
-		return -EINVAL;
-
-	/* check that the first bits of the struct are valid */
-	if (len < LCFG_HDR_SIZE(0))
-		return -EINVAL;
-
-	if (lcfg->lcfg_version != LUSTRE_CFG_VERSION)
-		return -EINVAL;
-
-	if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT)
-		return -EINVAL;
-
-	/* check that the buflens are valid */
-	if (len < LCFG_HDR_SIZE(lcfg->lcfg_bufcount))
-		return -EINVAL;
-
-	/* make sure all the pointers point inside the data */
-	if (len < lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens))
-		return -EINVAL;
-
-	return 0;
-}
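/*
 * Editor's sketch of the intended build sequence (reset -> set -> init).
 * Allocation is the caller's job: "cfgbuf" is assumed to be at least
 * lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen) bytes.
 */
static inline struct lustre_cfg *
example_build_cfg(void *cfgbuf, char *obdname, char *param)
{
	struct lustre_cfg_bufs bufs;
	struct lustre_cfg *lcfg = cfgbuf;

	lustre_cfg_bufs_reset(&bufs, obdname);		/* buf 0: device name */
	lustre_cfg_bufs_set_string(&bufs, 1, param);	/* buf 1: parameter */
	lustre_cfg_init(lcfg, LCFG_PARAM, &bufs);	/* pack into cfgbuf */
	return lcfg;
}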
-
-/** @} cfg */
-
-#endif /* _UAPI_LUSTRE_CFG_H_ */

+ 0 - 293
drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h

@@ -1,293 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2016 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * all fid manipulation functions go here
- *
- * FIDs are globally unique within a Lustre filesystem, and are made up
- * of three parts: sequence, Object ID, and version.
- *
- */
-#ifndef _UAPI_LUSTRE_FID_H_
-#define _UAPI_LUSTRE_FID_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/** returns fid object sequence */
-static inline __u64 fid_seq(const struct lu_fid *fid)
-{
-	return fid->f_seq;
-}
-
-/** returns fid object id */
-static inline __u32 fid_oid(const struct lu_fid *fid)
-{
-	return fid->f_oid;
-}
-
-/** returns fid object version */
-static inline __u32 fid_ver(const struct lu_fid *fid)
-{
-	return fid->f_ver;
-}
-
-static inline void fid_zero(struct lu_fid *fid)
-{
-	memset(fid, 0, sizeof(*fid));
-}
-
-static inline __u64 fid_ver_oid(const struct lu_fid *fid)
-{
-	return (__u64)fid_ver(fid) << 32 | fid_oid(fid);
-}
-
-static inline bool fid_seq_is_mdt0(__u64 seq)
-{
-	return seq == FID_SEQ_OST_MDT0;
-}
-
-static inline bool fid_seq_is_mdt(__u64 seq)
-{
-	return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
-}
-
-static inline bool fid_seq_is_echo(__u64 seq)
-{
-	return seq == FID_SEQ_ECHO;
-}
-
-static inline bool fid_is_echo(const struct lu_fid *fid)
-{
-	return fid_seq_is_echo(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_llog(__u64 seq)
-{
-	return seq == FID_SEQ_LLOG;
-}
-
-static inline bool fid_is_llog(const struct lu_fid *fid)
-{
-	/* file with OID == 0 is not llog but contains last oid */
-	return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
-}
-
-static inline bool fid_seq_is_rsvd(__u64 seq)
-{
-	return seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD;
-}
-
-static inline bool fid_seq_is_special(__u64 seq)
-{
-	return seq == FID_SEQ_SPECIAL;
-}
-
-static inline bool fid_seq_is_local_file(__u64 seq)
-{
-	return seq == FID_SEQ_LOCAL_FILE ||
-	       seq == FID_SEQ_LOCAL_NAME;
-}
-
-static inline bool fid_seq_is_root(__u64 seq)
-{
-	return seq == FID_SEQ_ROOT;
-}
-
-static inline bool fid_seq_is_dot(__u64 seq)
-{
-	return seq == FID_SEQ_DOT_LUSTRE;
-}
-
-static inline bool fid_seq_is_default(__u64 seq)
-{
-	return seq == FID_SEQ_LOV_DEFAULT;
-}
-
-static inline bool fid_is_mdt0(const struct lu_fid *fid)
-{
-	return fid_seq_is_mdt0(fid_seq(fid));
-}
-
-/**
- * Check if a fid is igif or not.
- * \param fid the fid to be tested.
- * \return true if the fid is an igif; otherwise false.
- */
-static inline bool fid_seq_is_igif(__u64 seq)
-{
-	return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
-}
-
-static inline bool fid_is_igif(const struct lu_fid *fid)
-{
-	return fid_seq_is_igif(fid_seq(fid));
-}
-
-/**
- * Check if a fid is idif or not.
- * \param fid the fid to be tested.
- * \return true if the fid is an idif; otherwise false.
- */
-static inline bool fid_seq_is_idif(__u64 seq)
-{
-	return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
-}
-
-static inline bool fid_is_idif(const struct lu_fid *fid)
-{
-	return fid_seq_is_idif(fid_seq(fid));
-}
-
-static inline bool fid_is_local_file(const struct lu_fid *fid)
-{
-	return fid_seq_is_local_file(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_norm(__u64 seq)
-{
-	return (seq >= FID_SEQ_NORMAL);
-}
-
-static inline bool fid_is_norm(const struct lu_fid *fid)
-{
-	return fid_seq_is_norm(fid_seq(fid));
-}
-
-/* convert an OST objid into an IDIF FID SEQ number */
-static inline __u64 fid_idif_seq(__u64 id, __u32 ost_idx)
-{
-	return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff);
-}
-
-/* convert a packed IDIF FID into an OST objid */
-static inline __u64 fid_idif_id(__u64 seq, __u32 oid, __u32 ver)
-{
-	return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid;
-}
-
-static inline __u32 idif_ost_idx(__u64 seq)
-{
-	return (seq >> 16) & 0xffff;
-}
-
-/* extract ost index from IDIF FID */
-static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid)
-{
-	return idif_ost_idx(fid_seq(fid));
-}
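/*
 * Editor's sketch: round-tripping an OST object id through the IDIF packing
 * above.  fid_idif_seq() keeps the top 16 id bits plus the OST index in the
 * sequence; the low 32 id bits live in the oid, so the value survives as
 * long as it fits in 48 bits.
 */
static inline __u64 example_idif_roundtrip(__u64 objid, __u32 ost_idx)
{
	struct lu_fid fid;

	fid.f_seq = fid_idif_seq(objid, ost_idx);
	fid.f_oid = (__u32)objid;
	fid.f_ver = 0;
	/* equals objid whenever objid < (1ULL << 48) */
	return fid_idif_id(fid_seq(&fid), fid_oid(&fid), fid_ver(&fid));
}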
-
-/**
- * Get inode number from an igif.
- * \param fid an igif to get inode number from.
- * \return inode number for the igif.
- */
-static inline ino_t lu_igif_ino(const struct lu_fid *fid)
-{
-	return fid_seq(fid);
-}
-
-/**
- * Get inode generation from an igif.
- * \param fid an igif to get inode generation from.
- * \return inode generation for the igif.
- */
-static inline __u32 lu_igif_gen(const struct lu_fid *fid)
-{
-	return fid_oid(fid);
-}
-
-/**
- * Build igif from the inode number/generation.
- */
-static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen)
-{
-	fid->f_seq = ino;
-	fid->f_oid = gen;
-	fid->f_ver = 0;
-}
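/*
 * Editor's sketch: an igif round trip built from the helpers above — pack
 * (ino, gen), then read both values back through the accessors.
 */
static inline int example_igif_roundtrip(__u32 ino, __u32 gen)
{
	struct lu_fid fid;

	lu_igif_build(&fid, ino, gen);
	return lu_igif_ino(&fid) == ino && lu_igif_gen(&fid) == gen;
}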
-
-/*
- * Fids are transmitted across network (in the sender byte-ordering),
- * and stored on disk in big-endian order.
- */
-static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __cpu_to_le64(fid_seq(src));
-	dst->f_oid = __cpu_to_le32(fid_oid(src));
-	dst->f_ver = __cpu_to_le32(fid_ver(src));
-}
-
-static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __le64_to_cpu(fid_seq(src));
-	dst->f_oid = __le32_to_cpu(fid_oid(src));
-	dst->f_ver = __le32_to_cpu(fid_ver(src));
-}
-
-static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __cpu_to_be64(fid_seq(src));
-	dst->f_oid = __cpu_to_be32(fid_oid(src));
-	dst->f_ver = __cpu_to_be32(fid_ver(src));
-}
-
-static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __be64_to_cpu(fid_seq(src));
-	dst->f_oid = __be32_to_cpu(fid_oid(src));
-	dst->f_ver = __be32_to_cpu(fid_ver(src));
-}
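/*
 * Editor's sketch: per the comment above, a FID headed for disk is converted
 * to big-endian first.  Each field is read before it is written, so the
 * conversion may safely be done in place.
 */
static inline void example_fid_to_disk(struct lu_fid *fid)
{
	fid_cpu_to_be(fid, fid);
}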
-
-static inline bool fid_is_sane(const struct lu_fid *fid)
-{
-	return fid && ((fid_seq(fid) >= FID_SEQ_START && !fid_ver(fid)) ||
-			fid_is_igif(fid) || fid_is_idif(fid) ||
-			fid_seq_is_rsvd(fid_seq(fid)));
-}
-
-static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
-{
-	return !memcmp(f0, f1, sizeof(*f0));
-}
-
-static inline int lu_fid_cmp(const struct lu_fid *f0,
-			     const struct lu_fid *f1)
-{
-	if (fid_seq(f0) != fid_seq(f1))
-		return fid_seq(f0) > fid_seq(f1) ? 1 : -1;
-
-	if (fid_oid(f0) != fid_oid(f1))
-		return fid_oid(f0) > fid_oid(f1) ? 1 : -1;
-
-	if (fid_ver(f0) != fid_ver(f1))
-		return fid_ver(f0) > fid_ver(f1) ? 1 : -1;
-
-	return 0;
-}
-#endif

+ 0 - 72
drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h

@@ -1,72 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2014, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * FIEMAP data structures and flags. This header file will be used until
- * fiemap.h is available in the upstream kernel.
- *
- * Author: Kalpak Shah <kalpak.shah@sun.com>
- * Author: Andreas Dilger <adilger@sun.com>
- */
-
-#ifndef _LUSTRE_FIEMAP_H
-#define _LUSTRE_FIEMAP_H
-
-#include <stddef.h>
-#include <linux/fiemap.h>
-
-/* XXX: We use fiemap_extent::fe_reserved[0] */
-#define fe_device	fe_reserved[0]
-
-static inline size_t fiemap_count_to_size(size_t extent_count)
-{
-	return sizeof(struct fiemap) + extent_count *
-				       sizeof(struct fiemap_extent);
-}
-
-static inline unsigned int fiemap_size_to_count(size_t array_size)
-{
-	return (array_size - sizeof(struct fiemap)) /
-		sizeof(struct fiemap_extent);
-}
-
-#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
-
-#ifdef FIEMAP_FLAGS_COMPAT
-#undef FIEMAP_FLAGS_COMPAT
-#endif
-
-/* Lustre specific flags - use a high bit, don't conflict with upstream flag */
-#define FIEMAP_EXTENT_NO_DIRECT	 0x40000000 /* Data mapping undefined */
-#define FIEMAP_EXTENT_NET	 0x80000000 /* Data stored remotely.
-					     * Sets NO_DIRECT flag
-					     */
-
-#endif /* _LUSTRE_FIEMAP_H */

+ 0 - 2690
drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h

@@ -1,2690 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Lustre wire protocol definitions.
- */
-
-/** \defgroup lustreidl lustreidl
- *
- * Lustre wire protocol definitions.
- *
- * ALL structs passing over the wire should be declared here.  Structs
- * that are used in interfaces with userspace should go in lustre_user.h.
- *
- * All structs being declared here should be built from simple fixed-size
- * types (__u8, __u16, __u32, __u64) or be built from other types or
- * structs also declared in this file.  Similarly, all flags and magic
- * values in those structs should also be declared here.  This ensures
- * that the Lustre wire protocol is not influenced by external dependencies.
- *
- * The only other acceptable items in this file are VERY SIMPLE accessor
- * functions to avoid callers grubbing inside the structures. Nothing that
- * depends on external functions or definitions should be in here.
- *
- * Structs must be properly aligned to put 64-bit values on an 8-byte
- * boundary.  Any structs being added here must also be added to
- * utils/wirecheck.c and "make newwiretest" run to regenerate the
- * utils/wiretest.c sources.  This allows us to verify that wire structs
- * have the proper alignment/size on all architectures.
- *
- * DO NOT CHANGE any of the structs, flags, values declared here and used
- * in released Lustre versions.  Some structs may have padding fields that
- * can be used.  Some structs might allow addition at the end (verify this
- * in the code to ensure that new/old clients that see this larger struct
- * do not fail, otherwise you need to implement protocol compatibility).
- *
- * @{
- */
-
-#ifndef _LUSTRE_IDL_H_
-#define _LUSTRE_IDL_H_
-
-#include <asm/byteorder.h>
-#include <linux/types.h>
-
-#include <uapi/linux/lnet/lnet-types.h>
-/* Defn's shared with user-space. */
-#include <uapi/linux/lustre/lustre_user.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-
-/*
- *  GENERAL STUFF
- */
-/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
- * FOO_REPLY_PORTAL   is for incoming replies on the FOO
- * FOO_BULK_PORTAL    is for incoming bulk on the FOO
- */
-
-/* Lustre service names follow the format:
- * service name + MDT + seq name
- */
-#define LUSTRE_MDT_MAXNAMELEN	80
-
-#define CONNMGR_REQUEST_PORTAL          1
-#define CONNMGR_REPLY_PORTAL            2
-/*#define OSC_REQUEST_PORTAL            3 */
-#define OSC_REPLY_PORTAL                4
-/*#define OSC_BULK_PORTAL               5 */
-#define OST_IO_PORTAL                   6
-#define OST_CREATE_PORTAL               7
-#define OST_BULK_PORTAL                 8
-/*#define MDC_REQUEST_PORTAL            9 */
-#define MDC_REPLY_PORTAL               10
-/*#define MDC_BULK_PORTAL              11 */
-#define MDS_REQUEST_PORTAL             12
-/*#define MDS_REPLY_PORTAL             13 */
-#define MDS_BULK_PORTAL                14
-#define LDLM_CB_REQUEST_PORTAL         15
-#define LDLM_CB_REPLY_PORTAL           16
-#define LDLM_CANCEL_REQUEST_PORTAL     17
-#define LDLM_CANCEL_REPLY_PORTAL       18
-/*#define PTLBD_REQUEST_PORTAL         19 */
-/*#define PTLBD_REPLY_PORTAL           20 */
-/*#define PTLBD_BULK_PORTAL            21 */
-#define MDS_SETATTR_PORTAL             22
-#define MDS_READPAGE_PORTAL            23
-#define OUT_PORTAL                     24
-
-#define MGC_REPLY_PORTAL               25
-#define MGS_REQUEST_PORTAL             26
-#define MGS_REPLY_PORTAL               27
-#define OST_REQUEST_PORTAL             28
-#define FLD_REQUEST_PORTAL             29
-#define SEQ_METADATA_PORTAL            30
-#define SEQ_DATA_PORTAL                31
-#define SEQ_CONTROLLER_PORTAL          32
-#define MGS_BULK_PORTAL                33
-
-/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com,
- *						n8851@cray.com
- */
-
-/* packet types */
-#define PTL_RPC_MSG_REQUEST 4711
-#define PTL_RPC_MSG_ERR     4712
-#define PTL_RPC_MSG_REPLY   4713
-
-/* DON'T use swabbed values of MAGIC as magic! */
-#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3
-#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B
-
-#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2
-
-#define PTLRPC_MSG_VERSION  0x00000003
-#define LUSTRE_VERSION_MASK 0xffff0000
-#define LUSTRE_OBD_VERSION  0x00010000
-#define LUSTRE_MDS_VERSION  0x00020000
-#define LUSTRE_OST_VERSION  0x00030000
-#define LUSTRE_DLM_VERSION  0x00040000
-#define LUSTRE_LOG_VERSION  0x00050000
-#define LUSTRE_MGS_VERSION  0x00060000
-
-/**
- * Describes a range of sequence numbers: lsr_start is included in the
- * range but lsr_end is not.
- * The same structure is used in the fld module, where the lsr_index
- * field holds the mdt id of the home mdt.
- */
-struct lu_seq_range {
-	__u64 lsr_start;
-	__u64 lsr_end;
-	__u32 lsr_index;
-	__u32 lsr_flags;
-};
-
-struct lu_seq_range_array {
-	__u32 lsra_count;
-	__u32 lsra_padding;
-	struct lu_seq_range lsra_lsr[0];
-};
-
-#define LU_SEQ_RANGE_MDT	0x0
-#define LU_SEQ_RANGE_OST	0x1
-#define LU_SEQ_RANGE_ANY	0x3
-
-#define LU_SEQ_RANGE_MASK	0x3
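Since lsr_start is inclusive and lsr_end exclusive, membership is a half-open interval test. A sketch of such a predicate (the helper name is illustrative; upstream Lustre keeps its range helpers in other headers):

	/* Does seq fall inside the half-open range [lsr_start, lsr_end)? */
	static inline bool lu_seq_range_contains(const struct lu_seq_range *r,
						 __u64 seq)
	{
		return seq >= r->lsr_start && seq < r->lsr_end;
	}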
-
-/** \defgroup lu_fid lu_fid
- * @{
- */
-
-/**
- * Flags for lustre_mdt_attrs::lma_compat and lustre_mdt_attrs::lma_incompat.
- * Deprecated since the HSM and SOM attributes are now stored in separate
- * on-disk xattrs.
- */
-enum lma_compat {
-	LMAC_HSM	= 0x00000001,
-/*	LMAC_SOM	= 0x00000002, obsolete since 2.8.0 */
-	LMAC_NOT_IN_OI	= 0x00000004, /* the object does NOT need OI mapping */
-	LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
-				       * under /O/<seq>/d<x>.
-				       */
-};
-
-/**
- * Masks for all features that should be supported by a Lustre version to
- * access a specific file.
- * This information is stored in lustre_mdt_attrs::lma_incompat.
- */
-enum lma_incompat {
-	LMAI_RELEASED		= 0x00000001, /* file is released */
-	LMAI_AGENT		= 0x00000002, /* agent inode */
-	LMAI_REMOTE_PARENT	= 0x00000004, /* the parent of the object
-					       * is on the remote MDT
-					       */
-};
-
-#define LMA_INCOMPAT_SUPP	(LMAI_AGENT | LMAI_REMOTE_PARENT)
-
-/**
- * fid constants
- */
-enum {
-	/** LASTID file has zero OID */
-	LUSTRE_FID_LASTID_OID = 0UL,
-	/** initial fid id value */
-	LUSTRE_FID_INIT_OID  = 1UL
-};
-
-/* The copytool uses a 32-bit bitmask field to encode archive IDs when
- * registering with the MDT through KUC.
- * archive num = 0 => all
- * archive num from 1 to 32
- */
-#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8)
-
-/**
- * Note that SEQ numbers below 12 correspond to ldiskfs inodes that are
- * reserved by the filesystem itself and so never appear as real inodes
- * in the IGIF namespace.  These reserved SEQ numbers can therefore be
- * used for other purposes without risking collisions with existing inodes.
- *
- * Different FID Format
- * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
- */
-enum fid_seq {
-	FID_SEQ_OST_MDT0	= 0,
-	FID_SEQ_LLOG		= 1, /* unnamed llogs */
-	FID_SEQ_ECHO		= 2,
-	FID_SEQ_OST_MDT1	= 3,
-	FID_SEQ_OST_MAX		= 9, /* Max MDT count before OST_on_FID */
-	FID_SEQ_LLOG_NAME	= 10, /* named llogs */
-	FID_SEQ_RSVD		= 11,
-	FID_SEQ_IGIF		= 12,
-	FID_SEQ_IGIF_MAX	= 0x0ffffffffULL,
-	FID_SEQ_IDIF		= 0x100000000ULL,
-	FID_SEQ_IDIF_MAX	= 0x1ffffffffULL,
-	/* Normal FID sequence starts from this value, i.e. 1<<33 */
-	FID_SEQ_START		= 0x200000000ULL,
-	/* sequence for local pre-defined FIDs listed in local_oid */
-	FID_SEQ_LOCAL_FILE	= 0x200000001ULL,
-	FID_SEQ_DOT_LUSTRE	= 0x200000002ULL,
-	/* sequence is used for local named objects FIDs generated
-	 * by local_object_storage library
-	 */
-	FID_SEQ_LOCAL_NAME	= 0x200000003ULL,
-	/* Because the current FLD only caches the fid sequence (not the
-	 * oid) on the client side, any FID that needs to be exposed to
-	 * clients must guarantee that all fids under its sequence are
-	 * located on a single MDT.
-	 */
-	FID_SEQ_SPECIAL		= 0x200000004ULL,
-	FID_SEQ_QUOTA		= 0x200000005ULL,
-	FID_SEQ_QUOTA_GLB	= 0x200000006ULL,
-	FID_SEQ_ROOT		= 0x200000007ULL,  /* Located on MDT0 */
-	FID_SEQ_NORMAL		= 0x200000400ULL,
-	FID_SEQ_LOV_DEFAULT	= 0xffffffffffffffffULL
-};
-
-#define OBIF_OID_MAX_BITS	   32
-#define OBIF_MAX_OID		(1ULL << OBIF_OID_MAX_BITS)
-#define OBIF_OID_MASK	       ((1ULL << OBIF_OID_MAX_BITS) - 1)
-#define IDIF_OID_MAX_BITS	   48
-#define IDIF_MAX_OID		(1ULL << IDIF_OID_MAX_BITS)
-#define IDIF_OID_MASK	       ((1ULL << IDIF_OID_MAX_BITS) - 1)
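These constants partition the sequence space, so a FID can be classified by its sequence alone. A minimal sketch of the kind of predicates this enables (illustrative bodies; upstream keeps similar helpers in lustre_fid.h):

	/* Classify a sequence number into a FID namespace. */
	static inline bool seq_is_igif(__u64 seq)
	{
		return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
	}

	static inline bool seq_is_idif(__u64 seq)
	{
		return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
	}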
-
-/** OID for FID_SEQ_SPECIAL */
-enum special_oid {
-	/* Big Filesystem Lock to serialize rename operations */
-	FID_OID_SPECIAL_BFL     = 1UL,
-};
-
-/** OID for FID_SEQ_DOT_LUSTRE */
-enum dot_lustre_oid {
-	FID_OID_DOT_LUSTRE  = 1UL,
-	FID_OID_DOT_LUSTRE_OBF = 2UL,
-};
-
-/** OID for FID_SEQ_ROOT */
-enum root_oid {
-	FID_OID_ROOT		= 1UL,
-	FID_OID_ECHO_ROOT	= 2UL,
-};
-
-/** @} lu_fid */
-
-/** \defgroup lu_dir lu_dir
- * @{
- */
-
-/**
- * Enumeration of possible directory entry attributes.
- *
- * Attributes follow the directory entry header in the order they appear
- * in this enumeration.
- */
-enum lu_dirent_attrs {
-	LUDA_FID		= 0x0001,
-	LUDA_TYPE		= 0x0002,
-	LUDA_64BITHASH		= 0x0004,
-};
-
-/**
- * Layout of readdir pages, as transmitted on wire.
- */
-struct lu_dirent {
-	/** valid if LUDA_FID is set. */
-	struct lu_fid lde_fid;
-	/** a unique entry identifier: a hash or an offset. */
-	__u64	 lde_hash;
-	/** total record length, including all attributes. */
-	__u16	 lde_reclen;
-	/** name length */
-	__u16	 lde_namelen;
-	/** optional variable size attributes following this entry.
-	 *  taken from enum lu_dirent_attrs.
-	 */
-	__u32	 lde_attrs;
-	/** name is followed by the attributes indicated in ->ldp_attrs, in
-	 *  their natural order. After the last attribute, padding bytes are
-	 *  added to make ->lde_reclen a multiple of 8.
-	 */
-	char	  lde_name[0];
-};
-
-/*
- * Definitions of optional directory entry attributes formats.
- *
- * Individual attributes do not have their length encoded in a generic way.
- * It is assumed that the consumer of an attribute knows its format. This
- * means that it is impossible to skip over an unknown attribute, except by
- * skipping over all remaining attributes (by using ->lde_reclen), which is
- * not too constraining, because new server versions will append new
- * attributes at the end of an entry.
- */
-
-/**
- * Fid directory attribute: a fid of an object referenced by the entry. This
- * will be almost always requested by the client and supplied by the server.
- *
- * Aligned to 8 bytes.
- */
-/* For compatibility with 1.8, let's keep the fid in the lu_dirent struct. */
-
-/**
- * File type.
- *
- * Aligned to 2 bytes.
- */
-struct luda_type {
-	__u16 lt_type;
-};
-
-#ifndef IFSHIFT
-#define IFSHIFT                 12
-#endif
-
-#ifndef IFTODT
-#define IFTODT(type)		(((type) & S_IFMT) >> IFSHIFT)
-#endif
-#ifndef DTTOIF
-#define DTTOIF(dirtype)		((dirtype) << IFSHIFT)
-#endif
-
-struct lu_dirpage {
-	__le64	    ldp_hash_start;
-	__le64	    ldp_hash_end;
-	__le32	    ldp_flags;
-	__le32	    ldp_pad0;
-	struct lu_dirent ldp_entries[0];
-};
-
-enum lu_dirpage_flags {
-	/**
-	 * dirpage contains no entry.
-	 */
-	LDF_EMPTY   = 1 << 0,
-	/**
-	 * last entry's lde_hash equals ldp_hash_end.
-	 */
-	LDF_COLLIDE = 1 << 1
-};
-
-static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
-{
-	if (__le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
-		return NULL;
-	else
-		return dp->ldp_entries;
-}
-
-static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
-{
-	struct lu_dirent *next;
-
-	if (__le16_to_cpu(ent->lde_reclen) != 0)
-		next = ((void *)ent) + __le16_to_cpu(ent->lde_reclen);
-	else
-		next = NULL;
-
-	return next;
-}
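Together these give the canonical way to walk a dirpage: start with lu_dirent_start(), advance with lu_dirent_next(), and stop on NULL (an empty page or a zero lde_reclen). A sketch, assuming a kernel context (walk_dirpage is an illustrative name):

	/* Visit every entry packed into one directory page. */
	static void walk_dirpage(struct lu_dirpage *dp)
	{
		struct lu_dirent *ent;

		for (ent = lu_dirent_start(dp); ent; ent = lu_dirent_next(ent))
			/* lde_name is not NUL-terminated; bound by lde_namelen */
			pr_info("%.*s\n", (int)__le16_to_cpu(ent->lde_namelen),
				ent->lde_name);
	}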
-
-static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
-{
-	size_t size;
-
-	if (attr & LUDA_TYPE) {
-		const size_t align = sizeof(struct luda_type) - 1;
-
-		size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
-		size += sizeof(struct luda_type);
-	} else {
-		size = sizeof(struct lu_dirent) + namelen;
-	}
-
-	return (size + 7) & ~7;
-}
-
-#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
-
-/**
- * MDS_READPAGE page size
- *
- * This is the directory page size packed in MDS_READPAGE RPC.
- * It differs from PAGE_SIZE because the client needs to access the
- * struct lu_dirpage header packed at the beginning of the "page", and
- * without a fixed page size there would be no way to find the lu_dirpage
- * headers when the client and server PAGE_SIZE differ.
- */
-#define LU_PAGE_SHIFT 12
-#define LU_PAGE_SIZE  (1UL << LU_PAGE_SHIFT)
-#define LU_PAGE_MASK  (~(LU_PAGE_SIZE - 1))
-
-#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT))
-
-/** @} lu_dir */
-
-struct lustre_handle {
-	__u64 cookie;
-};
-
-#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL
-
-static inline bool lustre_handle_is_used(const struct lustre_handle *lh)
-{
-	return lh->cookie != 0ull;
-}
-
-static inline bool lustre_handle_equal(const struct lustre_handle *lh1,
-				       const struct lustre_handle *lh2)
-{
-	return lh1->cookie == lh2->cookie;
-}
-
-static inline void lustre_handle_copy(struct lustre_handle *tgt,
-				      const struct lustre_handle *src)
-{
-	tgt->cookie = src->cookie;
-}
-
-/* flags for lm_flags */
-#define MSGHDR_AT_SUPPORT	       0x1
-#define MSGHDR_CKSUM_INCOMPAT18	 0x2
-
-#define lustre_msg lustre_msg_v2
-/* we depend on this structure to be 8-byte aligned */
-/* this type is only endian-adjusted in lustre_unpack_msg() */
-struct lustre_msg_v2 {
-	__u32 lm_bufcount;
-	__u32 lm_secflvr;
-	__u32 lm_magic;
-	__u32 lm_repsize;
-	__u32 lm_cksum;
-	__u32 lm_flags;
-	__u32 lm_padding_2;
-	__u32 lm_padding_3;
-	__u32 lm_buflens[0];
-};
-
-/* without gss, ptlrpc_body is put in the first buffer. */
-#define PTLRPC_NUM_VERSIONS     4
-
-struct ptlrpc_body_v3 {
-	struct lustre_handle pb_handle;
-	__u32 pb_type;
-	__u32 pb_version;
-	__u32 pb_opc;
-	__u32 pb_status;
-	__u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
-	__u16 pb_tag;      /* virtual slot idx for multiple modifying RPCs */
-	__u16 pb_padding0;
-	__u32 pb_padding1;
-	__u64 pb_last_committed;
-	__u64 pb_transno;
-	__u32 pb_flags;
-	__u32 pb_op_flags;
-	__u32 pb_conn_cnt;
-	__u32 pb_timeout;  /* for req, the deadline, for rep, the service est */
-	__u32 pb_service_time; /* for rep, actual service time */
-	__u32 pb_limit;
-	__u64 pb_slv;
-	/* VBR: pre-versions */
-	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
-	__u64 pb_mbits; /**< match bits for bulk request */
-	/* padding for future needs */
-	__u64 pb_padding64_0;
-	__u64 pb_padding64_1;
-	__u64 pb_padding64_2;
-	char  pb_jobid[LUSTRE_JOBID_SIZE];
-};
-
-#define ptlrpc_body     ptlrpc_body_v3
-
-struct ptlrpc_body_v2 {
-	struct lustre_handle pb_handle;
-	__u32 pb_type;
-	__u32 pb_version;
-	__u32 pb_opc;
-	__u32 pb_status;
-	__u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
-	__u16 pb_tag;      /* virtual slot idx for multiple modifying RPCs */
-	__u16 pb_padding0;
-	__u32 pb_padding1;
-	__u64 pb_last_committed;
-	__u64 pb_transno;
-	__u32 pb_flags;
-	__u32 pb_op_flags;
-	__u32 pb_conn_cnt;
-	__u32 pb_timeout;  /* for req, the deadline, for rep, the service est */
-	__u32 pb_service_time; /* for rep, actual service time, also used for
-				* net_latency of req
-				*/
-	__u32 pb_limit;
-	__u64 pb_slv;
-	/* VBR: pre-versions */
-	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
-	__u64 pb_mbits; /**< unused in V2 */
-	/* padding for future needs */
-	__u64 pb_padding64_0;
-	__u64 pb_padding64_1;
-	__u64 pb_padding64_2;
-};
-
-/* message body offset for lustre_msg_v2 */
-/* ptlrpc body offset in all request/reply messages */
-#define MSG_PTLRPC_BODY_OFF	     0
-
-/* normal request/reply message record offset */
-#define REQ_REC_OFF		     1
-#define REPLY_REC_OFF		   1
-
-/* ldlm request message body offset */
-#define DLM_LOCKREQ_OFF		 1 /* lockreq offset */
-#define DLM_REQ_REC_OFF		 2 /* normal dlm request record offset */
-
-/* ldlm intent lock message body offset */
-#define DLM_INTENT_IT_OFF	       2 /* intent lock it offset */
-#define DLM_INTENT_REC_OFF	      3 /* intent lock record offset */
-
-/* ldlm reply message body offset */
-#define DLM_LOCKREPLY_OFF	       1 /* lockrep offset */
-#define DLM_REPLY_REC_OFF	       2 /* reply record offset */
-
-/** only use in req->rq_{req,rep}_swab_mask */
-#define MSG_PTLRPC_HEADER_OFF	   31
-
-/* Flags that are operation-specific go in the top 16 bits. */
-#define MSG_OP_FLAG_MASK   0xffff0000
-#define MSG_OP_FLAG_SHIFT  16
-
-/* Flags that apply to all requests are in the bottom 16 bits */
-#define MSG_GEN_FLAG_MASK     0x0000ffff
-#define MSG_LAST_REPLAY	   0x0001
-#define MSG_RESENT		0x0002
-#define MSG_REPLAY		0x0004
-/* #define MSG_AT_SUPPORT	 0x0008
- * This was used in early prototypes of adaptive timeouts, and while there
- * shouldn't be any users of that code there also isn't a need for using
- * this bit. Defer usage until at least 1.10 to avoid potential conflict.
- */
-#define MSG_DELAY_REPLAY	  0x0010
-#define MSG_VERSION_REPLAY	0x0020
-#define MSG_REQ_REPLAY_DONE       0x0040
-#define MSG_LOCK_REPLAY_DONE      0x0080
-
-/*
- * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
- */
-
-#define MSG_CONNECT_RECOVERING  0x00000001
-#define MSG_CONNECT_RECONNECT   0x00000002
-#define MSG_CONNECT_REPLAYABLE  0x00000004
-/*#define MSG_CONNECT_PEER	0x8 */
-#define MSG_CONNECT_LIBCLIENT   0x00000010
-#define MSG_CONNECT_INITIAL     0x00000020
-#define MSG_CONNECT_ASYNC       0x00000040
-#define MSG_CONNECT_NEXT_VER    0x00000080 /* use next version of lustre_msg */
-#define MSG_CONNECT_TRANSNO     0x00000100 /* report transno */
-
-/* Connect flags */
-#define OBD_CONNECT_RDONLY		  0x1ULL /*client has read-only access*/
-#define OBD_CONNECT_INDEX		  0x2ULL /*connect specific LOV idx */
-#define OBD_CONNECT_MDS			  0x4ULL /*connect from MDT to OST */
-#define OBD_CONNECT_GRANT		  0x8ULL /*OSC gets grant at connect */
-#define OBD_CONNECT_SRVLOCK		 0x10ULL /*server takes locks for cli */
-#define OBD_CONNECT_VERSION		 0x20ULL /*Lustre versions in ocd */
-#define OBD_CONNECT_REQPORTAL		 0x40ULL /*Separate non-IO req portal */
-#define OBD_CONNECT_ACL			 0x80ULL /*access control lists */
-#define OBD_CONNECT_XATTR		0x100ULL /*client use extended attr */
-#define OBD_CONNECT_LARGE_ACL		0x200ULL /* more than 32 ACL entries */
-#define OBD_CONNECT_TRUNCLOCK		0x400ULL /*locks on server for punch */
-#define OBD_CONNECT_TRANSNO		0x800ULL /*replay sends init transno */
-#define OBD_CONNECT_IBITS	       0x1000ULL /*support for inodebits locks*/
-#define OBD_CONNECT_JOIN	       0x2000ULL /*files can be concatenated.
-						  *We no longer support JOIN
-						  *FILE; this flag is reserved
-						  *only to keep the bit from
-						  *being reused.
-						  */
-#define OBD_CONNECT_ATTRFID	       0x4000ULL /*Server can GetAttr By Fid*/
-#define OBD_CONNECT_NODEVOH	       0x8000ULL /*No open hndl on specl nodes*/
-#define OBD_CONNECT_RMT_CLIENT	      0x10000ULL /* Remote client, never used
-						  * in production. Removed in
-						  * 2.9. Keep this flag to
-						  * avoid reuse.
-						  */
-#define OBD_CONNECT_RMT_CLIENT_FORCE  0x20000ULL /* Remote client by force,
-						  * never used in production.
-						  * Removed in 2.9. Keep this
-						  * flag to avoid reuse
-						  */
-#define OBD_CONNECT_BRW_SIZE	      0x40000ULL /*Max bytes per rpc */
-#define OBD_CONNECT_QUOTA64	      0x80000ULL /*Not used since 2.4 */
-#define OBD_CONNECT_MDS_CAPA	     0x100000ULL /*MDS capability */
-#define OBD_CONNECT_OSS_CAPA	     0x200000ULL /*OSS capability */
-#define OBD_CONNECT_CANCELSET	     0x400000ULL /*Early batched cancels. */
-#define OBD_CONNECT_SOM		     0x800000ULL /*Size on MDS */
-#define OBD_CONNECT_AT		    0x1000000ULL /*client uses AT */
-#define OBD_CONNECT_LRU_RESIZE      0x2000000ULL /*LRU resize feature. */
-#define OBD_CONNECT_MDS_MDS	    0x4000000ULL /*MDS-MDS connection */
-#define OBD_CONNECT_REAL	    0x8000000ULL /* obsolete since 2.8 */
-#define OBD_CONNECT_CHANGE_QS      0x10000000ULL /*Not used since 2.4 */
-#define OBD_CONNECT_CKSUM	   0x20000000ULL /*support several cksum algos*/
-#define OBD_CONNECT_FID		   0x40000000ULL /*FID is supported by server */
-#define OBD_CONNECT_VBR		   0x80000000ULL /*version based recovery */
-#define OBD_CONNECT_LOV_V3	  0x100000000ULL /*client supports LOV v3 EA */
-#define OBD_CONNECT_GRANT_SHRINK  0x200000000ULL /* support grant shrink */
-#define OBD_CONNECT_SKIP_ORPHAN   0x400000000ULL /* don't reuse orphan objids */
-#define OBD_CONNECT_MAX_EASIZE    0x800000000ULL /* preserved for large EA */
-#define OBD_CONNECT_FULL20       0x1000000000ULL /* it is 2.0 client */
-#define OBD_CONNECT_LAYOUTLOCK   0x2000000000ULL /* client uses layout lock */
-#define OBD_CONNECT_64BITHASH    0x4000000000ULL /* client supports 64-bits
-						  * directory hash
-						  */
-#define OBD_CONNECT_MAXBYTES     0x8000000000ULL /* max stripe size */
-#define OBD_CONNECT_IMP_RECOV   0x10000000000ULL /* imp recovery support */
-#define OBD_CONNECT_JOBSTATS    0x20000000000ULL /* jobid in ptlrpc_body */
-#define OBD_CONNECT_UMASK       0x40000000000ULL /* create uses client umask */
-#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS
-						  * RPC error properly
-						  */
-#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
-						  * finer space reservation
-						  */
-#define OBD_CONNECT_FLOCK_OWNER 0x200000000000ULL /* for the fixed 1.8
-						   * policy and 2.x server
-						   */
-#define OBD_CONNECT_LVB_TYPE	0x400000000000ULL /* variable type of LVB */
-#define OBD_CONNECT_NANOSEC_TIME 0x800000000000ULL /* nanosecond timestamps */
-#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */
-#define OBD_CONNECT_SHORTIO     0x2000000000000ULL/* short io */
-#define OBD_CONNECT_PINGLESS	0x4000000000000ULL/* pings not required */
-#define OBD_CONNECT_FLOCK_DEAD	0x8000000000000ULL/* flock deadlock detection */
-#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
-#define OBD_CONNECT_OPEN_BY_FID	0x20000000000000ULL	/* open by fid won't pack
-							 * name in request
-							 */
-#define OBD_CONNECT_LFSCK	0x40000000000000ULL/* support online LFSCK */
-#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
-#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify
-						       *  RPCs in parallel
-						       */
-#define OBD_CONNECT_DIR_STRIPE	 0x400000000000000ULL/* striped DNE dir */
-#define OBD_CONNECT_SUBTREE	 0x800000000000000ULL /* fileset mount */
-#define OBD_CONNECT_LOCK_AHEAD	 0x1000000000000000ULL /* lock ahead */
-/** bulk matchbits is sent within ptlrpc_body */
-#define OBD_CONNECT_BULK_MBITS	 0x2000000000000000ULL
-#define OBD_CONNECT_OBDOPACK	 0x4000000000000000ULL /* compact OUT obdo */
-#define OBD_CONNECT_FLAGS2	 0x8000000000000000ULL /* second flags word */
-
-/* XXX README XXX:
- * Please DO NOT add flag values here before first ensuring that this same
- * flag value is not in use on some other branch.  Please clear any such
- * changes with senior engineers before starting to use a new flag.  Then,
- * submit a small patch against EVERY branch that ONLY adds the new flag,
- * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the
- * flag to check_obd_connect_data(), and updates wiretests accordingly, so it
- * can be approved and landed easily to reserve the flag for future use.
- */
-
-/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS
- * connection.  It is a temporary bug fix for Imperative Recovery interop
- * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for
- * 2.2 clients/servers is no longer needed.  LU-1252/LU-1644.
- */
-#define OBD_CONNECT_MNE_SWAB		 OBD_CONNECT_MDS_MDS
-
-#define OCD_HAS_FLAG(ocd, flg)  \
-	(!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
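The !! in OCD_HAS_FLAG() collapses the masked 64-bit value to 0 or 1, so the result is safe to assign to a narrower type or use directly in a condition. A usage sketch (the surrounding function is illustrative):

	/* Did the peer negotiate grant shrinking at connect time? */
	static bool peer_supports_grant_shrink(const struct obd_connect_data *ocd)
	{
		return OCD_HAS_FLAG(ocd, GRANT_SHRINK);
	}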
-
-/* Features required for this version of the client to work with server */
-#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
-				 OBD_CONNECT_FULL20)
-
-/* This structure is used for both request and reply.
- *
- * If we eventually have separate connect data for different types, which we
- * almost certainly will, then perhaps we stick a union in here.
- */
-struct obd_connect_data {
-	__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
-	__u32 ocd_version;	 /* lustre release version number */
-	__u32 ocd_grant;	 /* initial cache grant amount (bytes) */
-	__u32 ocd_index;	 /* LOV index to connect to */
-	__u32 ocd_brw_size;	 /* Maximum BRW size in bytes */
-	__u64 ocd_ibits_known;   /* inode bits this client understands */
-	__u8  ocd_blocksize;     /* log2 of the backend filesystem blocksize */
-	__u8  ocd_inodespace;    /* log2 of the per-inode space consumption */
-	__u16 ocd_grant_extent;  /* per-extent grant overhead, in 1K blocks */
-	__u32 ocd_unused;	 /* also fix lustre_swab_connect */
-	__u64 ocd_transno;       /* first transno from client to be replayed */
-	__u32 ocd_group;	 /* MDS group on OST */
-	__u32 ocd_cksum_types;   /* supported checksum algorithms */
-	__u32 ocd_max_easize;    /* How big LOV EA can be on MDS */
-	__u32 ocd_instance;      /* instance # of this target */
-	__u64 ocd_maxbytes;      /* Maximum stripe size in bytes */
-	/* Fields after ocd_maxbytes are only accessible by the receiver
-	 * if the corresponding flag in ocd_connect_flags is set. Accessing
-	 * any field after ocd_maxbytes on the receiver without a valid flag
-	 * may result in out-of-bounds memory access and a kernel oops.
-	 */
-	__u16 ocd_maxmodrpcs;	/* Maximum modify RPCs in parallel */
-	__u16 padding0;		/* added 2.1.0. also fix lustre_swab_connect */
-	__u32 padding1;		/* added 2.1.0. also fix lustre_swab_connect */
-	__u64 ocd_connect_flags2;
-	__u64 padding3;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding4;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding5;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding6;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding7;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding8;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding9;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingA;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingB;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingC;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingD;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingE;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingF;	  /* added 2.1.0. also fix lustre_swab_connect */
-};
-
-/* XXX README XXX:
- * Please DO NOT use any fields here before first ensuring that this same
- * field is not in use on some other branch.  Please clear any such changes
- * with senior engineers before starting to use a new field.  Then, submit
- * a small patch against EVERY branch that ONLY adds the new field along with
- * the matching OBD_CONNECT flag, so that can be approved and landed easily to
- * reserve the flag for future use.
- */
-
-/*
- * Supported checksum algorithms. Up to 32 checksum types are supported.
- * (32-bit mask stored in obd_connect_data::ocd_cksum_types)
- * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new
- * algorithm and also the OBD_FL_CKSUM* flags.
- */
-enum cksum_type {
-	OBD_CKSUM_CRC32  = 0x00000001,
-	OBD_CKSUM_ADLER  = 0x00000002,
-	OBD_CKSUM_CRC32C = 0x00000004,
-};
-
-/*
- *   OST requests: OBDO & OBD request records
- */
-
-/* opcodes */
-enum ost_cmd {
-	OST_REPLY      =  0,       /* reply ? */
-	OST_GETATTR    =  1,
-	OST_SETATTR    =  2,
-	OST_READ       =  3,
-	OST_WRITE      =  4,
-	OST_CREATE     =  5,
-	OST_DESTROY    =  6,
-	OST_GET_INFO   =  7,
-	OST_CONNECT    =  8,
-	OST_DISCONNECT =  9,
-	OST_PUNCH      = 10,
-	OST_OPEN       = 11,
-	OST_CLOSE      = 12,
-	OST_STATFS     = 13,
-	OST_SYNC       = 16,
-	OST_SET_INFO   = 17,
-	OST_QUOTACHECK = 18, /* not used since 2.4 */
-	OST_QUOTACTL   = 19,
-	OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
-	OST_LAST_OPC
-};
-#define OST_FIRST_OPC  OST_REPLY
-
-enum obdo_flags {
-	OBD_FL_INLINEDATA   = 0x00000001,
-	OBD_FL_OBDMDEXISTS  = 0x00000002,
-	OBD_FL_DELORPHAN    = 0x00000004, /* if set in o_flags delete orphans */
-	OBD_FL_NORPC	    = 0x00000008, /* set in o_flags do in OSC not OST */
-	OBD_FL_IDONLY       = 0x00000010, /* set in o_flags only adjust obj id*/
-	OBD_FL_RECREATE_OBJS = 0x00000020, /* recreate missing obj */
-	OBD_FL_DEBUG_CHECK  = 0x00000040, /* echo client/server debug check */
-	OBD_FL_NO_USRQUOTA  = 0x00000100, /* the object's owner is over quota */
-	OBD_FL_NO_GRPQUOTA  = 0x00000200, /* the object's group is over quota */
-	OBD_FL_CREATE_CROW  = 0x00000400, /* object should be created on write */
-	OBD_FL_SRVLOCK      = 0x00000800, /* delegate DLM locking to server */
-	OBD_FL_CKSUM_CRC32  = 0x00001000, /* CRC32 checksum type */
-	OBD_FL_CKSUM_ADLER  = 0x00002000, /* ADLER checksum type */
-	OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */
-	OBD_FL_CKSUM_RSVD2  = 0x00008000, /* for future cksum types */
-	OBD_FL_CKSUM_RSVD3  = 0x00010000, /* for future cksum types */
-	OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
-	OBD_FL_MMAP	    = 0x00040000, /* object is mmapped on the client.
-					   * XXX: obsolete - reserved for old
-					   * clients prior to 2.2
-					   */
-	OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
-	OBD_FL_NOSPC_BLK    = 0x00100000, /* no more block space on OST */
-	OBD_FL_FLUSH	    = 0x00200000, /* flush pages on the OST */
-	OBD_FL_SHORT_IO	    = 0x00400000, /* short io request */
-
-	/* Note that while these checksum values are currently separate bits,
-	 * in 2.x we can actually allow all values from 1-31 if we wanted.
-	 */
-	OBD_FL_CKSUM_ALL    = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER |
-			      OBD_FL_CKSUM_CRC32C,
-
-	/* mask for local-only flag, which won't be sent over network */
-	OBD_FL_LOCAL_MASK   = 0xF0000000,
-};
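The OBD_FL_CKSUM_* bits above mirror the enum cksum_type bits declared earlier, just relocated into the o_flags word. A sketch of that correspondence (the helper name is illustrative; upstream keeps the real conversion in obd_cksum.h):

	/* Map a single cksum_type bit to its obdo flag equivalent. */
	static inline enum obdo_flags cksum_type_to_obd_flag(enum cksum_type t)
	{
		switch (t) {
		case OBD_CKSUM_CRC32:
			return OBD_FL_CKSUM_CRC32;
		case OBD_CKSUM_ADLER:
			return OBD_FL_CKSUM_ADLER;
		case OBD_CKSUM_CRC32C:
			return OBD_FL_CKSUM_CRC32C;
		}
		return OBD_FL_CKSUM_ADLER;	/* assumed fallback */
	}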
-
-/*
- * All LOV EA magics should have the same postfix.  If some new version of
- * Lustre introduces a new LOV EA magic, then after a downgrade to an old
- * Lustre, even though the old system does not recognize the new magic, it
- * can still distinguish the corrupted cases by checking the magic's postfix.
- */
-#define LOV_MAGIC_MAGIC 0x0BD0
-#define LOV_MAGIC_MASK  0xFFFF
-
-#define LOV_MAGIC_V1		(0x0BD10000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_JOIN_V1	(0x0BD20000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_V3		(0x0BD30000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_MIGRATE	(0x0BD40000 | LOV_MAGIC_MAGIC)
-/* reserved for specifying OSTs */
-#define LOV_MAGIC_SPECIFIC	(0x0BD50000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC		LOV_MAGIC_V1
-
-/*
- * magic for fully defined striping
- * the idea is that we should have different magics for striping "hints"
- * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct
- * lov_mds_md_v[13]). at the moment the magics are used in the wire protocol,
- * so we can't just change them without lengthy preparation, but we still
- * need a mechanism to allow LOD to differentiate hint versus ready striping.
- * so, at the moment we do a trick: MDT knows what to expect from a request
- * depending on the case (replay uses ready striping, non-replay req uses
- * hints), so MDT replaces the magic with the appropriate one and now LOD
- * can easily understand what's inside -bzzz
- */
-#define LOV_MAGIC_V1_DEF  0x0CD10BD0
-#define LOV_MAGIC_V3_DEF  0x0CD30BD0
-
-#define lov_pattern(pattern)		(pattern & ~LOV_PATTERN_F_MASK)
-#define lov_pattern_flags(pattern)	(pattern & LOV_PATTERN_F_MASK)
-
-#define lov_ost_data lov_ost_data_v1
-struct lov_ost_data_v1 {	  /* per-stripe data structure (little-endian)*/
-	struct ost_id l_ost_oi;	  /* OST object ID */
-	__u32 l_ost_gen;	  /* generation of this l_ost_idx */
-	__u32 l_ost_idx;	  /* OST index in LOV (lov_tgt_desc->tgts) */
-};
-
-#define lov_mds_md lov_mds_md_v1
-struct lov_mds_md_v1 {	    /* LOV EA mds/wire data (little-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_MAGIC_V1 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id	lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	/* lmm_stripe_count used to be __u32 */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	__u16 lmm_layout_gen;     /* layout generation number */
-	struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-};
-
-#define MAX_MD_SIZE							\
-	(sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
-#define MIN_MD_SIZE							\
-	(sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
-
-#define XATTR_NAME_ACL_ACCESS   "system.posix_acl_access"
-#define XATTR_NAME_ACL_DEFAULT  "system.posix_acl_default"
-#define XATTR_USER_PREFIX       "user."
-#define XATTR_TRUSTED_PREFIX    "trusted."
-#define XATTR_SECURITY_PREFIX   "security."
-#define XATTR_LUSTRE_PREFIX     "lustre."
-
-#define XATTR_NAME_LOV	  "trusted.lov"
-#define XATTR_NAME_LMA	  "trusted.lma"
-#define XATTR_NAME_LMV	  "trusted.lmv"
-#define XATTR_NAME_DEFAULT_LMV	"trusted.dmv"
-#define XATTR_NAME_LINK	 "trusted.link"
-#define XATTR_NAME_FID	  "trusted.fid"
-#define XATTR_NAME_VERSION      "trusted.version"
-#define XATTR_NAME_SOM		"trusted.som"
-#define XATTR_NAME_HSM		"trusted.hsm"
-#define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace"
-
-struct lov_mds_md_v3 {	    /* LOV EA mds/wire data (little-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_MAGIC_V3 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id	lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	/* lmm_stripe_count used to be __u32 */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	__u16 lmm_layout_gen;     /* layout generation number */
-	char  lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* must be 32bit aligned */
-	struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-};
-
-static inline __u32 lov_mds_md_size(__u16 stripes, __u32 lmm_magic)
-{
-	if (lmm_magic == LOV_MAGIC_V3)
-		return sizeof(struct lov_mds_md_v3) +
-				stripes * sizeof(struct lov_ost_data_v1);
-	else
-		return sizeof(struct lov_mds_md_v1) +
-				stripes * sizeof(struct lov_ost_data_v1);
-}
-
-static inline __u32
-lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
-{
-	switch (lmm_magic) {
-	case LOV_MAGIC_V1: {
-		struct lov_mds_md_v1 lmm;
-
-		if (buf_size < sizeof(lmm))
-			return 0;
-
-		return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]);
-	}
-	case LOV_MAGIC_V3: {
-		struct lov_mds_md_v3 lmm;
-
-		if (buf_size < sizeof(lmm))
-			return 0;
-
-		return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]);
-	}
-	default:
-		return 0;
-	}
-}
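The two sizing helpers are complementary: lov_mds_md_max_stripe_count() answers "how many stripes fit in this buffer", lov_mds_md_size() answers "how many bytes do N stripes need". A sketch of a buffer check built on them (lov_ea_fits is an illustrative name):

	/* Can buf_size bytes hold a LOV EA with the given stripe count? */
	static inline bool lov_ea_fits(size_t buf_size, __u16 stripes,
				       __u32 magic)
	{
		return stripes <= lov_mds_md_max_stripe_count(buf_size, magic);
	}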
-
-#define OBD_MD_FLID	   (0x00000001ULL) /* object ID */
-#define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
-#define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
-#define OBD_MD_FLCTIME     (0x00000008ULL) /* change time */
-#define OBD_MD_FLSIZE      (0x00000010ULL) /* size */
-#define OBD_MD_FLBLOCKS    (0x00000020ULL) /* allocated blocks count */
-#define OBD_MD_FLBLKSZ     (0x00000040ULL) /* block size */
-#define OBD_MD_FLMODE      (0x00000080ULL) /* access bits (mode & ~S_IFMT) */
-#define OBD_MD_FLTYPE      (0x00000100ULL) /* object type (mode & S_IFMT) */
-#define OBD_MD_FLUID       (0x00000200ULL) /* user ID */
-#define OBD_MD_FLGID       (0x00000400ULL) /* group ID */
-#define OBD_MD_FLFLAGS     (0x00000800ULL) /* flags word */
-#define OBD_MD_FLNLINK     (0x00002000ULL) /* link count */
-#define OBD_MD_FLGENER     (0x00004000ULL) /* generation number */
-/*#define OBD_MD_FLINLINE    (0x00008000ULL)  inline data. used until 1.6.5 */
-#define OBD_MD_FLRDEV      (0x00010000ULL) /* device number */
-#define OBD_MD_FLEASIZE    (0x00020000ULL) /* extended attribute data */
-#define OBD_MD_LINKNAME    (0x00040000ULL) /* symbolic link target */
-#define OBD_MD_FLHANDLE    (0x00080000ULL) /* file/lock handle */
-#define OBD_MD_FLCKSUM     (0x00100000ULL) /* bulk data checksum */
-#define OBD_MD_FLQOS       (0x00200000ULL) /* quality of service stats */
-/*#define OBD_MD_FLOSCOPQ    (0x00400000ULL) osc opaque data, never used */
-/*	OBD_MD_FLCOOKIE    (0x00800000ULL) obsolete in 2.8 */
-#define OBD_MD_FLGROUP     (0x01000000ULL) /* group */
-#define OBD_MD_FLFID       (0x02000000ULL) /* ->ost write inline fid */
-#define OBD_MD_FLEPOCH     (0x04000000ULL) /* ->ost write with ioepoch */
-					   /* ->mds if epoch opens or closes
-					    */
-#define OBD_MD_FLGRANT     (0x08000000ULL) /* ost preallocation space grant */
-#define OBD_MD_FLDIREA     (0x10000000ULL) /* dir's extended attribute data */
-#define OBD_MD_FLUSRQUOTA  (0x20000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLGRPQUOTA  (0x40000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */
-
-#define OBD_MD_MDS	   (0x0000000100000000ULL) /* where an inode lives */
-#define OBD_MD_REINT       (0x0000000200000000ULL) /* reintegrate oa */
-#define OBD_MD_MEA	   (0x0000000400000000ULL) /* CMD split EA  */
-#define OBD_MD_TSTATE      (0x0000000800000000ULL) /* transient state field */
-
-#define OBD_MD_FLXATTR       (0x0000001000000000ULL) /* xattr */
-#define OBD_MD_FLXATTRLS     (0x0000002000000000ULL) /* xattr list */
-#define OBD_MD_FLXATTRRM     (0x0000004000000000ULL) /* xattr remove */
-#define OBD_MD_FLACL	     (0x0000008000000000ULL) /* ACL */
-/*	OBD_MD_FLRMTPERM     (0x0000010000000000ULL) remote perm, obsolete */
-#define OBD_MD_FLMDSCAPA     (0x0000020000000000ULL) /* MDS capability */
-#define OBD_MD_FLOSSCAPA     (0x0000040000000000ULL) /* OSS capability */
-#define OBD_MD_FLCKSPLIT     (0x0000080000000000ULL) /* Check split on server */
-#define OBD_MD_FLCROSSREF    (0x0000100000000000ULL) /* Cross-ref case */
-#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes
-						      * under lock; for xattr
-						      * requests means the
-						      * client holds the lock
-						      */
-#define OBD_MD_FLOBJCOUNT    (0x0000400000000000ULL) /* for multiple destroy */
-
-/*	OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */
-/*	OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */
-/*	OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */
-/*	OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
-
-#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
-							    * executed
-							    */
-
-#define OBD_MD_DEFAULT_MEA   (0x0040000000000000ULL) /* default MEA */
-
-#define OBD_MD_FLGETATTR (OBD_MD_FLID    | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
-			  OBD_MD_FLCTIME | OBD_MD_FLSIZE  | OBD_MD_FLBLKSZ | \
-			  OBD_MD_FLMODE  | OBD_MD_FLTYPE  | OBD_MD_FLUID   | \
-			  OBD_MD_FLGID   | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \
-			  OBD_MD_FLGENER | OBD_MD_FLRDEV  | OBD_MD_FLGROUP)
-
-#define OBD_MD_FLXATTRALL (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS)
-
-/* don't forget obdo_fid which is way down at the bottom so it can
- * come after the definition of llog_cookie
- */
-
-enum hss_valid {
-	HSS_SETMASK	= 0x01,
-	HSS_CLEARMASK	= 0x02,
-	HSS_ARCHIVE_ID	= 0x04,
-};
-
-struct hsm_state_set {
-	__u32	hss_valid;
-	__u32	hss_archive_id;
-	__u64	hss_setmask;
-	__u64	hss_clearmask;
-};
-
-/* ost_body.data values for OST_BRW */
-
-#define OBD_BRW_READ		0x01
-#define OBD_BRW_WRITE		0x02
-#define OBD_BRW_RWMASK		(OBD_BRW_READ | OBD_BRW_WRITE)
-#define OBD_BRW_SYNC		0x08 /* this page is a part of synchronous
-				      * transfer and is not accounted in
-				      * the grant.
-				      */
-#define OBD_BRW_CHECK		0x10
-#define OBD_BRW_FROM_GRANT      0x20 /* the osc manages this under llite */
-#define OBD_BRW_GRANTED		0x40 /* the ost manages this */
-#define OBD_BRW_NOCACHE		0x80 /* this page is a part of non-cached IO */
-#define OBD_BRW_NOQUOTA	       0x100
-#define OBD_BRW_SRVLOCK	       0x200 /* Client holds no lock over this page */
-#define OBD_BRW_ASYNC	       0x400 /* Server may delay commit to disk */
-#define OBD_BRW_MEMALLOC       0x800 /* Client runs in the "kswapd" context */
-#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
-#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
-#define OBD_BRW_SOFT_SYNC     0x4000 /* This flag notifies the server
-				      * that the client is running low on
-				      * space for unstable pages; asking
-				      * it to sync quickly
-				      */
-
-#define OBD_OBJECT_EOF	LUSTRE_EOF
-
-#define OST_MIN_PRECREATE 32
-#define OST_MAX_PRECREATE 20000
-
-struct obd_ioobj {
-	struct ost_id	ioo_oid;	/* object ID, if multi-obj BRW */
-	__u32		ioo_max_brw;	/* low 16 bits were o_mode before 2.4,
-					 * now (PTLRPC_BULK_OPS_COUNT - 1) in
-					 * high 16 bits in 2.4 and later
-					 */
-	__u32		ioo_bufcnt;	/* number of niobufs for this object */
-};
-
-/*
- * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in
- * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS.
- * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits.
- */
-#define IOOBJ_MAX_BRW_BITS	16
-#define ioobj_max_brw_get(ioo)	(((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
-#define ioobj_max_brw_set(ioo, num)					\
-do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
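Because the macros store num - 1 in the high 16 bits, the encoding round-trips exactly for counts from 1 to 65536. A small demonstration sketch (assert.h assumed; kernel code would not use assert()):

	#include <assert.h>

	static void ioobj_max_brw_demo(void)
	{
		struct obd_ioobj ioo = { };

		ioobj_max_brw_set(&ioo, 4);	/* stores (4 - 1) << 16 */
		assert(ioobj_max_brw_get(&ioo) == 4);
	}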
-
-/* multiple of 8 bytes => can array */
-struct niobuf_remote {
-	__u64	rnb_offset;
-	__u32	rnb_len;
-	__u32	rnb_flags;
-};
-
-/* lock value block communicated between the filter and llite */
-
-/* OST_LVB_ERR_INIT is needed because the return code rc is negative:
- * ((OST_LVB_ERR_MASK + rc) & OST_LVB_ERR_MASK) != OST_LVB_ERR_MASK, so
- * errors are encoded relative to OST_LVB_ERR_INIT instead, whose extra
- * high bit keeps the result within the range matched by the mask.
- */
-#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL
-#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL
-#define OST_LVB_IS_ERR(blocks)					  \
-	((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK)
-#define OST_LVB_SET_ERR(blocks, rc)				     \
-	do { blocks = OST_LVB_ERR_INIT + rc; } while (0)
-#define OST_LVB_GET_ERR(blocks)    (int)(blocks - OST_LVB_ERR_INIT)
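Worked through with rc == -28 (-ENOSPC): OST_LVB_SET_ERR() stores 0xffbadbad80000000 - 28 = 0xffbadbad7fffffe4, which still matches OST_LVB_ERR_MASK because the extra 0x80000000 in OST_LVB_ERR_INIT absorbs the negative rc. A demonstration sketch (assert.h assumed):

	#include <assert.h>

	static void ost_lvb_err_demo(void)
	{
		__u64 blocks;

		OST_LVB_SET_ERR(blocks, -28);
		assert(OST_LVB_IS_ERR(blocks));
		assert(OST_LVB_GET_ERR(blocks) == -28);
	}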
-
-struct ost_lvb_v1 {
-	__u64		lvb_size;
-	__s64		lvb_mtime;
-	__s64		lvb_atime;
-	__s64		lvb_ctime;
-	__u64		lvb_blocks;
-};
-
-struct ost_lvb {
-	__u64		lvb_size;
-	__s64		lvb_mtime;
-	__s64		lvb_atime;
-	__s64		lvb_ctime;
-	__u64		lvb_blocks;
-	__u32		lvb_mtime_ns;
-	__u32		lvb_atime_ns;
-	__u32		lvb_ctime_ns;
-	__u32		lvb_padding;
-};
-
-/*
- *   lquota data structures
- */
-
-/* The lquota_id structure is a union of all the possible identifier types that
- * can be used with quota, this includes:
- * - 64-bit user ID
- * - 64-bit group ID
- * - a FID which can be used for per-directory quota in the future
- */
-union lquota_id {
-	struct lu_fid	qid_fid; /* FID for per-directory quota */
-	__u64		qid_uid; /* user identifier */
-	__u64		qid_gid; /* group identifier */
-};
-
-/* quotactl management */
-struct obd_quotactl {
-	__u32			qc_cmd;
-	__u32			qc_type; /* see Q_* flag below */
-	__u32			qc_id;
-	__u32			qc_stat;
-	struct obd_dqinfo	qc_dqinfo;
-	struct obd_dqblk	qc_dqblk;
-};
-
-#define Q_COPY(out, in, member) (out)->member = (in)->member
-
-#define QCTL_COPY(out, in)		\
-do {					\
-	Q_COPY(out, in, qc_cmd);	\
-	Q_COPY(out, in, qc_type);	\
-	Q_COPY(out, in, qc_id);		\
-	Q_COPY(out, in, qc_stat);	\
-	Q_COPY(out, in, qc_dqinfo);	\
-	Q_COPY(out, in, qc_dqblk);	\
-} while (0)
-
-/* Data structures associated with the quota locks */
-
-/* Glimpse descriptor used for the index & per-ID quota locks */
-struct ldlm_gl_lquota_desc {
-	union lquota_id	gl_id;    /* quota ID subject to the glimpse */
-	__u64		gl_flags; /* see LQUOTA_FL* below */
-	__u64		gl_ver;   /* new index version */
-	__u64		gl_hardlimit; /* new hardlimit or qunit value */
-	__u64		gl_softlimit; /* new softlimit */
-	__u64		gl_time;
-	__u64		gl_pad2;
-};
-
-/* quota glimpse flags */
-#define LQUOTA_FL_EDQUOT 0x1 /* user/group out of quota space on QMT */
-
-/* LVB used with quota (global and per-ID) locks */
-struct lquota_lvb {
-	__u64	lvb_flags;	/* see LQUOTA_FL* above */
-	__u64	lvb_id_may_rel; /* space that might be released later */
-	__u64	lvb_id_rel;     /* space released by the slave for this ID */
-	__u64	lvb_id_qunit;   /* current qunit value */
-	__u64	lvb_pad1;
-};
-
-/* op codes */
-enum quota_cmd {
-	QUOTA_DQACQ	= 601,
-	QUOTA_DQREL	= 602,
-	QUOTA_LAST_OPC
-};
-#define QUOTA_FIRST_OPC	QUOTA_DQACQ
-
-/*
- *   MDS REQ RECORDS
- */
-
-/* opcodes */
-enum mds_cmd {
-	MDS_GETATTR		= 33,
-	MDS_GETATTR_NAME	= 34,
-	MDS_CLOSE		= 35,
-	MDS_REINT		= 36,
-	MDS_READPAGE		= 37,
-	MDS_CONNECT		= 38,
-	MDS_DISCONNECT		= 39,
-	MDS_GETSTATUS		= 40,
-	MDS_STATFS		= 41,
-	MDS_PIN			= 42, /* obsolete, never used in a release */
-	MDS_UNPIN		= 43, /* obsolete, never used in a release */
-	MDS_SYNC		= 44,
-	MDS_DONE_WRITING	= 45, /* obsolete since 2.8.0 */
-	MDS_SET_INFO		= 46,
-	MDS_QUOTACHECK		= 47, /* not used since 2.4 */
-	MDS_QUOTACTL		= 48,
-	MDS_GETXATTR		= 49,
-	MDS_SETXATTR		= 50, /* obsolete, now it's MDS_REINT op */
-	MDS_WRITEPAGE		= 51,
-	MDS_IS_SUBDIR		= 52, /* obsolete, never used in a release */
-	MDS_GET_INFO		= 53,
-	MDS_HSM_STATE_GET	= 54,
-	MDS_HSM_STATE_SET	= 55,
-	MDS_HSM_ACTION		= 56,
-	MDS_HSM_PROGRESS	= 57,
-	MDS_HSM_REQUEST		= 58,
-	MDS_HSM_CT_REGISTER	= 59,
-	MDS_HSM_CT_UNREGISTER	= 60,
-	MDS_SWAP_LAYOUTS	= 61,
-	MDS_LAST_OPC
-};
-
-#define MDS_FIRST_OPC    MDS_GETATTR
-
-/*
- * Do not exceed 63
- */
-
-enum mdt_reint_cmd {
-	REINT_SETATTR  = 1,
-	REINT_CREATE   = 2,
-	REINT_LINK     = 3,
-	REINT_UNLINK   = 4,
-	REINT_RENAME   = 5,
-	REINT_OPEN     = 6,
-	REINT_SETXATTR = 7,
-	REINT_RMENTRY  = 8,
-	REINT_MIGRATE  = 9,
-	REINT_MAX
-};
-
-/* the disposition of the intent outlines what was executed */
-#define DISP_IT_EXECD	0x00000001
-#define DISP_LOOKUP_EXECD    0x00000002
-#define DISP_LOOKUP_NEG      0x00000004
-#define DISP_LOOKUP_POS      0x00000008
-#define DISP_OPEN_CREATE     0x00000010
-#define DISP_OPEN_OPEN       0x00000020
-#define DISP_ENQ_COMPLETE    0x00400000		/* obsolete and unused */
-#define DISP_ENQ_OPEN_REF    0x00800000
-#define DISP_ENQ_CREATE_REF  0x01000000
-#define DISP_OPEN_LOCK       0x02000000
-#define DISP_OPEN_LEASE      0x04000000
-#define DISP_OPEN_STRIPE     0x08000000
-#define DISP_OPEN_DENY		0x10000000
-
-/* INODE LOCK PARTS */
-#define MDS_INODELOCK_LOOKUP 0x000001	/* For namespace, dentry etc, and also
-					 * was used to protect permission (mode,
-					 * owner, group etc) before 2.4.
-					 */
-#define MDS_INODELOCK_UPDATE 0x000002	/* size, links, timestamps */
-#define MDS_INODELOCK_OPEN   0x000004	/* For opened files */
-#define MDS_INODELOCK_LAYOUT 0x000008	/* for layout */
-
-/* The PERM bit was added in 2.4; it is used to protect permission (mode,
- * owner, group, acl, etc.) separately from the LOOKUP lock, because for
- * remote directories (in DNE) these locks will be granted by different
- * MDTs (different ldlm namespaces).
- *
- * For a local directory, the MDT always grants UPDATE_LOCK|PERM_LOCK
- * together.  For a remote directory, the master MDT, where the remote
- * directory is, grants UPDATE_LOCK|PERM_LOCK, and the remote MDT, where
- * the name entry is, grants LOOKUP_LOCK.
- */
-#define MDS_INODELOCK_PERM   0x000010
-#define MDS_INODELOCK_XATTR  0x000020	/* extended attributes */
-
-#define MDS_INODELOCK_MAXSHIFT 5
-/* This FULL lock is useful to take on unlink sort of operations */
-#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1)
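With MDS_INODELOCK_MAXSHIFT == 5, FULL expands to (1 << 6) - 1 == 0x3f, i.e. exactly the OR of the six bits defined above. A compile-time check sketch (C11 _Static_assert; kernel code would use BUILD_BUG_ON instead):

	_Static_assert(MDS_INODELOCK_FULL ==
		       (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
			MDS_INODELOCK_OPEN   | MDS_INODELOCK_LAYOUT |
			MDS_INODELOCK_PERM   | MDS_INODELOCK_XATTR),
		       "MDS_INODELOCK_FULL must cover every defined lock bit");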
-
-/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
- * but was moved into name[1] along with the OID to avoid consuming the
- * name[2,3] fields that need to be used for the quota id (also a FID).
- */
-enum {
-	LUSTRE_RES_ID_SEQ_OFF = 0,
-	LUSTRE_RES_ID_VER_OID_OFF = 1,
-	LUSTRE_RES_ID_WAS_VER_OFF = 2, /* see note above */
-	LUSTRE_RES_ID_QUOTA_SEQ_OFF = 2,
-	LUSTRE_RES_ID_QUOTA_VER_OID_OFF = 3,
-	LUSTRE_RES_ID_HSH_OFF = 3
-};
-
-#define MDS_STATUS_CONN 1
-#define MDS_STATUS_LOV 2
-
-/* these should be identical to their EXT4_*_FL counterparts; they are
- * redefined here only to avoid dragging in fs/ext4/ext4.h
- */
-#define LUSTRE_SYNC_FL	 0x00000008 /* Synchronous updates */
-#define LUSTRE_IMMUTABLE_FL    0x00000010 /* Immutable file */
-#define LUSTRE_APPEND_FL       0x00000020 /* writes to file may only append */
-#define LUSTRE_NODUMP_FL	0x00000040 /* do not dump file */
-#define LUSTRE_NOATIME_FL      0x00000080 /* do not update atime */
-#define LUSTRE_INDEX_FL		0x00001000 /* hash-indexed directory */
-#define LUSTRE_DIRSYNC_FL      0x00010000 /* dirsync behaviour (dir only) */
-#define LUSTRE_TOPDIR_FL	0x00020000 /* Top of directory hierarchies*/
-#define LUSTRE_DIRECTIO_FL	0x00100000 /* Use direct i/o */
-#define LUSTRE_INLINE_DATA_FL	0x10000000 /* Inode has inline data. */
-
-/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
- * for the client inode i_flags.  The LUSTRE_*_FL are the Lustre wire
- * protocol equivalents of LDISKFS_*_FL values stored on disk, while
- * the S_* flags are kernel-internal values that change between kernel
- * versions.  These flags are set/cleared via FSFILT_IOC_{GET,SET}_FLAGS.
- * See b=16526 for a full history.
- */
-static inline int ll_ext_to_inode_flags(int flags)
-{
-	return (((flags & LUSTRE_SYNC_FL)      ? S_SYNC      : 0) |
-		((flags & LUSTRE_NOATIME_FL)   ? S_NOATIME   : 0) |
-		((flags & LUSTRE_APPEND_FL)    ? S_APPEND    : 0) |
-		((flags & LUSTRE_DIRSYNC_FL)   ? S_DIRSYNC   : 0) |
-		((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0));
-}
-
-static inline int ll_inode_to_ext_flags(int iflags)
-{
-	return (((iflags & S_SYNC)      ? LUSTRE_SYNC_FL      : 0) |
-		((iflags & S_NOATIME)   ? LUSTRE_NOATIME_FL   : 0) |
-		((iflags & S_APPEND)    ? LUSTRE_APPEND_FL    : 0) |
-		((iflags & S_DIRSYNC)   ? LUSTRE_DIRSYNC_FL   : 0) |
-		((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0));
-}
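Only the five flags mapped in these two helpers survive a round-trip; wire flags with no S_* counterpart here (LUSTRE_NODUMP_FL, for instance) are silently dropped by ll_ext_to_inode_flags(). A sketch of the invariant for the mapped subset (assert.h assumed):

	#include <assert.h>

	static void lustre_flags_roundtrip_demo(void)
	{
		int wire = LUSTRE_SYNC_FL | LUSTRE_APPEND_FL;

		assert(ll_inode_to_ext_flags(ll_ext_to_inode_flags(wire)) == wire);
	}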
-
-/* 64 possible states */
-enum md_transient_state {
-	MS_RESTORE	= (1 << 0),	/* restore is running */
-};
-
-struct mdt_body {
-	struct lu_fid mbo_fid1;
-	struct lu_fid mbo_fid2;
-	struct lustre_handle mbo_handle;
-	__u64	mbo_valid;
-	__u64	mbo_size;	/* Offset, in the case of MDS_READPAGE */
-	__s64	mbo_mtime;
-	__s64	mbo_atime;
-	__s64	mbo_ctime;
-	__u64	mbo_blocks;	/* XID, in the case of MDS_READPAGE */
-	__u64	mbo_ioepoch;
-	__u64	mbo_t_state;	/* transient file state defined in
-				 * enum md_transient_state
-				 * was "ino" until 2.4.0
-				 */
-	__u32	mbo_fsuid;
-	__u32	mbo_fsgid;
-	__u32	mbo_capability;
-	__u32	mbo_mode;
-	__u32	mbo_uid;
-	__u32	mbo_gid;
-	__u32	mbo_flags;	/* LUSTRE_*_FL file attributes */
-	__u32	mbo_rdev;
-	__u32	mbo_nlink;	/* #bytes to read in the case of MDS_READPAGE */
-	__u32	mbo_unused2;	/* was "generation" until 2.4.0 */
-	__u32	mbo_suppgid;
-	__u32	mbo_eadatasize;
-	__u32	mbo_aclsize;
-	__u32	mbo_max_mdsize;
-	__u32	mbo_unused3;	/* was max_cookiesize until 2.8 */
-	__u32	mbo_uid_h;	/* high 32-bits of uid, for FUID */
-	__u32	mbo_gid_h;	/* high 32-bits of gid, for FUID */
-	__u32	mbo_padding_5;	/* also fix lustre_swab_mdt_body */
-	__u64	mbo_padding_6;
-	__u64	mbo_padding_7;
-	__u64	mbo_padding_8;
-	__u64	mbo_padding_9;
-	__u64	mbo_padding_10;
-}; /* 216 */
-
-struct mdt_ioepoch {
-	struct lustre_handle mio_handle;
-	__u64 mio_unused1; /* was ioepoch */
-	__u32 mio_unused2; /* was flags */
-	__u32 mio_padding;
-};
-
-/* permissions for md_perm.mp_perm */
-enum {
-	CFS_SETUID_PERM = 0x01,
-	CFS_SETGID_PERM = 0x02,
-	CFS_SETGRP_PERM = 0x04,
-};
-
-struct mdt_rec_setattr {
-	__u32	   sa_opcode;
-	__u32	   sa_cap;
-	__u32	   sa_fsuid;
-	__u32	   sa_fsuid_h;
-	__u32	   sa_fsgid;
-	__u32	   sa_fsgid_h;
-	__u32	   sa_suppgid;
-	__u32	   sa_suppgid_h;
-	__u32	   sa_padding_1;
-	__u32	   sa_padding_1_h;
-	struct lu_fid   sa_fid;
-	__u64	   sa_valid;
-	__u32	   sa_uid;
-	__u32	   sa_gid;
-	__u64	   sa_size;
-	__u64	   sa_blocks;
-	__s64	   sa_mtime;
-	__s64	   sa_atime;
-	__s64	   sa_ctime;
-	__u32	   sa_attr_flags;
-	__u32	   sa_mode;
-	__u32	   sa_bias;      /* some operation flags */
-	__u32	   sa_padding_3;
-	__u32	   sa_padding_4;
-	__u32	   sa_padding_5;
-};
-
-/*
- * Attribute flags used in mdt_rec_setattr::sa_valid.
- * The kernel's #defines for ATTR_* should not be used over the network
- * since the client and MDS may run different kernels (see bug 13828)
- * Therefore, we should only use MDS_ATTR_* attributes for sa_valid.
- */
-#define MDS_ATTR_MODE	       0x1ULL /* = 1 */
-#define MDS_ATTR_UID	       0x2ULL /* = 2 */
-#define MDS_ATTR_GID	       0x4ULL /* = 4 */
-#define MDS_ATTR_SIZE	       0x8ULL /* = 8 */
-#define MDS_ATTR_ATIME	      0x10ULL /* = 16 */
-#define MDS_ATTR_MTIME	      0x20ULL /* = 32 */
-#define MDS_ATTR_CTIME	      0x40ULL /* = 64 */
-#define MDS_ATTR_ATIME_SET    0x80ULL /* = 128 */
-#define MDS_ATTR_MTIME_SET   0x100ULL /* = 256 */
-#define MDS_ATTR_FORCE       0x200ULL /* = 512, not a change itself, but forces the change through */
-#define MDS_ATTR_ATTR_FLAG   0x400ULL /* = 1024 */
-#define MDS_ATTR_KILL_SUID   0x800ULL /* = 2048 */
-#define MDS_ATTR_KILL_SGID  0x1000ULL /* = 4096 */
-#define MDS_ATTR_CTIME_SET  0x2000ULL /* = 8192 */
-#define MDS_ATTR_FROM_OPEN  0x4000ULL /* = 16384, called from open path,
-				       * ie O_TRUNC
-				       */
-#define MDS_ATTR_BLOCKS     0x8000ULL /* = 32768 */
-
-#define MDS_FMODE_CLOSED	 00000000
-#define MDS_FMODE_EXEC	   00000004
-/*	MDS_FMODE_EPOCH		01000000 obsolete since 2.8.0 */
-/*	MDS_FMODE_TRUNC		02000000 obsolete since 2.8.0 */
-/*	MDS_FMODE_SOM		04000000 obsolete since 2.8.0 */
-
-#define MDS_OPEN_CREATED	 00000010
-#define MDS_OPEN_CROSS	   00000020
-
-#define MDS_OPEN_CREAT	   00000100
-#define MDS_OPEN_EXCL	    00000200
-#define MDS_OPEN_TRUNC	   00001000
-#define MDS_OPEN_APPEND	  00002000
-#define MDS_OPEN_SYNC	    00010000
-#define MDS_OPEN_DIRECTORY       00200000
-
-#define MDS_OPEN_BY_FID		040000000 /* open_by_fid for known object */
-#define MDS_OPEN_DELAY_CREATE  0100000000 /* delay initial object create */
-#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
-#define MDS_OPEN_JOIN_FILE     0400000000 /* open for join file.
-					   * We no longer support JOIN FILE;
-					   * this flag is reserved only to
-					   * keep the bit from being
-					   * reused.
-					   */
-
-#define MDS_OPEN_LOCK	      04000000000 /* This open requires open lock */
-#define MDS_OPEN_HAS_EA      010000000000 /* specify object create pattern */
-#define MDS_OPEN_HAS_OBJS    020000000000 /* just set the EA, the objects already exist */
-#define MDS_OPEN_NORESTORE  0100000000000ULL /* Do not restore file at open */
-#define MDS_OPEN_NEWSTRIPE  0200000000000ULL /* New stripe needed (restripe or
-					      * hsm restore)
-					      */
-#define MDS_OPEN_VOLATILE   0400000000000ULL /* File is volatile = created
-					      * unlinked
-					      */
-#define MDS_OPEN_LEASE	   01000000000000ULL /* Open the file and grant lease
-					      * delegation, succeed if it's not
-					      * being opened with conflict mode.
-					      */
-#define MDS_OPEN_RELEASE   02000000000000ULL /* Open the file for HSM release */
-
-#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |	\
-			      MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |	\
-			      MDS_OPEN_BY_FID | MDS_OPEN_LEASE |	\
-			      MDS_OPEN_RELEASE)
-
-enum mds_op_bias {
-	MDS_CHECK_SPLIT		= 1 << 0,
-	MDS_CROSS_REF		= 1 << 1,
-	MDS_VTX_BYPASS		= 1 << 2,
-	MDS_PERM_BYPASS		= 1 << 3,
-/*	MDS_SOM			= 1 << 4, obsolete since 2.8.0 */
-	MDS_QUOTA_IGNORE	= 1 << 5,
-	MDS_CLOSE_CLEANUP	= 1 << 6,
-	MDS_KEEP_ORPHAN		= 1 << 7,
-	MDS_RECOV_OPEN		= 1 << 8,
-	MDS_DATA_MODIFIED	= 1 << 9,
-	MDS_CREATE_VOLATILE	= 1 << 10,
-	MDS_OWNEROVERRIDE	= 1 << 11,
-	MDS_HSM_RELEASE		= 1 << 12,
-	MDS_RENAME_MIGRATE	= 1 << 13,
-	MDS_CLOSE_LAYOUT_SWAP	= 1 << 14,
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_create {
-	__u32	   cr_opcode;
-	__u32	   cr_cap;
-	__u32	   cr_fsuid;
-	__u32	   cr_fsuid_h;
-	__u32	   cr_fsgid;
-	__u32	   cr_fsgid_h;
-	__u32	   cr_suppgid1;
-	__u32	   cr_suppgid1_h;
-	__u32	   cr_suppgid2;
-	__u32	   cr_suppgid2_h;
-	struct lu_fid   cr_fid1;
-	struct lu_fid   cr_fid2;
-	struct lustre_handle cr_old_handle; /* handle in case of open replay */
-	__s64	   cr_time;
-	__u64	   cr_rdev;
-	__u64	   cr_ioepoch;
-	__u64	   cr_padding_1;   /* rr_blocks */
-	__u32	   cr_mode;
-	__u32	   cr_bias;
-	/* the set/get_mrc_cr_flags() helpers are needed to access the
-	 * 64-bit cr_flags [cr_flags_l, cr_flags_h]; the split was done to
-	 * extend the cr_flags size without breaking 1.8 compatibility
-	 */
-	__u32	   cr_flags_l;     /* for use with open, low  32 bits  */
-	__u32	   cr_flags_h;     /* for use with open, high 32 bits */
-	__u32	   cr_umask;       /* umask for create */
-	__u32	   cr_padding_4;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_link {
-	__u32	   lk_opcode;
-	__u32	   lk_cap;
-	__u32	   lk_fsuid;
-	__u32	   lk_fsuid_h;
-	__u32	   lk_fsgid;
-	__u32	   lk_fsgid_h;
-	__u32	   lk_suppgid1;
-	__u32	   lk_suppgid1_h;
-	__u32	   lk_suppgid2;
-	__u32	   lk_suppgid2_h;
-	struct lu_fid   lk_fid1;
-	struct lu_fid   lk_fid2;
-	__s64	   lk_time;
-	__u64	   lk_padding_1;   /* rr_atime */
-	__u64	   lk_padding_2;   /* rr_ctime */
-	__u64	   lk_padding_3;   /* rr_size */
-	__u64	   lk_padding_4;   /* rr_blocks */
-	__u32	   lk_bias;
-	__u32	   lk_padding_5;   /* rr_mode */
-	__u32	   lk_padding_6;   /* rr_flags */
-	__u32	   lk_padding_7;   /* rr_padding_2 */
-	__u32	   lk_padding_8;   /* rr_padding_3 */
-	__u32	   lk_padding_9;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_unlink {
-	__u32	   ul_opcode;
-	__u32	   ul_cap;
-	__u32	   ul_fsuid;
-	__u32	   ul_fsuid_h;
-	__u32	   ul_fsgid;
-	__u32	   ul_fsgid_h;
-	__u32	   ul_suppgid1;
-	__u32	   ul_suppgid1_h;
-	__u32	   ul_suppgid2;
-	__u32	   ul_suppgid2_h;
-	struct lu_fid   ul_fid1;
-	struct lu_fid   ul_fid2;
-	__s64	   ul_time;
-	__u64	   ul_padding_2;   /* rr_atime */
-	__u64	   ul_padding_3;   /* rr_ctime */
-	__u64	   ul_padding_4;   /* rr_size */
-	__u64	   ul_padding_5;   /* rr_blocks */
-	__u32	   ul_bias;
-	__u32	   ul_mode;
-	__u32	   ul_padding_6;   /* rr_flags */
-	__u32	   ul_padding_7;   /* rr_padding_2 */
-	__u32	   ul_padding_8;   /* rr_padding_3 */
-	__u32	   ul_padding_9;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_rename {
-	__u32	   rn_opcode;
-	__u32	   rn_cap;
-	__u32	   rn_fsuid;
-	__u32	   rn_fsuid_h;
-	__u32	   rn_fsgid;
-	__u32	   rn_fsgid_h;
-	__u32	   rn_suppgid1;
-	__u32	   rn_suppgid1_h;
-	__u32	   rn_suppgid2;
-	__u32	   rn_suppgid2_h;
-	struct lu_fid   rn_fid1;
-	struct lu_fid   rn_fid2;
-	__s64	   rn_time;
-	__u64	   rn_padding_1;   /* rr_atime */
-	__u64	   rn_padding_2;   /* rr_ctime */
-	__u64	   rn_padding_3;   /* rr_size */
-	__u64	   rn_padding_4;   /* rr_blocks */
-	__u32	   rn_bias;	/* some operation flags */
-	__u32	   rn_mode;	/* cross-ref rename has mode */
-	__u32	   rn_padding_5;   /* rr_flags */
-	__u32	   rn_padding_6;   /* rr_padding_2 */
-	__u32	   rn_padding_7;   /* rr_padding_3 */
-	__u32	   rn_padding_8;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_setxattr {
-	__u32	   sx_opcode;
-	__u32	   sx_cap;
-	__u32	   sx_fsuid;
-	__u32	   sx_fsuid_h;
-	__u32	   sx_fsgid;
-	__u32	   sx_fsgid_h;
-	__u32	   sx_suppgid1;
-	__u32	   sx_suppgid1_h;
-	__u32	   sx_suppgid2;
-	__u32	   sx_suppgid2_h;
-	struct lu_fid   sx_fid;
-	__u64	   sx_padding_1;   /* These three are rr_fid2 */
-	__u32	   sx_padding_2;
-	__u32	   sx_padding_3;
-	__u64	   sx_valid;
-	__s64	   sx_time;
-	__u64	   sx_padding_5;   /* rr_ctime */
-	__u64	   sx_padding_6;   /* rr_size */
-	__u64	   sx_padding_7;   /* rr_blocks */
-	__u32	   sx_size;
-	__u32	   sx_flags;
-	__u32	   sx_padding_8;   /* rr_flags */
-	__u32	   sx_padding_9;   /* rr_padding_2 */
-	__u32	   sx_padding_10;  /* rr_padding_3 */
-	__u32	   sx_padding_11;  /* rr_padding_4 */
-};
-
-/*
- * mdt_rec_reint is the template for all mdt_reint_xxx structures.
- * Do NOT change the size of the various members, otherwise the values
- * will be corrupted by lustre_swab_mdt_rec_reint().
- *
- * If you add new members in other mdt_reint_xxx structures and need to use the
- * rr_padding_x fields, then update lustre_swab_mdt_rec_reint() also.
- */
-struct mdt_rec_reint {
-	__u32	   rr_opcode;
-	__u32	   rr_cap;
-	__u32	   rr_fsuid;
-	__u32	   rr_fsuid_h;
-	__u32	   rr_fsgid;
-	__u32	   rr_fsgid_h;
-	__u32	   rr_suppgid1;
-	__u32	   rr_suppgid1_h;
-	__u32	   rr_suppgid2;
-	__u32	   rr_suppgid2_h;
-	struct lu_fid   rr_fid1;
-	struct lu_fid   rr_fid2;
-	__s64	   rr_mtime;
-	__s64	   rr_atime;
-	__s64	   rr_ctime;
-	__u64	   rr_size;
-	__u64	   rr_blocks;
-	__u32	   rr_bias;
-	__u32	   rr_mode;
-	__u32	   rr_flags;
-	__u32	   rr_flags_h;
-	__u32	   rr_umask;
-	__u32	   rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
-};
-
-/* lmv structures */
-struct lmv_desc {
-	__u32 ld_tgt_count;		/* how many MDS's */
-	__u32 ld_active_tgt_count;	 /* how many active */
-	__u32 ld_default_stripe_count;     /* how many objects are used */
-	__u32 ld_pattern;		  /* default hash pattern */
-	__u64 ld_default_hash_size;
-	__u64 ld_padding_1;		/* also fix lustre_swab_lmv_desc */
-	__u32 ld_padding_2;		/* also fix lustre_swab_lmv_desc */
-	__u32 ld_qos_maxage;	       /* in seconds */
-	__u32 ld_padding_3;		/* also fix lustre_swab_lmv_desc */
-	__u32 ld_padding_4;		/* also fix lustre_swab_lmv_desc */
-	struct obd_uuid ld_uuid;
-};
-
-/* LMV layout EA, and it will be stored both in master and slave object */
-struct lmv_mds_md_v1 {
-	__u32 lmv_magic;
-	__u32 lmv_stripe_count;
-	__u32 lmv_master_mdt_index;	/* On master object, it is master
-					 * MDT index, on slave object, it
-					 * is stripe index of the slave obj
-					 */
-	__u32 lmv_hash_type;		/* dir stripe policy, i.e. indicates
-					 * which hash function is to be used.
-					 * Note: only the lower 16 bits are
-					 * used for now. The higher 16 bits
-					 * will be used to mark the object
-					 * status, e.g. migrating or dead.
-					 */
-	__u32 lmv_layout_version;	/* Used for directory restriping */
-	__u32 lmv_padding1;
-	__u64 lmv_padding2;
-	__u64 lmv_padding3;
-	char lmv_pool_name[LOV_MAXPOOLNAME + 1];/* pool name */
-	struct lu_fid lmv_stripe_fids[0];	/* FIDs for each stripe */
-};
-
-#define LMV_MAGIC_V1	 0x0CD20CD0	/* normal stripe lmv magic */
-#define LMV_MAGIC	 LMV_MAGIC_V1
-
-/* #define LMV_USER_MAGIC 0x0CD30CD0 */
-#define LMV_MAGIC_STRIPE 0x0CD40CD0	/* magic for dir sub_stripe */
-
-/*
- * Right now only the lower part (0-16 bits) of lmv_hash_type is being used,
- * and the higher part will be the flag to indicate the status of the object,
- * for example that the object is being migrated. The hash function
- * might be interpreted differently with different flags.
- */
-#define LMV_HASH_TYPE_MASK		0x0000ffff
-
-#define LMV_HASH_FLAG_MIGRATION		0x80000000
-#define LMV_HASH_FLAG_DEAD		0x40000000
-
-/**
- * The FNV-1a hash algorithm is as follows:
- *     hash = FNV_offset_basis
- *     for each octet_of_data to be hashed
- *             hash = hash XOR octet_of_data
- *             hash = hash × FNV_prime
- *     return hash
- * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1a_hash
- *
- * http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
- * FNV_prime is 2^40 + 2^8 + 0xb3 = 0x100000001b3ULL
- **/
-#define LUSTRE_FNV_1A_64_PRIME		0x100000001b3ULL
-#define LUSTRE_FNV_1A_64_OFFSET_BIAS	0xcbf29ce484222325ULL
-static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size)
-{
-	__u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS;
-	const unsigned char *p = buf;
-	size_t i;
-
-	for (i = 0; i < size; i++) {
-		hash ^= p[i];
-		hash *= LUSTRE_FNV_1A_64_PRIME;
-	}
-
-	return hash;
-}
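A plausible consumer of this hash is striped-directory name mapping: hash the entry name, then fold the result onto a stripe index. A sketch under that assumption (hypothetical helper; the real index selection also has to honor the status flags above):

static inline __u32 lmv_fnv_name_to_stripe(const char *name, size_t namelen,
					   __u32 stripe_count)
{
	/* hash the entry name, then map onto [0, stripe_count) */
	return (__u32)(lustre_hash_fnv_1a_64(name, namelen) % stripe_count);
}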
-
-union lmv_mds_md {
-	__u32			lmv_magic;
-	struct lmv_mds_md_v1	lmv_md_v1;
-	struct lmv_user_md	lmv_user_md;
-};
-
-static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
-{
-	ssize_t len = -EINVAL;
-
-	switch (lmm_magic) {
-	case LMV_MAGIC_V1: {
-		struct lmv_mds_md_v1 *lmm1;
-
-		len = sizeof(*lmm1);
-		len += stripe_count * sizeof(lmm1->lmv_stripe_fids[0]);
-		break;
-	}
-	default:
-		break;
-	}
-	return len;
-}
-
-static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm)
-{
-	switch (__le32_to_cpu(lmm->lmv_magic)) {
-	case LMV_MAGIC_V1:
-		return __le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count);
-	case LMV_USER_MAGIC:
-		return __le32_to_cpu(lmm->lmv_user_md.lum_stripe_count);
-	default:
-		return -EINVAL;
-	}
-}
-
-enum fld_rpc_opc {
-	FLD_QUERY	= 900,
-	FLD_READ	= 901,
-	FLD_LAST_OPC,
-	FLD_FIRST_OPC	= FLD_QUERY
-};
-
-enum seq_rpc_opc {
-	SEQ_QUERY		       = 700,
-	SEQ_LAST_OPC,
-	SEQ_FIRST_OPC		   = SEQ_QUERY
-};
-
-enum seq_op {
-	SEQ_ALLOC_SUPER = 0,
-	SEQ_ALLOC_META = 1
-};
-
-enum fld_op {
-	FLD_CREATE = 0,
-	FLD_DELETE = 1,
-	FLD_LOOKUP = 2,
-};
-
-/*
- *  LOV data structures
- */
-
-#define LOV_MAX_UUID_BUFFER_SIZE  8192
-/* The size of the buffer the lov/mdc reserves for the
- * array of UUIDs returned by the MDS.  With the current
- * protocol, this will limit the max number of OSTs per LOV
- */
-
-#define LOV_DESC_MAGIC 0xB0CCDE5C
-#define LOV_DESC_QOS_MAXAGE_DEFAULT 5  /* Seconds */
-#define LOV_DESC_STRIPE_SIZE_DEFAULT (1 << LNET_MTU_BITS)
-
-/* LOV settings descriptor (should only contain static info) */
-struct lov_desc {
-	__u32 ld_tgt_count;		/* how many OBD's */
-	__u32 ld_active_tgt_count;	/* how many active */
-	__u32 ld_default_stripe_count;  /* how many objects are used */
-	__u32 ld_pattern;		/* default PATTERN_RAID0 */
-	__u64 ld_default_stripe_size;   /* in bytes */
-	__u64 ld_default_stripe_offset; /* in bytes */
-	__u32 ld_padding_0;		/* unused */
-	__u32 ld_qos_maxage;		/* in seconds */
-	__u32 ld_padding_1;		/* also fix lustre_swab_lov_desc */
-	__u32 ld_padding_2;		/* also fix lustre_swab_lov_desc */
-	struct obd_uuid ld_uuid;
-};
-
-#define ld_magic ld_active_tgt_count       /* for swabbing from llogs */
-
-/*
- *   LDLM requests:
- */
-/* opcodes -- MUST be distinct from OST/MDS opcodes */
-enum ldlm_cmd {
-	LDLM_ENQUEUE     = 101,
-	LDLM_CONVERT     = 102,
-	LDLM_CANCEL      = 103,
-	LDLM_BL_CALLBACK = 104,
-	LDLM_CP_CALLBACK = 105,
-	LDLM_GL_CALLBACK = 106,
-	LDLM_SET_INFO    = 107,
-	LDLM_LAST_OPC
-};
-#define LDLM_FIRST_OPC LDLM_ENQUEUE
-
-#define RES_NAME_SIZE 4
-struct ldlm_res_id {
-	__u64 name[RES_NAME_SIZE];
-};
-
-#define DLDLMRES	"[%#llx:%#llx:%#llx].%llx"
-#define PLDLMRES(res)	(res)->lr_name.name[0], (res)->lr_name.name[1], \
-			(res)->lr_name.name[2], (res)->lr_name.name[3]
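DLDLMRES and PLDLMRES pair a format string with its argument list, so a full resource name can be printed in one expression. A sketch, assuming any struct exposing an lr_name member (such as ldlm_resource_desc below) and using plain printk to stay self-contained:

static inline void ldlm_resource_desc_dump(const struct ldlm_resource_desc *d)
{
	/* four __u64 name words, formatted by the DLDLMRES template */
	printk(KERN_DEBUG "resource: " DLDLMRES "\n", PLDLMRES(d));
}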
-
-/* lock types */
-enum ldlm_mode {
-	LCK_MINMODE = 0,
-	LCK_EX      = 1,
-	LCK_PW      = 2,
-	LCK_PR      = 4,
-	LCK_CW      = 8,
-	LCK_CR      = 16,
-	LCK_NL      = 32,
-	LCK_GROUP   = 64,
-	LCK_COS     = 128,
-	LCK_MAXMODE
-};
-
-#define LCK_MODE_NUM    8
-
-enum ldlm_type {
-	LDLM_PLAIN     = 10,
-	LDLM_EXTENT    = 11,
-	LDLM_FLOCK     = 12,
-	LDLM_IBITS     = 13,
-	LDLM_MAX_TYPE
-};
-
-#define LDLM_MIN_TYPE LDLM_PLAIN
-
-struct ldlm_extent {
-	__u64 start;
-	__u64 end;
-	__u64 gid;
-};
-
-struct ldlm_inodebits {
-	__u64 bits;
-};
-
-struct ldlm_flock_wire {
-	__u64 lfw_start;
-	__u64 lfw_end;
-	__u64 lfw_owner;
-	__u32 lfw_padding;
-	__u32 lfw_pid;
-};
-
-/* it's important that the fields of the ldlm_extent structure match
- * the first fields of the ldlm_flock structure because there is only
- * one ldlm_swab routine to process the ldlm_policy_data_t union. if
- * this ever changes we will need to swab the union differently based
- * on the resource type.
- */
-
-union ldlm_wire_policy_data {
-	struct ldlm_extent l_extent;
-	struct ldlm_flock_wire l_flock;
-	struct ldlm_inodebits l_inodebits;
-};
-
-union ldlm_gl_desc {
-	struct ldlm_gl_lquota_desc	lquota_desc;
-};
-
-enum ldlm_intent_flags {
-	IT_OPEN		= 0x00000001,
-	IT_CREAT	= 0x00000002,
-	IT_OPEN_CREAT	= 0x00000003,
-	IT_READDIR	= 0x00000004,
-	IT_GETATTR	= 0x00000008,
-	IT_LOOKUP	= 0x00000010,
-	IT_UNLINK	= 0x00000020,
-	IT_TRUNC	= 0x00000040,
-	IT_GETXATTR	= 0x00000080,
-	IT_EXEC		= 0x00000100,
-	IT_PIN		= 0x00000200,
-	IT_LAYOUT	= 0x00000400,
-	IT_QUOTA_DQACQ	= 0x00000800,
-	IT_QUOTA_CONN	= 0x00001000,
-	IT_SETXATTR	= 0x00002000,
-};
-
-struct ldlm_intent {
-	__u64 opc;
-};
-
-struct ldlm_resource_desc {
-	enum ldlm_type lr_type;
-	__u32 lr_padding;       /* also fix lustre_swab_ldlm_resource_desc */
-	struct ldlm_res_id lr_name;
-};
-
-struct ldlm_lock_desc {
-	struct ldlm_resource_desc l_resource;
-	enum ldlm_mode l_req_mode;
-	enum ldlm_mode l_granted_mode;
-	union ldlm_wire_policy_data l_policy_data;
-};
-
-#define LDLM_LOCKREQ_HANDLES 2
-#define LDLM_ENQUEUE_CANCEL_OFF 1
-
-struct ldlm_request {
-	__u32 lock_flags;
-	__u32 lock_count;
-	struct ldlm_lock_desc lock_desc;
-	struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
-};
-
-struct ldlm_reply {
-	__u32 lock_flags;
-	__u32 lock_padding;     /* also fix lustre_swab_ldlm_reply */
-	struct ldlm_lock_desc lock_desc;
-	struct lustre_handle lock_handle;
-	__u64  lock_policy_res1;
-	__u64  lock_policy_res2;
-};
-
-#define ldlm_flags_to_wire(flags)    ((__u32)(flags))
-#define ldlm_flags_from_wire(flags)  ((__u64)(flags))
-
-/*
- * Opcodes for mountconf (mgs and mgc)
- */
-enum mgs_cmd {
-	MGS_CONNECT = 250,
-	MGS_DISCONNECT,
-	MGS_EXCEPTION,	 /* node died, etc. */
-	MGS_TARGET_REG,	/* whenever target starts up */
-	MGS_TARGET_DEL,
-	MGS_SET_INFO,
-	MGS_CONFIG_READ,
-	MGS_LAST_OPC
-};
-#define MGS_FIRST_OPC MGS_CONNECT
-
-#define MGS_PARAM_MAXLEN 1024
-#define KEY_SET_INFO "set_info"
-
-struct mgs_send_param {
-	char	     mgs_param[MGS_PARAM_MAXLEN];
-};
-
-/* We pass this info to the MGS so it can write config logs */
-#define MTI_NAME_MAXLEN  64
-#define MTI_PARAM_MAXLEN 4096
-#define MTI_NIDS_MAX     32
-struct mgs_target_info {
-	__u32	    mti_lustre_ver;
-	__u32	    mti_stripe_index;
-	__u32	    mti_config_ver;
-	__u32	    mti_flags;
-	__u32	    mti_nid_count;
-	__u32	    mti_instance; /* Running instance of target */
-	char	     mti_fsname[MTI_NAME_MAXLEN];
-	char	     mti_svname[MTI_NAME_MAXLEN];
-	char	     mti_uuid[sizeof(struct obd_uuid)];
-	__u64	    mti_nids[MTI_NIDS_MAX];     /* host nids (lnet_nid_t)*/
-	char	     mti_params[MTI_PARAM_MAXLEN];
-};
-
-struct mgs_nidtbl_entry {
-	__u64	   mne_version;    /* table version of this entry */
-	__u32	   mne_instance;   /* target instance # */
-	__u32	   mne_index;      /* target index */
-	__u32	   mne_length;     /* length of this entry, in bytes */
-	__u8	    mne_type;       /* target type LDD_F_SV_TYPE_OST/MDT */
-	__u8	    mne_nid_type;   /* type of NID (mbz), for IPv6 */
-	__u8	    mne_nid_size;   /* size of each NID, in bytes */
-	__u8	    mne_nid_count;  /* # of NIDs in buffer */
-	union {
-		lnet_nid_t nids[0];     /* variable size buffer for NIDs. */
-	} u;
-};
-
-struct mgs_config_body {
-	char     mcb_name[MTI_NAME_MAXLEN]; /* logname */
-	__u64    mcb_offset;    /* next index of config log to request */
-	__u16    mcb_type;      /* type of log: CONFIG_T_[CONFIG|RECOVER] */
-	__u8     mcb_reserved;
-	__u8     mcb_bits;      /* bits unit size of config log */
-	__u32    mcb_units;     /* # of units for bulk transfer */
-};
-
-struct mgs_config_res {
-	__u64    mcr_offset;    /* index of last config log */
-	__u64    mcr_size;      /* size of the log */
-};
-
-/* Config marker flags (in config log) */
-#define CM_START	0x01
-#define CM_END		0x02
-#define CM_SKIP		0x04
-#define CM_UPGRADE146	0x08
-#define CM_EXCLUDE	0x10
-#define CM_START_SKIP (CM_START | CM_SKIP)
-
-struct cfg_marker {
-	__u32	     cm_step;       /* aka config version */
-	__u32	     cm_flags;
-	__u32	     cm_vers;       /* lustre release version number */
-	__u32	     cm_padding;    /* 64 bit align */
-	__s64	     cm_createtime; /* when this record was first created */
-	__s64	     cm_canceltime; /* when this record is no longer valid */
-	char	      cm_tgtname[MTI_NAME_MAXLEN];
-	char	      cm_comment[MTI_NAME_MAXLEN];
-};
-
-/*
- * Opcodes for multiple servers.
- */
-
-enum obd_cmd {
-	OBD_PING = 400,
-	OBD_LOG_CANCEL,
-	OBD_QC_CALLBACK, /* not used since 2.4 */
-	OBD_IDX_READ,
-	OBD_LAST_OPC
-};
-#define OBD_FIRST_OPC OBD_PING
-
-/**
- * llog contexts indices.
- *
- * There is a compatibility problem with the indices below: they are not
- * contiguous and must keep their numbers for compatibility reasons.
- * See LU-5218 for details.
- */
-enum llog_ctxt_id {
-	LLOG_CONFIG_ORIG_CTXT  =  0,
-	LLOG_CONFIG_REPL_CTXT = 1,
-	LLOG_MDS_OST_ORIG_CTXT = 2,
-	LLOG_MDS_OST_REPL_CTXT = 3, /* kept just to avoid re-assignment */
-	LLOG_SIZE_ORIG_CTXT = 4,
-	LLOG_SIZE_REPL_CTXT = 5,
-	LLOG_TEST_ORIG_CTXT = 8,
-	LLOG_TEST_REPL_CTXT = 9, /* kept just to avoid re-assignment */
-	LLOG_CHANGELOG_ORIG_CTXT = 12, /**< changelog generation on mdd */
-	LLOG_CHANGELOG_REPL_CTXT = 13, /**< changelog access on clients */
-	/* for multiple changelog consumers */
-	LLOG_CHANGELOG_USER_ORIG_CTXT = 14,
-	LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */
-	LLOG_MAX_CTXTS
-};
-
-/** Identifier for a single log object */
-struct llog_logid {
-	struct ost_id		lgl_oi;
-	__u32		   lgl_ogen;
-} __packed;
-
-/** Records written to the CATALOGS list */
-#define CATLIST "CATALOGS"
-struct llog_catid {
-	struct llog_logid       lci_logid;
-	__u32		   lci_padding1;
-	__u32		   lci_padding2;
-	__u32		   lci_padding3;
-} __packed;
-
-/* Log data record types - there is no specific reason that these need to
- * be related to the RPC opcodes, but no reason not to (may be handy later?)
- */
-#define LLOG_OP_MAGIC 0x10600000
-#define LLOG_OP_MASK  0xfff00000
-
-enum llog_op_type {
-	LLOG_PAD_MAGIC		= LLOG_OP_MAGIC | 0x00000,
-	OST_SZ_REC		= LLOG_OP_MAGIC | 0x00f00,
-	/* OST_RAID1_REC	= LLOG_OP_MAGIC | 0x01000, never used */
-	MDS_UNLINK_REC		= LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) |
-				  REINT_UNLINK, /* obsolete after 2.5.0 */
-	MDS_UNLINK64_REC	= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
-				  REINT_UNLINK,
-	/* MDS_SETATTR_REC	= LLOG_OP_MAGIC | 0x12401, obsolete 1.8.0 */
-	MDS_SETATTR64_REC	= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
-				  REINT_SETATTR,
-	OBD_CFG_REC		= LLOG_OP_MAGIC | 0x20000,
-	/* PTL_CFG_REC		= LLOG_OP_MAGIC | 0x30000, obsolete 1.4.0 */
-	LLOG_GEN_REC		= LLOG_OP_MAGIC | 0x40000,
-	/* LLOG_JOIN_REC	= LLOG_OP_MAGIC | 0x50000, obsolete  1.8.0 */
-	CHANGELOG_REC		= LLOG_OP_MAGIC | 0x60000,
-	CHANGELOG_USER_REC	= LLOG_OP_MAGIC | 0x70000,
-	HSM_AGENT_REC		= LLOG_OP_MAGIC | 0x80000,
-	LLOG_HDR_MAGIC		= LLOG_OP_MAGIC | 0x45539,
-	LLOG_LOGID_MAGIC	= LLOG_OP_MAGIC | 0x4553b,
-};
-
-#define LLOG_REC_HDR_NEEDS_SWABBING(r) \
-	(((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC))
-
-/** Log record header - stored in little endian order.
- * Each record must start with this struct, end with a llog_rec_tail,
- * and be a multiple of 256 bits in size.
- */
-struct llog_rec_hdr {
-	__u32	lrh_len;
-	__u32	lrh_index;
-	__u32	lrh_type;
-	__u32	lrh_id;
-};
-
-struct llog_rec_tail {
-	__u32	lrt_len;
-	__u32	lrt_index;
-};
-
-/* The record data follows just after the header */
-#define REC_DATA(ptr)						\
-	((void *)((char *)ptr + sizeof(struct llog_rec_hdr)))
-
-#define REC_DATA_LEN(rec)					\
-	(rec->lrh_len - sizeof(struct llog_rec_hdr) -		\
-	 sizeof(struct llog_rec_tail))
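Since lrh_len covers the header, payload, and tail of each record, iterating the records in one log chunk is a matter of hopping by lrh_len. A minimal sketch, assuming buf holds chunk_size bytes of well-formed records:

static inline void llog_walk_records(void *buf, __u32 chunk_size,
				     void (*cb)(struct llog_rec_hdr *rec))
{
	char *p = buf;
	char *end = p + chunk_size;

	while (p + sizeof(struct llog_rec_hdr) <= end) {
		struct llog_rec_hdr *rec = (struct llog_rec_hdr *)p;

		if (!rec->lrh_len || p + rec->lrh_len > end)
			break;	/* zero-length or truncated record: stop */
		cb(rec);	/* payload: REC_DATA(rec), REC_DATA_LEN(rec) */
		p += rec->lrh_len;	/* lrh_len covers hdr + data + tail */
	}
}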
-
-struct llog_logid_rec {
-	struct llog_rec_hdr	lid_hdr;
-	struct llog_logid	lid_id;
-	__u32			lid_padding1;
-	__u64			lid_padding2;
-	__u64			lid_padding3;
-	struct llog_rec_tail	lid_tail;
-} __packed;
-
-struct llog_unlink_rec {
-	struct llog_rec_hdr	lur_hdr;
-	__u64			lur_oid;
-	__u32			lur_oseq;
-	__u32			lur_count;
-	struct llog_rec_tail	lur_tail;
-} __packed;
-
-struct llog_unlink64_rec {
-	struct llog_rec_hdr	lur_hdr;
-	struct lu_fid		lur_fid;
-	__u32			lur_count; /* to destroy the lost precreated */
-	__u32			lur_padding1;
-	__u64			lur_padding2;
-	__u64			lur_padding3;
-	struct llog_rec_tail    lur_tail;
-} __packed;
-
-struct llog_setattr64_rec {
-	struct llog_rec_hdr	lsr_hdr;
-	struct ost_id		lsr_oi;
-	__u32			lsr_uid;
-	__u32			lsr_uid_h;
-	__u32			lsr_gid;
-	__u32			lsr_gid_h;
-	__u64			lsr_valid;
-	struct llog_rec_tail    lsr_tail;
-} __packed;
-
-struct llog_size_change_rec {
-	struct llog_rec_hdr	lsc_hdr;
-	struct ll_fid		lsc_fid;
-	__u32			lsc_ioepoch;
-	__u32			lsc_padding1;
-	__u64			lsc_padding2;
-	__u64			lsc_padding3;
-	struct llog_rec_tail	lsc_tail;
-} __packed;
-
-/* changelog llog name, needed by client replicators */
-#define CHANGELOG_CATALOG "changelog_catalog"
-
-struct changelog_setinfo {
-	__u64 cs_recno;
-	__u32 cs_id;
-} __packed;
-
-/** changelog record */
-struct llog_changelog_rec {
-	struct llog_rec_hdr	cr_hdr;
-	struct changelog_rec	cr;		/**< Variable length field */
-	struct llog_rec_tail	cr_do_not_use;	/**< for sizeof only */
-} __packed;
-
-struct llog_changelog_user_rec {
-	struct llog_rec_hdr   cur_hdr;
-	__u32		 cur_id;
-	__u32		 cur_padding;
-	__u64		 cur_endrec;
-	struct llog_rec_tail  cur_tail;
-} __packed;
-
-enum agent_req_status {
-	ARS_WAITING,
-	ARS_STARTED,
-	ARS_FAILED,
-	ARS_CANCELED,
-	ARS_SUCCEED,
-};
-
-static inline const char *agent_req_status2name(const enum agent_req_status ars)
-{
-	switch (ars) {
-	case ARS_WAITING:
-		return "WAITING";
-	case ARS_STARTED:
-		return "STARTED";
-	case ARS_FAILED:
-		return "FAILED";
-	case ARS_CANCELED:
-		return "CANCELED";
-	case ARS_SUCCEED:
-		return "SUCCEED";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-struct llog_agent_req_rec {
-	struct llog_rec_hdr	arr_hdr;	/**< record header */
-	__u32			arr_status;	/**< status of the request */
-						/* must match enum
-						 * agent_req_status
-						 */
-	__u32			arr_archive_id;	/**< backend archive number */
-	__u64			arr_flags;	/**< req flags */
-	__u64			arr_compound_id;/**< compound cookie */
-	__u64			arr_req_create;	/**< req. creation time */
-	__u64			arr_req_change;	/**< req. status change time */
-	struct hsm_action_item	arr_hai;	/**< req. to the agent */
-	struct llog_rec_tail	arr_tail;   /**< record tail, for sizeof only */
-} __packed;
-
-/* Old llog gen for compatibility */
-struct llog_gen {
-	__u64 mnt_cnt;
-	__u64 conn_cnt;
-} __packed;
-
-struct llog_gen_rec {
-	struct llog_rec_hdr	lgr_hdr;
-	struct llog_gen		lgr_gen;
-	__u64			padding1;
-	__u64			padding2;
-	__u64			padding3;
-	struct llog_rec_tail	lgr_tail;
-};
-
-/* flags for the logs */
-enum llog_flag {
-	LLOG_F_ZAP_WHEN_EMPTY	= 0x1,
-	LLOG_F_IS_CAT		= 0x2,
-	LLOG_F_IS_PLAIN		= 0x4,
-	LLOG_F_EXT_JOBID        = 0x8,
-	LLOG_F_IS_FIXSIZE	= 0x10,
-
-	/*
-	 * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from
-	 * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here,
-	 * because the catlog record is usually fixed size, but its plain
-	 * log record can be variable
-	 */
-	LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
-};
-
-/* On-disk header structure of each log object, stored in little endian order */
-#define LLOG_MIN_CHUNK_SIZE	8192
-#define LLOG_HEADER_SIZE	(96)	/* sizeof(llog_log_hdr) +
-					 * sizeof(llh_tail) - sizeof(llh_bitmap)
-					 */
-#define LLOG_BITMAP_BYTES	(LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE)
-#define LLOG_MIN_REC_SIZE	(24)	/* round(llog_rec_hdr + llog_rec_tail) */
-
-struct llog_log_hdr {
-	struct llog_rec_hdr     llh_hdr;
-	__s64		   llh_timestamp;
-	__u32		   llh_count;
-	__u32		   llh_bitmap_offset;
-	__u32		   llh_size;
-	__u32		   llh_flags;
-	__u32		   llh_cat_idx;
-	/* for a catalog the first plain slot is next to it */
-	struct obd_uuid	 llh_tgtuuid;
-	__u32		   llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
-	/* These fields must always be at the end of the llog_log_hdr.
-	 * Note: llh_bitmap size is variable because the llog chunk size could
-	 * be bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192
-	 * bytes, and the real size is stored in llh_hdr.lrh_len, which means
-	 * llh_tail should only be referred to via LLOG_HDR_TAIL().
-	 * But this structure is also used by the client/server llog interface
-	 * (see llog_client.c), so it is kept in its original form to avoid
-	 * compatibility issues.
-	 */
-	__u32		   llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
-	struct llog_rec_tail    llh_tail;
-} __packed;
-
-#undef LLOG_HEADER_SIZE
-#undef LLOG_BITMAP_BYTES
-
-#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len -	\
-					   llh->llh_bitmap_offset -	\
-					   sizeof(llh->llh_tail)) * 8)
-#define LLOG_HDR_BITMAP(llh)	(__u32 *)((char *)(llh) +		\
-					  (llh)->llh_bitmap_offset)
-#define LLOG_HDR_TAIL(llh)	((struct llog_rec_tail *)((char *)llh + \
-							 llh->llh_hdr.lrh_len - \
-							 sizeof(llh->llh_tail)))
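The bitmap lives at a chunk-size-dependent offset and is stored little-endian, hence the accessors above. A sketch of checking whether a record index is still live, assuming test_bit_le() from <linux/bitops.h>:

static inline int llog_rec_index_is_set(struct llog_log_hdr *llh, __u32 index)
{
	/* bitmap size depends on the actual chunk size, so bound-check */
	if (index >= LLOG_HDR_BITMAP_SIZE(llh))
		return 0;
	return test_bit_le(index, LLOG_HDR_BITMAP(llh));
}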
-
-/** log cookies are used to reference a specific log file and a record
- * therein
- */
-struct llog_cookie {
-	struct llog_logid       lgc_lgl;
-	__u32		   lgc_subsys;
-	__u32		   lgc_index;
-	__u32		   lgc_padding;
-} __packed;
-
-/** llog protocol */
-enum llogd_rpc_ops {
-	LLOG_ORIGIN_HANDLE_CREATE       = 501,
-	LLOG_ORIGIN_HANDLE_NEXT_BLOCK   = 502,
-	LLOG_ORIGIN_HANDLE_READ_HEADER  = 503,
-	LLOG_ORIGIN_HANDLE_WRITE_REC    = 504,
-	LLOG_ORIGIN_HANDLE_CLOSE	= 505,
-	LLOG_ORIGIN_CONNECT		= 506,
-	LLOG_CATINFO			= 507,  /* deprecated */
-	LLOG_ORIGIN_HANDLE_PREV_BLOCK   = 508,
-	LLOG_ORIGIN_HANDLE_DESTROY      = 509,  /* for destroy llog object*/
-	LLOG_LAST_OPC,
-	LLOG_FIRST_OPC		  = LLOG_ORIGIN_HANDLE_CREATE
-};
-
-struct llogd_body {
-	struct llog_logid  lgd_logid;
-	__u32 lgd_ctxt_idx;
-	__u32 lgd_llh_flags;
-	__u32 lgd_index;
-	__u32 lgd_saved_index;
-	__u32 lgd_len;
-	__u64 lgd_cur_offset;
-} __packed;
-
-struct llogd_conn_body {
-	struct llog_gen	 lgdc_gen;
-	struct llog_logid       lgdc_logid;
-	__u32		   lgdc_ctxt_idx;
-} __packed;
-
-/* Note: 64-bit types are 64-bit aligned in structure */
-struct obdo {
-	__u64		o_valid;	/* hot fields in this obdo */
-	struct ost_id	o_oi;
-	__u64		o_parent_seq;
-	__u64		o_size;	 /* o_size-o_blocks == ost_lvb */
-	__s64		o_mtime;
-	__s64		o_atime;
-	__s64		o_ctime;
-	__u64		o_blocks;       /* brw: cli sent cached bytes */
-	__u64		o_grant;
-
-	/* 32-bit fields start here: keep an even number of them via padding */
-	__u32		o_blksize;      /* optimal IO blocksize */
-	__u32		o_mode;	 /* brw: cli sent cache remain */
-	__u32		o_uid;
-	__u32		o_gid;
-	__u32		o_flags;
-	__u32		o_nlink;	/* brw: checksum */
-	__u32		o_parent_oid;
-	__u32		o_misc;		/* brw: o_dropped */
-
-	__u64		   o_ioepoch;      /* epoch in ost writes */
-	__u32		   o_stripe_idx;   /* holds stripe idx */
-	__u32		   o_parent_ver;
-	struct lustre_handle    o_handle;  /* brw: lock handle to prolong locks
-					    */
-	struct llog_cookie      o_lcookie; /* destroy: unlink cookie from MDS,
-					    * obsolete in 2.8, reused in OSP
-					    */
-	__u32			o_uid_h;
-	__u32			o_gid_h;
-
-	__u64			o_data_version; /* getattr: sum of iversion for
-						 * each stripe.
-						 * brw: grant space consumed on
-						 * the client for the write
-						 */
-	__u64			o_padding_4;
-	__u64			o_padding_5;
-	__u64			o_padding_6;
-};
-
-#define o_dirty   o_blocks
-#define o_undirty o_mode
-#define o_dropped o_misc
-#define o_cksum   o_nlink
-#define o_grant_used o_data_version
-
-/* request structure for OST's */
-struct ost_body {
-	struct  obdo oa;
-};
-
-/* Key for FIEMAP to be used in get_info calls */
-struct ll_fiemap_info_key {
-	char		lfik_name[8];
-	struct obdo	lfik_oa;
-	struct fiemap	lfik_fiemap;
-};
-
-/* security opcodes */
-enum sec_cmd {
-	SEC_CTX_INIT	    = 801,
-	SEC_CTX_INIT_CONT       = 802,
-	SEC_CTX_FINI	    = 803,
-	SEC_LAST_OPC,
-	SEC_FIRST_OPC	   = SEC_CTX_INIT
-};
-
-/*
- * capa related definitions
- */
-#define CAPA_HMAC_MAX_LEN       64
-#define CAPA_HMAC_KEY_MAX_LEN   56
-
-/* NB: take care when changing the sequence of elements in this struct,
- * because the offset info is used in find_capa()
- */
-struct lustre_capa {
-	struct lu_fid   lc_fid;	 /** fid */
-	__u64	   lc_opc;	 /** operations allowed */
-	__u64	   lc_uid;	 /** file owner */
-	__u64	   lc_gid;	 /** file group */
-	__u32	   lc_flags;       /** HMAC algorithm & flags */
-	__u32	   lc_keyid;       /** key# used for the capability */
-	__u32	   lc_timeout;     /** capa timeout value (sec) */
-/* FIXME: y2038 time_t overflow: */
-	__u32	   lc_expiry;      /** expiry time (sec) */
-	__u8	    lc_hmac[CAPA_HMAC_MAX_LEN];   /** HMAC */
-} __packed;
-
-/** lustre_capa::lc_opc */
-enum {
-	CAPA_OPC_BODY_WRITE   = 1 << 0,  /**< write object data */
-	CAPA_OPC_BODY_READ    = 1 << 1,  /**< read object data */
-	CAPA_OPC_INDEX_LOOKUP = 1 << 2,  /**< lookup object fid */
-	CAPA_OPC_INDEX_INSERT = 1 << 3,  /**< insert object fid */
-	CAPA_OPC_INDEX_DELETE = 1 << 4,  /**< delete object fid */
-	CAPA_OPC_OSS_WRITE    = 1 << 5,  /**< write oss object data */
-	CAPA_OPC_OSS_READ     = 1 << 6,  /**< read oss object data */
-	CAPA_OPC_OSS_TRUNC    = 1 << 7,  /**< truncate oss object */
-	CAPA_OPC_OSS_DESTROY  = 1 << 8,  /**< destroy oss object */
-	CAPA_OPC_META_WRITE   = 1 << 9,  /**< write object meta data */
-	CAPA_OPC_META_READ    = 1 << 10, /**< read object meta data */
-};
-
-#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
-#define CAPA_OPC_MDS_ONLY						   \
-	(CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \
-	 CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
-#define CAPA_OPC_OSS_ONLY						   \
-	(CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC |      \
-	 CAPA_OPC_OSS_DESTROY)
-#define CAPA_OPC_MDS_DEFAULT	(~CAPA_OPC_OSS_ONLY)
-#define CAPA_OPC_OSS_DEFAULT	(~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY))
-
-struct lustre_capa_key {
-	__u64   lk_seq;       /**< mds# */
-	__u32   lk_keyid;     /**< key# */
-	__u32   lk_padding;
-	__u8    lk_key[CAPA_HMAC_KEY_MAX_LEN];    /**< key */
-} __packed;
-
-/** The link ea holds 1 \a link_ea_entry for each hardlink */
-#define LINK_EA_MAGIC 0x11EAF1DFUL
-struct link_ea_header {
-	__u32 leh_magic;
-	__u32 leh_reccount;
-	__u64 leh_len;      /* total size */
-	__u32 leh_overflow_time;
-	__u32 leh_padding;
-};
-
-/** Hardlink data is name and parent fid.
- * Stored in this crazy struct for maximum packing and endian-neutrality
- */
-struct link_ea_entry {
-	/** __u16 stored big-endian, unaligned */
-	unsigned char      lee_reclen[2];
-	unsigned char      lee_parent_fid[sizeof(struct lu_fid)];
-	char	       lee_name[0];
-} __packed;
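lee_reclen is a 16-bit length stored byte-by-byte so the entry stays packed and endian-neutral. A minimal encode/decode sketch matching that layout (helper names are illustrative, not the verbatim Lustre accessors):

static inline __u16 lee_reclen_get(const struct link_ea_entry *lee)
{
	return ((__u16)lee->lee_reclen[0] << 8) | lee->lee_reclen[1];
}

static inline void lee_reclen_set(struct link_ea_entry *lee, __u16 reclen)
{
	lee->lee_reclen[0] = reclen >> 8;	/* big-endian: high byte first */
	lee->lee_reclen[1] = reclen & 0xff;
}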
-
-/** fid2path request/reply structure */
-struct getinfo_fid2path {
-	struct lu_fid   gf_fid;
-	__u64	   gf_recno;
-	__u32	   gf_linkno;
-	__u32	   gf_pathlen;
-	char	    gf_path[0];
-} __packed;
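Because gf_path is a flexible array, callers allocate the header plus however much path they can accept and record that room in gf_pathlen before issuing the request. A userspace-flavored sketch (<stdlib.h> assumed, error handling elided):

static inline struct getinfo_fid2path *fid2path_alloc(__u32 pathlen)
{
	struct getinfo_fid2path *gf = malloc(sizeof(*gf) + pathlen);

	if (gf)
		gf->gf_pathlen = pathlen;	/* room available in gf_path */
	return gf;
}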
-
-/** path2parent request/reply structures */
-struct getparent {
-	struct lu_fid	gp_fid;		/**< parent FID */
-	__u32		gp_linkno;	/**< hardlink number */
-	__u32		gp_name_size;	/**< size of the name field */
-	char		gp_name[0];	/**< zero-terminated link name */
-} __packed;
-
-enum {
-	LAYOUT_INTENT_ACCESS    = 0,
-	LAYOUT_INTENT_READ      = 1,
-	LAYOUT_INTENT_WRITE     = 2,
-	LAYOUT_INTENT_GLIMPSE   = 3,
-	LAYOUT_INTENT_TRUNC     = 4,
-	LAYOUT_INTENT_RELEASE   = 5,
-	LAYOUT_INTENT_RESTORE   = 6
-};
-
-/* enqueue layout lock with intent */
-struct layout_intent {
-	__u32 li_opc; /* intent operation for enqueue, read, write etc */
-	__u32 li_flags;
-	__u64 li_start;
-	__u64 li_end;
-};
-
-/**
- * On the wire version of hsm_progress structure.
- *
- * Contains the userspace hsm_progress and some internal fields.
- */
-struct hsm_progress_kernel {
-	/* Fields taken from struct hsm_progress */
-	struct lu_fid		hpk_fid;
-	__u64			hpk_cookie;
-	struct hsm_extent	hpk_extent;
-	__u16			hpk_flags;
-	__u16			hpk_errval; /* positive val */
-	__u32			hpk_padding1;
-	/* Additional fields */
-	__u64			hpk_data_version;
-	__u64			hpk_padding2;
-} __packed;
-
-/** layout swap request structure
- * fid1 and fid2 are in mdt_body
- */
-struct mdc_swap_layouts {
-	__u64	   msl_flags;
-} __packed;
-
-struct close_data {
-	struct lustre_handle	cd_handle;
-	struct lu_fid		cd_fid;
-	__u64			cd_data_version;
-	__u64			cd_reserved[8];
-};
-
-#endif
-/** @} lustreidl */

+ 0 - 229
drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h

@@ -1,229 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-#ifndef _UAPI_LUSTRE_IOCTL_H_
-#define _UAPI_LUSTRE_IOCTL_H_
-
-#include <linux/ioctl.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-#if !defined(__KERNEL__) && !defined(LUSTRE_UTILS)
-# error This file is for Lustre internal use only.
-#endif
-
-enum md_echo_cmd {
-	ECHO_MD_CREATE		= 1, /* Open/Create file on MDT */
-	ECHO_MD_MKDIR		= 2, /* Mkdir on MDT */
-	ECHO_MD_DESTROY		= 3, /* Unlink file on MDT */
-	ECHO_MD_RMDIR		= 4, /* Rmdir on MDT */
-	ECHO_MD_LOOKUP		= 5, /* Lookup on MDT */
-	ECHO_MD_GETATTR		= 6, /* Getattr on MDT */
-	ECHO_MD_SETATTR		= 7, /* Setattr on MDT */
-	ECHO_MD_ALLOC_FID	= 8, /* Get FIDs from MDT */
-};
-
-#define OBD_DEV_ID 1
-#define OBD_DEV_NAME "obd"
-#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
-
-#define OBD_IOCTL_VERSION	0x00010004
-#define OBD_DEV_BY_DEVNAME	0xffffd0de
-
-struct obd_ioctl_data {
-	__u32		ioc_len;
-	__u32		ioc_version;
-
-	union {
-		__u64	ioc_cookie;
-		__u64	ioc_u64_1;
-	};
-	union {
-		__u32	ioc_conn1;
-		__u32	ioc_u32_1;
-	};
-	union {
-		__u32	ioc_conn2;
-		__u32	ioc_u32_2;
-	};
-
-	struct obdo	ioc_obdo1;
-	struct obdo	ioc_obdo2;
-
-	__u64		ioc_count;
-	__u64		ioc_offset;
-	__u32		ioc_dev;
-	__u32		ioc_command;
-
-	__u64		ioc_nid;
-	__u32		ioc_nal;
-	__u32		ioc_type;
-
-	/* buffers the kernel will treat as user pointers */
-	__u32		ioc_plen1;
-	char __user    *ioc_pbuf1;
-	__u32		ioc_plen2;
-	char __user    *ioc_pbuf2;
-
-	/* inline buffers for various arguments */
-	__u32		ioc_inllen1;
-	char	       *ioc_inlbuf1;
-	__u32		ioc_inllen2;
-	char	       *ioc_inlbuf2;
-	__u32		ioc_inllen3;
-	char	       *ioc_inlbuf3;
-	__u32		ioc_inllen4;
-	char	       *ioc_inlbuf4;
-
-	char		ioc_bulk[0];
-};
-
-struct obd_ioctl_hdr {
-	__u32		ioc_len;
-	__u32		ioc_version;
-};
-
-static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data)
-{
-	__u32 len = __ALIGN_KERNEL(sizeof(*data), 8);
-
-	len += __ALIGN_KERNEL(data->ioc_inllen1, 8);
-	len += __ALIGN_KERNEL(data->ioc_inllen2, 8);
-	len += __ALIGN_KERNEL(data->ioc_inllen3, 8);
-	len += __ALIGN_KERNEL(data->ioc_inllen4, 8);
-
-	return len;
-}
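So the packed length is the 8-byte-aligned header plus each inline buffer rounded up to 8 bytes. A worked sketch with two inline buffers of 10 and 20 bytes:

static inline __u32 obd_ioctl_len_example(void)
{
	struct obd_ioctl_data data = { 0 };

	data.ioc_inllen1 = 10;	/* __ALIGN_KERNEL rounds this to 16 */
	data.ioc_inllen2 = 20;	/* ...and this to 24 */
	return obd_ioctl_packlen(&data);  /* ALIGN(sizeof(data), 8) + 16 + 24 */
}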
-
-/*
- * OBD_IOC_DATA_TYPE is only for compatibility reasons with older
- * Linux Lustre user tools. New ioctls should NOT use this macro as
- * the ioctl "size". Instead the ioctl should get a "size" argument
- * which is the actual data type used by the ioctl, to ensure the
- * ioctl interface is versioned correctly.
- */
-#define OBD_IOC_DATA_TYPE	long
-
-/*	IOC_LDLM_TEST		_IOWR('f', 40, long) */
-/*	IOC_LDLM_DUMP		_IOWR('f', 41, long) */
-/*	IOC_LDLM_REGRESS_START	_IOWR('f', 42, long) */
-/*	IOC_LDLM_REGRESS_STOP	_IOWR('f', 43, long) */
-
-#define OBD_IOC_CREATE		_IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY		_IOW('f', 104, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_PREALLOCATE	_IOWR('f', 105, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_SETATTR		_IOW('f', 107, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETATTR		_IOWR('f', 108, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_READ		_IOWR('f', 109, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_WRITE		_IOWR('f', 110, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_STATFS		_IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC		_IOW('f', 114, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_READ2		_IOWR('f', 115, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_FORMAT		_IOWR('f', 116, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_PARTITION	_IOWR('f', 117, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_COPY		_IOWR('f', 120, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_MIGR		_IOWR('f', 121, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_PUNCH		_IOWR('f', 122, OBD_IOC_DATA_TYPE) */
-
-/*	OBD_IOC_MODULE_DEBUG	_IOWR('f', 124, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_BRW_READ	_IOWR('f', 125, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_BRW_WRITE	_IOWR('f', 126, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_NAME2DEV	_IOWR('f', 127, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_UUID2DEV	_IOWR('f', 130, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETNAME		_IOWR('f', 131, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETMDNAME	_IOR('f', 131, char[MAX_OBD_NAME])
-#define OBD_IOC_GETDTNAME	OBD_IOC_GETNAME
-#define OBD_IOC_LOV_GET_CONFIG	_IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER	_IOW('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET	_IOW('f', 136, OBD_IOC_DATA_TYPE)
-
-/*	OBD_IOC_DEC_FS_USE_COUNT _IO('f', 139) */
-#define OBD_IOC_NO_TRANSNO	_IOW('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY	_IOW('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY	_IOR('f', 142, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_ROOT_SQUASH	_IOWR('f', 143, OBD_IOC_DATA_TYPE) */
-#define OBD_GET_VERSION		_IOWR('f', 144, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_GSS_SUPPORT	_IOWR('f', 145, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_CLOSE_UUID	_IOWR('f', 147, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_CHANGELOG_SEND	_IOW('f', 148, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETDEVICE	_IOWR('f', 149, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_FID2PATH	_IOWR('f', 150, OBD_IOC_DATA_TYPE)
-/*	lustre/lustre_user.h	151-153 */
-/*	OBD_IOC_LOV_SETSTRIPE	154 LL_IOC_LOV_SETSTRIPE */
-/*	OBD_IOC_LOV_GETSTRIPE	155 LL_IOC_LOV_GETSTRIPE */
-/*	OBD_IOC_LOV_SETEA	156 LL_IOC_LOV_SETEA */
-/*	lustre/lustre_user.h	157-159 */
-/*	OBD_IOC_QUOTACHECK	_IOW('f', 160, int) */
-/*	OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *) */
-#define OBD_IOC_QUOTACTL	_IOWR('f', 162, struct if_quotactl)
-/*	lustre/lustre_user.h	163-176 */
-#define OBD_IOC_CHANGELOG_REG	_IOW('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG	_IOW('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR	_IOW('f', 179, struct obd_ioctl_data)
-/*	OBD_IOC_RECORD		_IOWR('f', 180, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_ENDRECORD	_IOWR('f', 181, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_PARSE		_IOWR('f', 182, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_DORECORD	_IOWR('f', 183, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_PROCESS_CFG	_IOWR('f', 184, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_DUMP_LOG	_IOWR('f', 185, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_CLEAR_LOG	_IOWR('f', 186, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_PARAM		_IOW('f', 187, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_POOL		_IOWR('f', 188, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_REPLACE_NIDS	_IOWR('f', 189, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CATLOGLIST	_IOWR('f', 190, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_INFO	_IOWR('f', 191, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_PRINT	_IOWR('f', 192, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CANCEL	_IOWR('f', 193, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_REMOVE	_IOWR('f', 194, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CHECK	_IOWR('f', 195, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_LLOG_CATINFO	_IOWR('f', 196, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_NODEMAP		_IOWR('f', 197, OBD_IOC_DATA_TYPE)
-
-/*	ECHO_IOC_GET_STRIPE	_IOWR('f', 200, OBD_IOC_DATA_TYPE) */
-/*	ECHO_IOC_SET_STRIPE	_IOWR('f', 201, OBD_IOC_DATA_TYPE) */
-/*	ECHO_IOC_ENQUEUE	_IOWR('f', 202, OBD_IOC_DATA_TYPE) */
-/*	ECHO_IOC_CANCEL		_IOWR('f', 203, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_GET_OBJ_VERSION	_IOR('f', 210, OBD_IOC_DATA_TYPE)
-
-/*	lustre/lustre_user.h	212-217 */
-#define OBD_IOC_GET_MNTOPT	_IOW('f', 220, mntopt_t)
-#define OBD_IOC_ECHO_MD		_IOR('f', 221, struct obd_ioctl_data)
-#define OBD_IOC_ECHO_ALLOC_SEQ	_IOWR('f', 222, struct obd_ioctl_data)
-#define OBD_IOC_START_LFSCK	_IOWR('f', 230, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_STOP_LFSCK	_IOW('f', 231, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_QUERY_LFSCK	_IOR('f', 232, struct obd_ioctl_data)
-/*	lustre/lustre_user.h	240-249 */
-/*	LIBCFS_IOC_DEBUG_MASK	250 */
-
-#define IOC_OSC_SET_ACTIVE	_IOWR('h', 21, void *)
-
-#endif /* _UAPI_LUSTRE_IOCTL_H_ */

+ 0 - 94
drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h

@@ -1,94 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * Kernel <-> userspace communication routines.
- * The definitions below are used in the kernel and userspace.
- */
-
-#ifndef __UAPI_LUSTRE_KERNELCOMM_H__
-#define __UAPI_LUSTRE_KERNELCOMM_H__
-
-#include <linux/types.h>
-
-/* KUC message header.
- * All current and future KUC messages should use this header.
- * To avoid having to include Lustre headers from libcfs, define this here.
- */
-struct kuc_hdr {
-	__u16 kuc_magic;
-	/* Each new Lustre feature should use a different transport */
-	__u8  kuc_transport;
-	__u8  kuc_flags;
-	/* Message type or opcode, transport-specific */
-	__u16 kuc_msgtype;
-	/* Including header */
-	__u16 kuc_msglen;
-} __aligned(sizeof(__u64));
-
-#define KUC_CHANGELOG_MSG_MAXSIZE (sizeof(struct kuc_hdr) + CR_MAXSIZE)
-
-#define KUC_MAGIC		0x191C	/* Lustre9etLinC */
-
-/* kuc_msgtype values are defined in each transport */
-enum kuc_transport_type {
-	KUC_TRANSPORT_GENERIC	= 1,
-	KUC_TRANSPORT_HSM	= 2,
-	KUC_TRANSPORT_CHANGELOG	= 3,
-};
-
-enum kuc_generic_message_type {
-	KUC_MSG_SHUTDOWN	= 1,
-};
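Every KUC message leads with kuc_hdr, so even a payload-free control message is just a filled-in header. A sketch building the generic shutdown message (kuc_flags left zero by assumption):

static inline void kuc_shutdown_hdr_init(struct kuc_hdr *kh)
{
	kh->kuc_magic = KUC_MAGIC;
	kh->kuc_transport = KUC_TRANSPORT_GENERIC;
	kh->kuc_flags = 0;			/* assumed: no flags needed */
	kh->kuc_msgtype = KUC_MSG_SHUTDOWN;
	kh->kuc_msglen = sizeof(*kh);		/* header-only message */
}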
-
-/* KUC Broadcast Groups. This determines which userspace process hears which
- * messages.  Multiple transports may be used within a group, or multiple
- * groups may use the same transport.  Broadcast groups need not be used
- * if e.g. a UID is specified instead; use group 0 to signify unicast.
- */
-#define KUC_GRP_HSM	0x02
-#define KUC_GRP_MAX	KUC_GRP_HSM
-
-#define LK_FLG_STOP 0x01
-#define LK_NOFD -1U
-
-/* kernelcomm control structure, passed from userspace to kernel */
-struct lustre_kernelcomm {
-	__u32 lk_wfd;
-	__u32 lk_rfd;
-	__u32 lk_uid;
-	__u32 lk_group;
-	__u32 lk_data;
-	__u32 lk_flags;
-} __packed;
-
-#endif	/* __UAPI_LUSTRE_KERNELCOMM_H__ */

+ 0 - 236
drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h

@@ -1,236 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2015 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * Define ost_id  associated functions
- */
-
-#ifndef _UAPI_LUSTRE_OSTID_H_
-#define _UAPI_LUSTRE_OSTID_H_
-
-#include <linux/errno.h>
-#include <uapi/linux/lustre/lustre_fid.h>
-
-static inline __u64 lmm_oi_id(const struct ost_id *oi)
-{
-	return oi->oi.oi_id;
-}
-
-static inline __u64 lmm_oi_seq(const struct ost_id *oi)
-{
-	return oi->oi.oi_seq;
-}
-
-static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
-{
-	oi->oi.oi_seq = seq;
-}
-
-static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
-{
-	oi->oi.oi_id = oid;
-}
-
-static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
-				    const struct ost_id *src_oi)
-{
-	dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
-	dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
-}
-
-static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
-				    const struct ost_id *src_oi)
-{
-	dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id);
-	dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq);
-}
-
-/* extract OST sequence (group) from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_seq(const struct ost_id *ostid)
-{
-	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
-		return FID_SEQ_OST_MDT0;
-
-	if (fid_seq_is_default(ostid->oi.oi_seq))
-		return FID_SEQ_LOV_DEFAULT;
-
-	if (fid_is_idif(&ostid->oi_fid))
-		return FID_SEQ_OST_MDT0;
-
-	return fid_seq(&ostid->oi_fid);
-}
-
-/* extract OST objid from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_id(const struct ost_id *ostid)
-{
-	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
-		return ostid->oi.oi_id & IDIF_OID_MASK;
-
-	if (fid_seq_is_default(ostid->oi.oi_seq))
-		return ostid->oi.oi_id;
-
-	if (fid_is_idif(&ostid->oi_fid))
-		return fid_idif_id(fid_seq(&ostid->oi_fid),
-				   fid_oid(&ostid->oi_fid), 0);
-
-	return fid_oid(&ostid->oi_fid);
-}
-
-static inline void ostid_set_seq(struct ost_id *oi, __u64 seq)
-{
-	if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) {
-		oi->oi.oi_seq = seq;
-	} else {
-		oi->oi_fid.f_seq = seq;
-		/*
-		 * Note: if f_oid + f_ver is zero, we need to init it
-		 * to 1; otherwise ostid_seq() will treat this
-		 * as an old ostid (oi_seq == 0)
-		 */
-		if (!oi->oi_fid.f_oid && !oi->oi_fid.f_ver)
-			oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID;
-	}
-}
-
-static inline void ostid_set_seq_mdt0(struct ost_id *oi)
-{
-	ostid_set_seq(oi, FID_SEQ_OST_MDT0);
-}
-
-static inline void ostid_set_seq_echo(struct ost_id *oi)
-{
-	ostid_set_seq(oi, FID_SEQ_ECHO);
-}
-
-static inline void ostid_set_seq_llog(struct ost_id *oi)
-{
-	ostid_set_seq(oi, FID_SEQ_LLOG);
-}
-
-static inline void ostid_cpu_to_le(const struct ost_id *src_oi,
-				   struct ost_id *dst_oi)
-{
-	if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
-		dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id);
-		dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq);
-	} else {
-		fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid);
-	}
-}
-
-static inline void ostid_le_to_cpu(const struct ost_id *src_oi,
-				   struct ost_id *dst_oi)
-{
-	if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
-		dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
-		dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
-	} else {
-		fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid);
-	}
-}
-
-/**
- * Sigh, because pre-2.4 uses
- * struct lov_mds_md_v1 {
- *	........
- *	__u64 lmm_object_id;
- *	__u64 lmm_object_seq;
- *      ......
- *      }
- * to identify the LOV (MDT) object, and lmm_object_seq will be a
- * normal FID sequence, which makes it hard to fold these conversions
- * into ostid_to_fid(), so we do the lmm_oi/fid conversion separately.
- *
- * We can tell the lmm_oi by this way,
- * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0
- * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL
- * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k},
- *      lmm_oi.f_ver = 0
- *
- * But currently lmm_oi/lsm_oi does not have any "real" usages,
- * except for printing some information, and the user can always
- * get the real FID from the LMA. Besides, this multiple-case check
- * might make swabbing more complicated, so we keep using id/seq for lmm_oi.
- */
-
-static inline void fid_to_lmm_oi(const struct lu_fid *fid,
-				 struct ost_id *oi)
-{
-	oi->oi.oi_id = fid_oid(fid);
-	oi->oi.oi_seq = fid_seq(fid);
-}
-
-/**
- * Unpack an OST object id/seq (group) into a FID.  This is needed for
- * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper
- * FIDs.  Note that if an id/seq is already in FID/IDIF format it will
- * be passed through unchanged.  Only legacy OST objects in "group 0"
- * will be mapped into the IDIF namespace so that they can fit into the
- * struct lu_fid fields without loss.
- */
-static inline int ostid_to_fid(struct lu_fid *fid, const struct ost_id *ostid,
-			       __u32 ost_idx)
-{
-	__u64 seq = ostid_seq(ostid);
-
-	if (ost_idx > 0xffff)
-		return -EBADF;
-
-	if (fid_seq_is_mdt0(seq)) {
-		__u64 oid = ostid_id(ostid);
-
-		/* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
-		 * that we map into the IDIF namespace.  It allows up to 2^48
-		 * objects per OST, as this is the object namespace that has
-		 * been in production for years.  This can handle create rates
-		 * of 1M objects/s/OST for 9 years, or combinations thereof.
-		 */
-		if (oid >= IDIF_MAX_OID)
-			return -EBADF;
-
-		fid->f_seq = fid_idif_seq(oid, ost_idx);
-		/* truncate to 32 bits by assignment */
-		fid->f_oid = oid;
-		/* in theory, not currently used */
-		fid->f_ver = oid >> 48;
-	} else if (!fid_seq_is_default(seq)) {
-		/* This is either an IDIF object, which identifies objects
-		 * across all OSTs, or a regular FID.  The IDIF namespace
-		 * maps legacy OST objects into the FID namespace.  In both
-		 * cases, we just pass the FID through, no conversion needed.
-		 */
-		if (ostid->oi_fid.f_ver)
-			return -EBADF;
-
-		*fid = ostid->oi_fid;
-	}
-
-	return 0;
-}
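A minimal usage sketch for the legacy "group 0" branch above, built only from the helpers in this file:

static inline int ostid_to_fid_example(struct lu_fid *fid)
{
	struct ost_id oi = { };

	ostid_set_seq_mdt0(&oi);	/* legacy namespace: FID_SEQ_OST_MDT0 */
	oi.oi.oi_id = 12345;		/* object id within the OST */
	return ostid_to_fid(fid, &oi, 7 /* ost_idx */);
}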
-#endif /* _UAPI_LUSTRE_OSTID_H_ */

+ 0 - 94
drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h

@@ -1,94 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * User-settable parameter keys
- *
- * Author: Nathan Rutman <nathan@clusterfs.com>
- */
-
-#ifndef _UAPI_LUSTRE_PARAM_H_
-#define _UAPI_LUSTRE_PARAM_H_
-
-/** \defgroup param param
- *
- * @{
- */
-
-/****************** User-settable parameter keys *********************/
-/* e.g.
- *	tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda
- *	lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0
- *		    ... testfs-MDT0000.lov.stripesize=4M
- *		    ... testfs-OST0000.ost.client_cache_seconds=15
- *		    ... testfs.sys.timeout=<secs>
- *		    ... testfs.llite.max_read_ahead_mb=16
- */
-
-/* System global or special params not handled in obd's proc.
- * See mgs_write_log_sys()
- */
-#define PARAM_TIMEOUT		"timeout="	   /* global */
-#define PARAM_LDLM_TIMEOUT	"ldlm_timeout="	   /* global */
-#define PARAM_AT_MIN		"at_min="	   /* global */
-#define PARAM_AT_MAX		"at_max="	   /* global */
-#define PARAM_AT_EXTRA		"at_extra="	   /* global */
-#define PARAM_AT_EARLY_MARGIN	"at_early_margin=" /* global */
-#define PARAM_AT_HISTORY	"at_history="	   /* global */
-#define PARAM_JOBID_VAR		"jobid_var="	   /* global */
-#define PARAM_MGSNODE		"mgsnode="	   /* only at mounttime */
-#define PARAM_FAILNODE		"failover.node="   /* add failover nid */
-#define PARAM_FAILMODE		"failover.mode="   /* initial mount only */
-#define PARAM_ACTIVE		"active="	   /* activate/deactivate */
-#define PARAM_NETWORK		"network="	   /* bind on nid */
-#define PARAM_ID_UPCALL		"identity_upcall=" /* identity upcall */
-
-/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
-#define PARAM_OST		"ost."
-#define PARAM_OSD		"osd."
-#define PARAM_OSC		"osc."
-#define PARAM_MDT		"mdt."
-#define PARAM_HSM		"mdt.hsm."
-#define PARAM_MDD		"mdd."
-#define PARAM_MDC		"mdc."
-#define PARAM_LLITE		"llite."
-#define PARAM_LOV		"lov."
-#define PARAM_LOD		"lod."
-#define PARAM_OSP		"osp."
-#define PARAM_SYS		"sys."		/* global */
-#define PARAM_SRPC		"srpc."
-#define PARAM_SRPC_FLVR		"srpc.flavor."
-#define PARAM_SRPC_UDESC	"srpc.udesc.cli2mdt"
-#define PARAM_SEC		"security."
-#define PARAM_QUOTA		"quota."	/* global */
-
-/** @} param */
-
-#endif /* _UAPI_LUSTRE_PARAM_H_ */

+ 0 - 1327
drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h

@@ -1,1327 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre/lustre_user.h
- *
- * Lustre public user-space interface definitions.
- */
-
-#ifndef _LUSTRE_USER_H
-#define _LUSTRE_USER_H
-
-/** \defgroup lustreuser lustreuser
- *
- * @{
- */
-
-#ifdef __KERNEL__
-# include <linux/fs.h>
-# include <linux/quota.h>
-# include <linux/sched/signal.h>
-# include <linux/string.h> /* snprintf() */
-# include <linux/version.h>
-#else /* !__KERNEL__ */
-# define NEED_QUOTA_DEFS
-# include <stdio.h> /* snprintf() */
-# include <string.h>
-# include <sys/quota.h>
-# include <sys/stat.h>
-#endif /* __KERNEL__ */
-#include <uapi/linux/lustre/lustre_fiemap.h>
-
-/*
- * We always need to use the 64-bit version because the structure
- * is shared across the entire cluster, where 32-bit and 64-bit
- * machines coexist.
- */
-#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
-typedef struct stat64   lstat_t;
-#define lstat_f  lstat64
-#define fstat_f		fstat64
-#define fstatat_f	fstatat64
-#else
-typedef struct stat     lstat_t;
-#define lstat_f  lstat
-#define fstat_f		fstat
-#define fstatat_f	fstatat
-#endif
-
-#define HAVE_LOV_USER_MDS_DATA
-
-#define LUSTRE_EOF 0xffffffffffffffffULL
-
-/* for statfs() */
-#define LL_SUPER_MAGIC 0x0BD00BD0
-
-#ifndef FSFILT_IOC_GETFLAGS
-#define FSFILT_IOC_GETFLAGS	       _IOR('f', 1, long)
-#define FSFILT_IOC_SETFLAGS	       _IOW('f', 2, long)
-#define FSFILT_IOC_GETVERSION	     _IOR('f', 3, long)
-#define FSFILT_IOC_SETVERSION	     _IOW('f', 4, long)
-#define FSFILT_IOC_GETVERSION_OLD	 _IOR('v', 1, long)
-#define FSFILT_IOC_SETVERSION_OLD	 _IOW('v', 2, long)
-#endif
-
-/* FIEMAP flags supported by Lustre */
-#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
-
-enum obd_statfs_state {
-	OS_STATE_DEGRADED       = 0x00000001, /**< RAID degraded/rebuilding */
-	OS_STATE_READONLY       = 0x00000002, /**< filesystem is read-only */
-	OS_STATE_RDONLY_1       = 0x00000004, /**< obsolete 1.6, was EROFS=30 */
-	OS_STATE_RDONLY_2       = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
-	OS_STATE_RDONLY_3       = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
-};
-
-struct obd_statfs {
-	__u64	   os_type;
-	__u64	   os_blocks;
-	__u64	   os_bfree;
-	__u64	   os_bavail;
-	__u64	   os_files;
-	__u64	   os_ffree;
-	__u8	    os_fsid[40];
-	__u32	   os_bsize;
-	__u32	   os_namelen;
-	__u64	   os_maxbytes;
-	__u32	   os_state;       /**< obd_statfs_state OS_STATE_* flag */
-	__u32	   os_fprecreated; /* objs available now to the caller */
-				   /* used in QoS code to find preferred OSTs */
-	__u32	   os_spare2;
-	__u32	   os_spare3;
-	__u32	   os_spare4;
-	__u32	   os_spare5;
-	__u32	   os_spare6;
-	__u32	   os_spare7;
-	__u32	   os_spare8;
-	__u32	   os_spare9;
-};
-
-/**
- * File IDentifier.
- *
- * FID is a cluster-wide unique identifier of a file or an object (stripe).
- * FIDs are never reused.
- **/
-struct lu_fid {
-       /**
-	* FID sequence. Sequence is a unit of migration: all files (objects)
-	* with FIDs from a given sequence are stored on the same server.
-	* Lustre should support 2^64 objects, so even if each sequence
-	* has only a single object we can still enumerate 2^64 objects.
-	**/
-	__u64 f_seq;
-	/* FID number within sequence. */
-	__u32 f_oid;
-	/**
-	 * FID version, used to distinguish different versions (in the sense
-	 * of snapshots, etc.) of the same file system object. Not currently
-	 * used.
-	 **/
-	__u32 f_ver;
-};
-
-static inline bool fid_is_zero(const struct lu_fid *fid)
-{
-	return !fid->f_seq && !fid->f_oid;
-}
-
-struct filter_fid {
-	struct lu_fid	ff_parent;  /* ff_parent.f_ver == file stripe number */
-};
-
-/* keep this one for compatibility */
-struct filter_fid_old {
-	struct lu_fid	ff_parent;
-	__u64		ff_objid;
-	__u64		ff_seq;
-};
-
-/* Userspace should treat lu_fid as opaque, and only use the following methods
- * to print or parse them.  Other functions (e.g. compare, swab) could be moved
- * here from lustre_idl.h if needed.
- */
-struct lu_fid;
-
-/**
- * Struct for object attributes, kept in the inode's extended attributes (EA).
- * Introduced in the 2.0 release (see b15993 for details).
- * Added to all objects since Lustre 2.4, as it contains the self FID.
- */
-struct lustre_mdt_attrs {
-	/**
-	 * Bitfield for supported data in this structure. From enum lma_compat.
-	 * lma_self_fid and lma_flags are always available.
-	 */
-	__u32   lma_compat;
-	/**
-	 * Per-file incompat feature list. Lustre version should support all
-	 * flags set in this field. The supported feature mask is available in
-	 * LMA_INCOMPAT_SUPP.
-	 */
-	__u32   lma_incompat;
-	/** FID of this inode */
-	struct lu_fid  lma_self_fid;
-};
-
-/**
- * Prior to 2.4, the LMA structure also included SOM attributes, which have
- * since been moved to a dedicated xattr.
- * lma_flags was also removed because of the lma_compat/incompat fields.
- */
-#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
-
-/**
- * OST object IDentifier.
- */
-struct ost_id {
-	union {
-		struct {
-			__u64	oi_id;
-			__u64	oi_seq;
-		} oi;
-		struct lu_fid oi_fid;
-	};
-};
-
-#define DOSTID "%#llx:%llu"
-#define POSTID(oi) ostid_seq(oi), ostid_id(oi)
-
-/*
- * The ioctl naming rules:
- * LL_*     - works on the currently opened filehandle instead of parent dir
- * *_OBD_*  - gets data for both OSC or MDC (LOV, LMV indirectly)
- * *_MDC_*  - gets/sets data related to MDC
- * *_LOV_*  - gets/sets data related to OSC/LOV
- * *FILE*   - called on parent dir and passes in a filename
- * *STRIPE* - set/get lov_user_md
- * *INFO    - set/get lov_user_mds_data
- */
-/*	lustre_ioctl.h			101-150 */
-#define LL_IOC_GETFLAGS		 _IOR('f', 151, long)
-#define LL_IOC_SETFLAGS		 _IOW('f', 152, long)
-#define LL_IOC_CLRFLAGS		 _IOW('f', 153, long)
-#define LL_IOC_LOV_SETSTRIPE	    _IOW('f', 154, long)
-#define LL_IOC_LOV_GETSTRIPE	    _IOW('f', 155, long)
-#define LL_IOC_LOV_SETEA		_IOW('f', 156, long)
-/*	LL_IOC_RECREATE_OBJ		157 obsolete */
-/*	LL_IOC_RECREATE_FID		158 obsolete */
-#define LL_IOC_GROUP_LOCK	       _IOW('f', 158, long)
-#define LL_IOC_GROUP_UNLOCK	     _IOW('f', 159, long)
-/* #define LL_IOC_QUOTACHECK		160 OBD_IOC_QUOTACHECK */
-/* #define LL_IOC_POLL_QUOTACHECK	161 OBD_IOC_POLL_QUOTACHECK */
-/* #define LL_IOC_QUOTACTL		162 OBD_IOC_QUOTACTL */
-#define IOC_OBD_STATFS		  _IOWR('f', 164, struct obd_statfs *)
-/*	IOC_LOV_GETINFO			165 obsolete */
-#define LL_IOC_FLUSHCTX		 _IOW('f', 166, long)
-/* LL_IOC_RMTACL			167 obsolete */
-#define LL_IOC_GETOBDCOUNT	      _IOR('f', 168, long)
-#define LL_IOC_LLOOP_ATTACH	     _IOWR('f', 169, long)
-#define LL_IOC_LLOOP_DETACH	     _IOWR('f', 170, long)
-#define LL_IOC_LLOOP_INFO	       _IOWR('f', 171, struct lu_fid)
-#define LL_IOC_LLOOP_DETACH_BYDEV       _IOWR('f', 172, long)
-#define LL_IOC_PATH2FID		 _IOR('f', 173, long)
-#define LL_IOC_GET_CONNECT_FLAGS	_IOWR('f', 174, __u64 *)
-#define LL_IOC_GET_MDTIDX	       _IOR('f', 175, int)
-
-/*	lustre_ioctl.h			177-210 */
-#define LL_IOC_HSM_STATE_GET		_IOR('f', 211, struct hsm_user_state)
-#define LL_IOC_HSM_STATE_SET		_IOW('f', 212, struct hsm_state_set)
-#define LL_IOC_HSM_CT_START		_IOW('f', 213, struct lustre_kernelcomm)
-#define LL_IOC_HSM_COPY_START		_IOW('f', 214, struct hsm_copy *)
-#define LL_IOC_HSM_COPY_END		_IOW('f', 215, struct hsm_copy *)
-#define LL_IOC_HSM_PROGRESS		_IOW('f', 216, struct hsm_user_request)
-#define LL_IOC_HSM_REQUEST		_IOW('f', 217, struct hsm_user_request)
-#define LL_IOC_DATA_VERSION		_IOR('f', 218, struct ioc_data_version)
-#define LL_IOC_LOV_SWAP_LAYOUTS		_IOW('f', 219, \
-						struct lustre_swap_layouts)
-#define LL_IOC_HSM_ACTION		_IOR('f', 220, \
-						struct hsm_current_action)
-/* see <lustre_lib.h> for ioctl numbers 221-232 */
-
-#define LL_IOC_LMV_SETSTRIPE	    _IOWR('f', 240, struct lmv_user_md)
-#define LL_IOC_LMV_GETSTRIPE	    _IOWR('f', 241, struct lmv_user_md)
-#define LL_IOC_SET_LEASE		_IOWR('f', 243, long)
-#define LL_IOC_GET_LEASE		_IO('f', 244)
-#define LL_IOC_HSM_IMPORT		_IOWR('f', 245, struct hsm_user_import)
-#define LL_IOC_LMV_SET_DEFAULT_STRIPE	_IOWR('f', 246, struct lmv_user_md)
-#define LL_IOC_MIGRATE			_IOR('f', 247, int)
-#define LL_IOC_FID2MDTIDX		_IOWR('f', 248, struct lu_fid)
-#define LL_IOC_GETPARENT		_IOWR('f', 249, struct getparent)
-
-/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
-enum ll_lease_type {
-	LL_LEASE_RDLCK	= 0x1,
-	LL_LEASE_WRLCK	= 0x2,
-	LL_LEASE_UNLCK	= 0x4,
-};
-
-#define LL_STATFS_LMV	   1
-#define LL_STATFS_LOV	   2
-#define LL_STATFS_NODELAY	4
-
-#define IOC_MDC_TYPE	    'i'
-#define IOC_MDC_LOOKUP	  _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_GETFILESTRIPE   _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
-#define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
-#define LL_IOC_MDC_GETINFO      _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
-
-#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
-
-/* Define O_LOV_DELAY_CREATE as a mask of flags that are not useful for
- * regular files, are unlikely to be used in practice, and are not harmful if
- * used incorrectly.  O_NOCTTY and FASYNC are only meaningful for character
- * devices and are safe for use on new files (See LU-812, LU-4209).
- */
-#define O_LOV_DELAY_CREATE	(O_NOCTTY | FASYNC)
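-
-/*
- * Usage sketch (hypothetical "path" and a previously filled lov_user_md
- * "lum"): create a file whose layout is set explicitly before first use:
- *
- *	int fd = open(path, O_CREAT | O_WRONLY | O_LOV_DELAY_CREATE, 0644);
- *
- *	if (fd >= 0)
- *		ioctl(fd, LL_IOC_LOV_SETSTRIPE, lum);
- */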
-
-#define LL_FILE_IGNORE_LOCK     0x00000001
-#define LL_FILE_GROUP_LOCKED    0x00000002
-#define LL_FILE_READAHEA	0x00000004
-#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
-#define LL_FILE_LOCKLESS_IO     0x00000010 /* server-side locks with cio */
-#define LL_FILE_RMTACL	  0x00000020
-
-#define LOV_USER_MAGIC_V1	0x0BD10BD0
-#define LOV_USER_MAGIC		LOV_USER_MAGIC_V1
-#define LOV_USER_MAGIC_JOIN_V1	0x0BD20BD0
-#define LOV_USER_MAGIC_V3	0x0BD30BD0
-/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
-#define LOV_USER_MAGIC_SPECIFIC	0x0BD50BD0	/* for specific OSTs */
-
-#define LMV_USER_MAGIC    0x0CD30CD0    /*default lmv magic*/
-
-#define LOV_PATTERN_RAID0	0x001
-#define LOV_PATTERN_RAID1	0x002
-#define LOV_PATTERN_FIRST	0x100
-#define LOV_PATTERN_CMOBD	0x200
-
-#define LOV_PATTERN_F_MASK	0xffff0000
-#define LOV_PATTERN_F_HOLE	0x40000000 /* there is hole in LOV EA */
-#define LOV_PATTERN_F_RELEASED	0x80000000 /* HSM released file */
-
-#define LOV_MAXPOOLNAME 15
-#define LOV_POOLNAMEF "%.15s"
-
-#define LOV_MIN_STRIPE_BITS 16   /* maximum PAGE_SIZE (ia64), power of 2 */
-#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
-#define LOV_MAX_STRIPE_COUNT_OLD 160
-/* This calculation is crafted so that an input of 4096 will result in 160,
- * which in turn is equal to the old maximal stripe count.
- * XXX: In fact this is too simplified for now; what it also needs is an
- * ea_type argument to know exactly how much space each stripe consumes.
- *
- * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
- * allocation that is sufficient for the current generation of systems.
- *
- * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1),
- * i.e. (12 * 4096 - 256) / 24 = 2037, rounded down to 2000.
- */
-#define LOV_MAX_STRIPE_COUNT 2000  /* ((12 * 4096 - 256) / 24) */
-#define LOV_ALL_STRIPES       0xffff /* only valid for directories */
-#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
-
-#define XATTR_LUSTRE_PREFIX	"lustre."
-#define XATTR_LUSTRE_LOV	"lustre.lov"
-
-#define lov_user_ost_data lov_user_ost_data_v1
-struct lov_user_ost_data_v1 {     /* per-stripe data structure */
-	struct ost_id l_ost_oi;	  /* OST object ID */
-	__u32 l_ost_gen;	  /* generation of this OST index */
-	__u32 l_ost_idx;	  /* OST index in LOV */
-} __packed;
-
-#define lov_user_md lov_user_md_v1
-struct lov_user_md_v1 {	   /* LOV EA user data (host-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_USER_MAGIC_V1 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	union {
-		__u16 lmm_stripe_offset;  /* starting stripe offset in
-					   * lmm_objects, use when writing
-					   */
-		__u16 lmm_layout_gen;     /* layout generation number
-					   * used when reading
-					   */
-	};
-	struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-} __attribute__((packed,  __may_alias__));
-
-struct lov_user_md_v3 {	   /* LOV EA user data (host-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_USER_MAGIC_V3 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	union {
-		__u16 lmm_stripe_offset;  /* starting stripe offset in
-					   * lmm_objects, use when writing
-					   */
-		__u16 lmm_layout_gen;     /* layout generation number
-					   * used when reading
-					   */
-	};
-	char  lmm_pool_name[LOV_MAXPOOLNAME + 1];   /* pool name */
-	struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-} __packed;
-
-static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
-{
-	if (lmm_magic == LOV_USER_MAGIC_V1)
-		return sizeof(struct lov_user_md_v1) +
-				stripes * sizeof(struct lov_user_ost_data_v1);
-	return sizeof(struct lov_user_md_v3) +
-	       stripes * sizeof(struct lov_user_ost_data_v1);
-}
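-
-/*
- * Usage sketch (hypothetical open file descriptor "fd"): sizing a buffer
- * before fetching striping information with LL_IOC_LOV_GETSTRIPE.  The
- * caller fills lmm_magic and lmm_stripe_count so the kernel knows how
- * much room the buffer has:
- *
- *	struct lov_user_md_v1 *lum;
- *
- *	lum = malloc(lov_user_md_size(LOV_MAX_STRIPE_COUNT,
- *				      LOV_USER_MAGIC_V1));
- *	if (lum) {
- *		lum->lmm_magic = LOV_USER_MAGIC_V1;
- *		lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
- *		ioctl(fd, LL_IOC_LOV_GETSTRIPE, lum);
- *	}
- */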
-
-/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
- * use this.  It is unsafe to #define those values in this header as it
- * is possible the application has already #included <sys/stat.h>.
- */
-#ifdef HAVE_LOV_USER_MDS_DATA
-#define lov_user_mds_data lov_user_mds_data_v1
-struct lov_user_mds_data_v1 {
-	lstat_t lmd_st;		 /* MDS stat struct */
-	struct lov_user_md_v1 lmd_lmm;  /* LOV EA V1 user data */
-} __packed;
-
-struct lov_user_mds_data_v3 {
-	lstat_t lmd_st;		 /* MDS stat struct */
-	struct lov_user_md_v3 lmd_lmm;  /* LOV EA V3 user data */
-} __packed;
-#endif
-
-struct lmv_user_mds_data {
-	struct lu_fid	lum_fid;
-	__u32		lum_padding;
-	__u32		lum_mds;
-};
-
-enum lmv_hash_type {
-	LMV_HASH_TYPE_UNKNOWN	= 0,	/* 0 is reserved for testing purposes */
-	LMV_HASH_TYPE_ALL_CHARS = 1,
-	LMV_HASH_TYPE_FNV_1A_64 = 2,
-};
-
-#define LMV_HASH_NAME_ALL_CHARS		"all_char"
-#define LMV_HASH_NAME_FNV_1A_64		"fnv_1a_64"
-
-/*
- * Derived the same way as LOV_MAX_STRIPE_COUNT above:
- * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data)
- */
-#define LMV_MAX_STRIPE_COUNT 2000  /* ((12 * 4096 - 256) / 24) */
-#define lmv_user_md lmv_user_md_v1
-struct lmv_user_md_v1 {
-	__u32	lum_magic;	 /* must be the first field */
-	__u32	lum_stripe_count;  /* dirstripe count */
-	__u32	lum_stripe_offset; /* MDT idx for default dirstripe */
-	__u32	lum_hash_type;     /* Dir stripe policy */
-	__u32	lum_type;	  /* LMV type: default or normal */
-	__u32	lum_padding1;
-	__u32	lum_padding2;
-	__u32	lum_padding3;
-	char	lum_pool_name[LOV_MAXPOOLNAME + 1];
-	struct	lmv_user_mds_data  lum_objects[0];
-} __packed;
-
-static inline int lmv_user_md_size(int stripes, int lmm_magic)
-{
-	return sizeof(struct lmv_user_md) +
-		      stripes * sizeof(struct lmv_user_mds_data);
-}
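-
-/*
- * e.g. a sketch (hypothetical stripe count "n"): sizing a buffer before
- * fetching directory striping with LL_IOC_LMV_GETSTRIPE; the caller still
- * has to fill lum_magic and lum_stripe_count before the ioctl:
- *
- *	struct lmv_user_md *lum = malloc(lmv_user_md_size(n, LMV_USER_MAGIC));
- */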
-
-struct ll_recreate_obj {
-	__u64 lrc_id;
-	__u32 lrc_ost_idx;
-};
-
-struct ll_fid {
-	__u64 id;	 /* holds object id */
-	__u32 generation; /* holds object generation */
-	__u32 f_type;     /* holds object type or stripe idx when passing it to
-			   * OST for saving into EA.
-			   */
-};
-
-#define UUID_MAX	40
-struct obd_uuid {
-	char uuid[UUID_MAX];
-};
-
-static inline bool obd_uuid_equals(const struct obd_uuid *u1,
-				   const struct obd_uuid *u2)
-{
-	return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0;
-}
-
-static inline int obd_uuid_empty(struct obd_uuid *uuid)
-{
-	return uuid->uuid[0] == '\0';
-}
-
-static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
-{
-	strncpy((char *)uuid->uuid, tmp, sizeof(*uuid));
-	uuid->uuid[sizeof(*uuid) - 1] = '\0';
-}
-
-/* For printf's only, make sure uuid is terminated */
-static inline char *obd_uuid2str(const struct obd_uuid *uuid)
-{
-	if (!uuid)
-		return NULL;
-
-	if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
-		/* Obviously not safe, but for printfs, no real harm done...
-		 * we're always null-terminated, even in a race.
-		 */
-		static char temp[sizeof(*uuid)];
-
-		memcpy(temp, uuid->uuid, sizeof(*uuid) - 1);
-		temp[sizeof(*uuid) - 1] = '\0';
-		return temp;
-	}
-	return (char *)(uuid->uuid);
-}
-
-/* Extract fsname from uuid (or target name) of a target
- * e.g. (myfs-OST0007_UUID -> myfs)
- * see also deuuidify.
- */
-static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
-{
-	char *p;
-
-	strncpy(buf, uuid, buflen - 1);
-	buf[buflen - 1] = '\0';
-	p = strrchr(buf, '-');
-	if (p)
-		*p = '\0';
-}
-
-/* printf display format
- * usage: printf("file FID is "DFID"\n", PFID(fid));
- */
-#define FID_NOBRACE_LEN 40
-#define FID_LEN (FID_NOBRACE_LEN + 2)
-#define DFID_NOBRACE "%#llx:0x%x:0x%x"
-#define DFID "[" DFID_NOBRACE "]"
-#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
-
-/* scanf input parse format for fids in DFID_NOBRACE format
- * Need to strip '[' from DFID format first or use "["SFID"]" at caller.
- * usage: sscanf(fidstr, SFID, RFID(&fid));
- */
-#define SFID "0x%llx:0x%x:0x%x"
-#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
-
-/********* Quotas **********/
-
-#define Q_QUOTACHECK   0x800100 /* deprecated as of 2.4 */
-#define Q_INITQUOTA    0x800101 /* deprecated as of 2.4  */
-#define Q_GETOINFO     0x800102 /* get obd quota info */
-#define Q_GETOQUOTA    0x800103 /* get obd quotas */
-#define Q_FINVALIDATE  0x800104 /* deprecated as of 2.4 */
-
-/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
-#define LUSTRE_Q_QUOTAON    0x800002	/* deprecated as of 2.4 */
-#define LUSTRE_Q_QUOTAOFF   0x800003	/* deprecated as of 2.4 */
-#define LUSTRE_Q_GETINFO    0x800005     /* get information about quota files */
-#define LUSTRE_Q_SETINFO    0x800006     /* set information about quota files */
-#define LUSTRE_Q_GETQUOTA   0x800007     /* get user quota structure */
-#define LUSTRE_Q_SETQUOTA   0x800008     /* set user quota structure */
-/* lustre-specific control commands */
-#define LUSTRE_Q_INVALIDATE  0x80000b	/* deprecated as of 2.4 */
-#define LUSTRE_Q_FINVALIDATE 0x80000c	/* deprecated as of 2.4 */
-
-#define UGQUOTA 2       /* set both USRQUOTA and GRPQUOTA */
-
-#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
-
-/* permission */
-#define N_PERMS_MAX      64
-
-struct perm_downcall_data {
-	__u64 pdd_nid;
-	__u32 pdd_perm;
-	__u32 pdd_padding;
-};
-
-struct identity_downcall_data {
-	__u32			    idd_magic;
-	__u32			    idd_err;
-	__u32			    idd_uid;
-	__u32			    idd_gid;
-	__u32			    idd_nperms;
-	__u32			    idd_ngroups;
-	struct perm_downcall_data idd_perms[N_PERMS_MAX];
-	__u32			    idd_groups[0];
-};
-
-/* lustre volatile file support
- * file name header: ".^L^S^T^R:VOLATILE"
- */
-#define LUSTRE_VOLATILE_HDR	".\x0c\x13\x14\x12:VOLATILE"
-#define LUSTRE_VOLATILE_HDR_LEN	14
-/* hdr + MDT index */
-#define LUSTRE_VOLATILE_IDX	LUSTRE_VOLATILE_HDR":%.4X:"
-
-enum lustre_quota_version {
-	LUSTRE_QUOTA_V2 = 1
-};
-
-/* XXX: same as if_dqinfo struct in kernel */
-struct obd_dqinfo {
-	__u64 dqi_bgrace;
-	__u64 dqi_igrace;
-	__u32 dqi_flags;
-	__u32 dqi_valid;
-};
-
-/* XXX: same as if_dqblk struct in kernel, plus one padding */
-struct obd_dqblk {
-	__u64 dqb_bhardlimit;
-	__u64 dqb_bsoftlimit;
-	__u64 dqb_curspace;
-	__u64 dqb_ihardlimit;
-	__u64 dqb_isoftlimit;
-	__u64 dqb_curinodes;
-	__u64 dqb_btime;
-	__u64 dqb_itime;
-	__u32 dqb_valid;
-	__u32 dqb_padding;
-};
-
-enum {
-	QC_GENERAL      = 0,
-	QC_MDTIDX       = 1,
-	QC_OSTIDX       = 2,
-	QC_UUID	 = 3
-};
-
-struct if_quotactl {
-	__u32		   qc_cmd;
-	__u32		   qc_type;
-	__u32		   qc_id;
-	__u32		   qc_stat;
-	__u32		   qc_valid;
-	__u32		   qc_idx;
-	struct obd_dqinfo       qc_dqinfo;
-	struct obd_dqblk	qc_dqblk;
-	char		    obd_type[16];
-	struct obd_uuid	 obd_uuid;
-};
-
-/* swap layout flags */
-#define SWAP_LAYOUTS_CHECK_DV1		(1 << 0)
-#define SWAP_LAYOUTS_CHECK_DV2		(1 << 1)
-#define SWAP_LAYOUTS_KEEP_MTIME		(1 << 2)
-#define SWAP_LAYOUTS_KEEP_ATIME		(1 << 3)
-#define SWAP_LAYOUTS_CLOSE		(1 << 4)
-
-/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
-#define SWAP_LAYOUTS_MDS_HSM		(1 << 31)
-struct lustre_swap_layouts {
-	__u64	sl_flags;
-	__u32	sl_fd;
-	__u32	sl_gid;
-	__u64	sl_dv1;
-	__u64	sl_dv2;
-};
-
-/********* Changelogs **********/
-/** Changelog record types */
-enum changelog_rec_type {
-	CL_MARK     = 0,
-	CL_CREATE   = 1,  /* namespace */
-	CL_MKDIR    = 2,  /* namespace */
-	CL_HARDLINK = 3,  /* namespace */
-	CL_SOFTLINK = 4,  /* namespace */
-	CL_MKNOD    = 5,  /* namespace */
-	CL_UNLINK   = 6,  /* namespace */
-	CL_RMDIR    = 7,  /* namespace */
-	CL_RENAME   = 8,  /* namespace */
-	CL_EXT      = 9,  /* namespace extended record (2nd half of rename) */
-	CL_OPEN     = 10, /* not currently used */
-	CL_CLOSE    = 11, /* may be written to log only with mtime change */
-	CL_LAYOUT   = 12, /* file layout/striping modified */
-	CL_TRUNC    = 13,
-	CL_SETATTR  = 14,
-	CL_XATTR    = 15,
-	CL_HSM      = 16, /* HSM specific events, see flags */
-	CL_MTIME    = 17, /* Precedence: setattr > mtime > ctime > atime */
-	CL_CTIME    = 18,
-	CL_ATIME    = 19,
-	CL_LAST
-};
-
-static inline const char *changelog_type2str(int type)
-{
-	static const char *changelog_str[] = {
-		"MARK",  "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
-		"RMDIR", "RENME", "RNMTO", "OPEN",  "CLOSE", "LYOUT", "TRUNC",
-		"SATTR", "XATTR", "HSM",   "MTIME", "CTIME", "ATIME",
-	};
-
-	if (type >= 0 && type < CL_LAST)
-		return changelog_str[type];
-	return NULL;
-}
-
-/* per-record flags */
-#define CLF_FLAGSHIFT   12
-#define CLF_FLAGMASK    ((1U << CLF_FLAGSHIFT) - 1)
-#define CLF_VERMASK     (~CLF_FLAGMASK)
-enum changelog_rec_flags {
-	CLF_VERSION	= 0x1000,
-	CLF_RENAME	= 0x2000,
-	CLF_JOBID	= 0x4000,
-	CLF_SUPPORTED	= CLF_VERSION | CLF_RENAME | CLF_JOBID
-};
-
-/* Anything under the flagmask may be per-type (if desired) */
-/* Flags for unlink */
-#define CLF_UNLINK_LAST       0x0001 /* Unlink of last hardlink */
-#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
-				     /* HSM cleaning needed */
-/* Flags for rename */
-#define CLF_RENAME_LAST		0x0001	/* rename unlinks the last hardlink
-					 * of the target
-					 */
-#define CLF_RENAME_LAST_EXISTS	0x0002	/* rename unlinks the last hardlink of
-					 * a target that has an archive in the
-					 * backend
-					 */
-
-/* Flags for HSM */
-/* 12b used (from high weight to low weight):
- * 2b for flags
- * 3b for event
- * 7b for error code
- */
-#define CLF_HSM_ERR_L	0 /* HSM return code, 7 bits */
-#define CLF_HSM_ERR_H	6
-#define CLF_HSM_EVENT_L      7 /* HSM event, 3 bits, see enum hsm_event */
-#define CLF_HSM_EVENT_H      9
-#define CLF_HSM_FLAG_L      10 /* HSM flags, 2 bits, 1 used, 1 spare */
-#define CLF_HSM_FLAG_H      11
-#define CLF_HSM_SPARE_L     12 /* 4 spare bits */
-#define CLF_HSM_SPARE_H     15
-#define CLF_HSM_LAST	15
-
-/* Remove bits higher than _h, then extract the value
- * between _h and _l by shifting the lower weight to bit 0.
- */
-#define CLF_GET_BITS(_b, _h, _l) (((_b << (CLF_HSM_LAST - _h)) & 0xFFFF) \
-				   >> (CLF_HSM_LAST - _h + _l))
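-
-/*
- * For example, extracting the HSM event bits (_h = CLF_HSM_EVENT_H = 9,
- * _l = CLF_HSM_EVENT_L = 7) from a flags value that carries HE_RELEASE (3)
- * in the event field (0x3 << 7 = 0x180):
- *
- *	((0x180 << 6) & 0xFFFF) >> 13 == 3, i.e. HE_RELEASE
- */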
-
-#define CLF_HSM_SUCCESS      0x00
-#define CLF_HSM_MAXERROR     0x7E
-#define CLF_HSM_ERROVERFLOW  0x7F
-
-#define CLF_HSM_DIRTY	1 /* file is dirty after HSM request end */
-
-/* 3-bit field => 8 values allowed */
-enum hsm_event {
-	HE_ARCHIVE      = 0,
-	HE_RESTORE      = 1,
-	HE_CANCEL       = 2,
-	HE_RELEASE      = 3,
-	HE_REMOVE       = 4,
-	HE_STATE	= 5,
-	HE_SPARE1       = 6,
-	HE_SPARE2       = 7,
-};
-
-static inline enum hsm_event hsm_get_cl_event(__u16 flags)
-{
-	return CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L);
-}
-
-static inline void hsm_set_cl_event(int *flags, enum hsm_event he)
-{
-	*flags |= (he << CLF_HSM_EVENT_L);
-}
-
-static inline __u16 hsm_get_cl_flags(int flags)
-{
-	return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
-}
-
-static inline void hsm_set_cl_flags(int *flags, int bits)
-{
-	*flags |= (bits << CLF_HSM_FLAG_L);
-}
-
-static inline int hsm_get_cl_error(int flags)
-{
-	return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
-}
-
-static inline void hsm_set_cl_error(int *flags, int error)
-{
-	*flags |= (error << CLF_HSM_ERR_L);
-}
-
-enum changelog_send_flag {
-	/* Not yet implemented */
-	CHANGELOG_FLAG_FOLLOW	= 0x01,
-	/*
-	 * Blocking IO makes sense in case of slow user parsing of the records,
-	 * but it also prevents us from cleaning up if the records are not
-	 * consumed.
-	 */
-	CHANGELOG_FLAG_BLOCK	= 0x02,
-	/* Pack jobid into the changelog records if available. */
-	CHANGELOG_FLAG_JOBID	= 0x04,
-};
-
-#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \
-				  changelog_rec_offset(CLF_SUPPORTED))
-
-/* 31 usable bytes of string + a null terminator. */
-#define LUSTRE_JOBID_SIZE	32
-
-/*
- * This is the minimal changelog record. It can contain extensions
- * such as rename fields or process jobid. Its exact content is described
- * by the cr_flags.
- *
- * Extensions are packed in the same order as their corresponding flags.
- */
-struct changelog_rec {
-	__u16		 cr_namelen;
-	__u16		 cr_flags; /**< \a changelog_rec_flags */
-	__u32		 cr_type;  /**< \a changelog_rec_type */
-	__u64		 cr_index; /**< changelog record number */
-	__u64		 cr_prev;  /**< last index for this target fid */
-	__u64		 cr_time;
-	union {
-		struct lu_fid    cr_tfid;	/**< target fid */
-		__u32	 cr_markerflags; /**< CL_MARK flags */
-	};
-	struct lu_fid	    cr_pfid;	/**< parent fid */
-} __packed;
-
-/* Changelog extension for RENAME. */
-struct changelog_ext_rename {
-	struct lu_fid	cr_sfid;	/**< source fid, or zero */
-	struct lu_fid	cr_spfid;	/**< source parent fid, or zero */
-};
-
-/* Changelog extension to include JOBID. */
-struct changelog_ext_jobid {
-	char	cr_jobid[LUSTRE_JOBID_SIZE];	/**< zero-terminated string. */
-};
-
-static inline size_t changelog_rec_offset(enum changelog_rec_flags crf)
-{
-	size_t size = sizeof(struct changelog_rec);
-
-	if (crf & CLF_RENAME)
-		size += sizeof(struct changelog_ext_rename);
-
-	if (crf & CLF_JOBID)
-		size += sizeof(struct changelog_ext_jobid);
-
-	return size;
-}
-
-static inline size_t changelog_rec_size(struct changelog_rec *rec)
-{
-	return changelog_rec_offset(rec->cr_flags);
-}
-
-static inline size_t changelog_rec_varsize(struct changelog_rec *rec)
-{
-	return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen;
-}
-
-static inline
-struct changelog_ext_rename *changelog_rec_rename(struct changelog_rec *rec)
-{
-	enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION;
-
-	return (struct changelog_ext_rename *)((char *)rec +
-					       changelog_rec_offset(crf));
-}
-
-/* The jobid follows the rename extension, if present */
-static inline
-struct changelog_ext_jobid *changelog_rec_jobid(struct changelog_rec *rec)
-{
-	enum changelog_rec_flags crf = rec->cr_flags &
-				       (CLF_VERSION | CLF_RENAME);
-
-	return (struct changelog_ext_jobid *)((char *)rec +
-					      changelog_rec_offset(crf));
-}
-
-/* The name follows the rename and jobid extensions, if present */
-static inline char *changelog_rec_name(struct changelog_rec *rec)
-{
-	return (char *)rec + changelog_rec_offset(rec->cr_flags &
-						  CLF_SUPPORTED);
-}
-
-static inline size_t changelog_rec_snamelen(struct changelog_rec *rec)
-{
-	return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1;
-}
-
-static inline char *changelog_rec_sname(struct changelog_rec *rec)
-{
-	char *cr_name = changelog_rec_name(rec);
-
-	return cr_name + strlen(cr_name) + 1;
-}
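-
-/*
- * Usage sketch (hypothetical record pointer "rec"): the optional extension
- * fields are accessed through the helpers above, in the order they are
- * packed:
- *
- *	struct lu_fid sfid;
- *
- *	if (rec->cr_flags & CLF_RENAME)
- *		sfid = changelog_rec_rename(rec)->cr_sfid;
- *	if (rec->cr_flags & CLF_JOBID)
- *		printf("job: %s\n", changelog_rec_jobid(rec)->cr_jobid);
- *	printf("name: %.*s\n", rec->cr_namelen, changelog_rec_name(rec));
- */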
-
-/**
- * Remap a record to the desired format as specified by the crf flags.
- * The record must be big enough to contain the final remapped version.
- * Superfluous extension fields are removed and missing ones are added
- * and zeroed. The flags of the record are updated accordingly.
- *
- * The jobid and rename extensions can be added to a record, typically to
- * match the format an application expects. In this case, the newly added
- * fields will be zeroed.
- * The jobid field can be removed, to guarantee compatibility with older
- * clients that don't expect this field in the records they process.
- *
- * The following assumptions are being made:
- *	- CLF_RENAME will not be removed
- *	- CLF_JOBID will not be added without CLF_RENAME being added too
- *
- * @param[in,out]  rec		The record to remap.
- * @param[in]	   crf_wanted	Flags describing the desired extensions.
- */
-static inline void changelog_remap_rec(struct changelog_rec *rec,
-				       enum changelog_rec_flags crf_wanted)
-{
-	char *jid_mov, *rnm_mov;
-
-	crf_wanted &= CLF_SUPPORTED;
-
-	if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted)
-		return;
-
-	/* First move the variable-length name field */
-	memmove((char *)rec + changelog_rec_offset(crf_wanted),
-		changelog_rec_name(rec), rec->cr_namelen);
-
-	/* Locations of jobid and rename extensions in the remapped record */
-	jid_mov = (char *)rec +
-		  changelog_rec_offset(crf_wanted & ~CLF_JOBID);
-	rnm_mov = (char *)rec +
-		  changelog_rec_offset(crf_wanted & ~(CLF_JOBID | CLF_RENAME));
-
-	/* Move the extension fields to the desired positions */
-	if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID))
-		memmove(jid_mov, changelog_rec_jobid(rec),
-			sizeof(struct changelog_ext_jobid));
-
-	if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME))
-		memmove(rnm_mov, changelog_rec_rename(rec),
-			sizeof(struct changelog_ext_rename));
-
-	/* Clear newly added fields */
-	if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID))
-		memset(jid_mov, 0, sizeof(struct changelog_ext_jobid));
-
-	if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME))
-		memset(rnm_mov, 0, sizeof(struct changelog_ext_rename));
-
-	/* Update the record's flags accordingly */
-	rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted;
-}
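-
-/*
- * For example (a sketch), a reader could strip the jobid extension from a
- * record before handing it to an application built without jobid support:
- *
- *	changelog_remap_rec(rec, CLF_VERSION | CLF_RENAME);
- */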
-
-struct ioc_changelog {
-	__u64 icc_recno;
-	__u32 icc_mdtindex;
-	__u32 icc_id;
-	__u32 icc_flags;
-};
-
-enum changelog_message_type {
-	CL_RECORD = 10, /* message is a changelog_rec */
-	CL_EOF    = 11, /* at end of current changelog */
-};
-
-/********* Misc **********/
-
-struct ioc_data_version {
-	__u64 idv_version;
-	__u64 idv_flags;     /* See LL_DV_xxx */
-};
-
-#define LL_DV_RD_FLUSH	(1 << 0) /* Flush dirty pages from clients */
-#define LL_DV_WR_FLUSH	(1 << 1) /* Flush all caching pages from clients */
-
-#ifndef offsetof
-# define offsetof(typ, memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define dot_lustre_name ".lustre"
-
-/********* HSM **********/
-
-/** HSM per-file state
- * See HSM_FLAGS below.
- */
-enum hsm_states {
-	HS_NONE		= 0x00000000,
-	HS_EXISTS	= 0x00000001,
-	HS_DIRTY	= 0x00000002,
-	HS_RELEASED	= 0x00000004,
-	HS_ARCHIVED	= 0x00000008,
-	HS_NORELEASE	= 0x00000010,
-	HS_NOARCHIVE	= 0x00000020,
-	HS_LOST		= 0x00000040,
-};
-
-/* HSM user-settable flags. */
-#define HSM_USER_MASK   (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
-
-/* Other HSM flags. */
-#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED)
-
-/*
- * All HSM-related possible flags that could be applied to a file.
- * This should be kept in sync with hsm_states.
- */
-#define HSM_FLAGS_MASK  (HSM_USER_MASK | HSM_STATUS_MASK)
-
-/**
- * HSM request progress state
- */
-enum hsm_progress_states {
-	HPS_WAITING	= 1,
-	HPS_RUNNING	= 2,
-	HPS_DONE	= 3,
-};
-
-#define HPS_NONE	0
-
-static inline char *hsm_progress_state2name(enum hsm_progress_states s)
-{
-	switch  (s) {
-	case HPS_WAITING:	return "waiting";
-	case HPS_RUNNING:	return "running";
-	case HPS_DONE:		return "done";
-	default:		return "unknown";
-	}
-}
-
-struct hsm_extent {
-	__u64 offset;
-	__u64 length;
-} __packed;
-
-/**
- * Current HSM states of a Lustre file.
- *
- * This structure is mainly meant to be sent to user space. It describes the
- * current HSM flags and in-progress action.
- */
-struct hsm_user_state {
-	/** Current HSM states, from enum hsm_states. */
-	__u32			hus_states;
-	__u32			hus_archive_id;
-	/** The action currently in progress, if there is one */
-	__u32			hus_in_progress_state;
-	__u32			hus_in_progress_action;
-	struct hsm_extent	hus_in_progress_location;
-	char			hus_extended_info[];
-};
-
-struct hsm_state_set_ioc {
-	struct lu_fid	hssi_fid;
-	__u64		hssi_setmask;
-	__u64		hssi_clearmask;
-};
-
-/*
- * This structure describes the current in-progress action for a file.
- * It is returned to user space and sent over the wire.
- */
-struct hsm_current_action {
-	/** The action currently in progress, if there is one */
-	/* state is one of hsm_progress_states */
-	__u32			hca_state;
-	/* action is one of hsm_user_action */
-	__u32			hca_action;
-	struct hsm_extent	hca_location;
-};
-
-/***** HSM user requests ******/
-/* User-generated (lfs/ioctl) request types */
-enum hsm_user_action {
-	HUA_NONE    =  1, /* no action (noop) */
-	HUA_ARCHIVE = 10, /* copy to hsm */
-	HUA_RESTORE = 11, /* prestage */
-	HUA_RELEASE = 12, /* drop ost objects */
-	HUA_REMOVE  = 13, /* remove from archive */
-	HUA_CANCEL  = 14  /* cancel a request */
-};
-
-static inline char *hsm_user_action2name(enum hsm_user_action  a)
-{
-	switch  (a) {
-	case HUA_NONE:    return "NOOP";
-	case HUA_ARCHIVE: return "ARCHIVE";
-	case HUA_RESTORE: return "RESTORE";
-	case HUA_RELEASE: return "RELEASE";
-	case HUA_REMOVE:  return "REMOVE";
-	case HUA_CANCEL:  return "CANCEL";
-	default:	  return "UNKNOWN";
-	}
-}
-
-/*
- * List of hr_flags (bit field)
- */
-#define HSM_FORCE_ACTION 0x0001
-/* used by CT, cannot be set by user */
-#define HSM_GHOST_COPY   0x0002
-
-/**
- * Contains the fixed part of struct hsm_user_request.
- */
-struct hsm_request {
-	__u32 hr_action;	/* enum hsm_user_action */
-	__u32 hr_archive_id;	/* archive id, used only with HUA_ARCHIVE */
-	__u64 hr_flags;		/* request flags */
-	__u32 hr_itemcount;	/* item count in hur_user_item vector */
-	__u32 hr_data_len;
-};
-
-struct hsm_user_item {
-	struct lu_fid	hui_fid;
-	struct hsm_extent hui_extent;
-} __packed;
-
-struct hsm_user_request {
-	struct hsm_request	hur_request;
-	struct hsm_user_item	hur_user_item[0];
-	/* extra data blob at end of struct (after all
-	 * hur_user_items), only use helpers to access it
-	 */
-} __packed;
-
-/** Return pointer to data field in a hsm user request */
-static inline void *hur_data(struct hsm_user_request *hur)
-{
-	return &hur->hur_user_item[hur->hur_request.hr_itemcount];
-}
-
-/**
- * Compute the current length of the provided hsm_user_request.  This returns -1
- * instead of an errno because ssize_t is defined to be only [ -1, SSIZE_MAX ]
- *
- * return -1 on bounds check error.
- */
-static inline ssize_t hur_len(struct hsm_user_request *hur)
-{
-	__u64	size;
-
-	/* can't overflow a __u64 since hr_itemcount is only __u32 */
-	size = offsetof(struct hsm_user_request, hur_user_item[0]) +
-		(__u64)hur->hur_request.hr_itemcount *
-		sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len;
-
-	if (size != (ssize_t)size)
-		return -1;
-
-	return size;
-}
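-
-/*
- * Allocation sketch (hypothetical item count "n", no trailing data blob);
- * the layout mirrors the formula used by hur_len() above:
- *
- *	struct hsm_user_request *hur;
- *
- *	hur = malloc(offsetof(struct hsm_user_request, hur_user_item[n]));
- *	if (hur) {
- *		hur->hur_request.hr_itemcount = n;
- *		hur->hur_request.hr_data_len = 0;
- *	}
- */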
-
-/****** HSM RPCs to copytool *****/
-/* Message types the copytool may receive */
-enum hsm_message_type {
-	HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
-};
-
-/* Actions the copytool may be instructed to take for a given action_item */
-enum hsm_copytool_action {
-	HSMA_NONE    = 10, /* no action */
-	HSMA_ARCHIVE = 20, /* arbitrary offset */
-	HSMA_RESTORE = 21,
-	HSMA_REMOVE  = 22,
-	HSMA_CANCEL  = 23
-};
-
-static inline char *hsm_copytool_action2name(enum hsm_copytool_action  a)
-{
-	switch  (a) {
-	case HSMA_NONE:    return "NOOP";
-	case HSMA_ARCHIVE: return "ARCHIVE";
-	case HSMA_RESTORE: return "RESTORE";
-	case HSMA_REMOVE:  return "REMOVE";
-	case HSMA_CANCEL:  return "CANCEL";
-	default:	   return "UNKNOWN";
-	}
-}
-
-/* Copytool item action description */
-struct hsm_action_item {
-	__u32      hai_len;     /* valid size of this struct */
-	__u32      hai_action;  /* hsm_copytool_action, but use known size */
-	struct lu_fid hai_fid;     /* Lustre FID to operate on */
-	struct lu_fid hai_dfid;    /* fid used for data access */
-	struct hsm_extent hai_extent;  /* byte range to operate on */
-	__u64      hai_cookie;  /* action cookie from coordinator */
-	__u64      hai_gid;     /* grouplock id */
-	char       hai_data[0]; /* variable length */
-} __packed;
-
-/*
- * Helper function that prints in hex the first bytes of the
- * hai opaque data field.
- * \param hai [IN] record to print
- * \param buffer [OUT] output buffer
- * \param len [IN] max buffer len
- * \retval buffer
- */
-static inline char *hai_dump_data_field(struct hsm_action_item *hai,
-					char *buffer, size_t len)
-{
-	int i, data_len;
-	char *ptr;
-
-	ptr = buffer;
-	data_len = hai->hai_len - sizeof(*hai);
-	for (i = 0; (i < data_len) && (len > 2); i++) {
-		snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]);
-		ptr += 2;
-		len -= 2;
-	}
-
-	*ptr = '\0';
-
-	return buffer;
-}
-
-/* Copytool action list */
-#define HAL_VERSION 1
-#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
-struct hsm_action_list {
-	__u32 hal_version;
-	__u32 hal_count;       /* number of hai's to follow */
-	__u64 hal_compound_id; /* returned by coordinator */
-	__u64 hal_flags;
-	__u32 hal_archive_id; /* which archive backend */
-	__u32 padding1;
-	char  hal_fsname[0];   /* null-terminated */
-	/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
-	 * boundaries. See hai_first
-	 */
-} __packed;
-
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round(int val)
-{
-	return (val + 7) & (~0x7);
-}
-
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
-/* Return pointer to first hai in action list */
-static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
-{
-	return (struct hsm_action_item *)(hal->hal_fsname +
-					  cfs_size_round(strlen(hal->hal_fsname) + 1));
-}
-
-/* Return pointer to next hai */
-static inline struct hsm_action_item *hai_next(struct hsm_action_item *hai)
-{
-	return (struct hsm_action_item *)((char *)hai +
-					  cfs_size_round(hai->hai_len));
-}
-
-/* Return size of an hsm_action_list */
-static inline int hal_size(struct hsm_action_list *hal)
-{
-	int i, sz;
-	struct hsm_action_item *hai;
-
-	sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
-	hai = hai_first(hal);
-	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
-		sz += cfs_size_round(hai->hai_len);
-
-	return sz;
-}
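-
-/*
- * A copytool would typically walk an action list the same way hal_size()
- * does (a sketch; "hal" arrives in an HMT_ACTION_LIST message and
- * process_item() is a hypothetical handler):
- *
- *	struct hsm_action_item *hai = hai_first(hal);
- *	int i;
- *
- *	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
- *		process_item(hai);
- */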
-
-/* HSM file import
- * describes the attributes to be set on an imported file
- */
-struct hsm_user_import {
-	__u64		hui_size;
-	__u64		hui_atime;
-	__u64		hui_mtime;
-	__u32		hui_atime_ns;
-	__u32		hui_mtime_ns;
-	__u32		hui_uid;
-	__u32		hui_gid;
-	__u32		hui_mode;
-	__u32		hui_archive_id;
-};
-
-/* Copytool progress reporting */
-#define HP_FLAG_COMPLETED 0x01
-#define HP_FLAG_RETRY     0x02
-
-struct hsm_progress {
-	struct lu_fid		hp_fid;
-	__u64			hp_cookie;
-	struct hsm_extent	hp_extent;
-	__u16			hp_flags;
-	__u16			hp_errval; /* positive val */
-	__u32			padding;
-};
-
-struct hsm_copy {
-	__u64			hc_data_version;
-	__u16			hc_flags;
-	__u16			hc_errval; /* positive val */
-	__u32			padding;
-	struct hsm_action_item	hc_hai;
-};
-
-/** @} lustreuser */
-
-#endif /* _LUSTRE_USER_H */

+ 0 - 27
drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h

@@ -1,27 +0,0 @@
-#ifndef _LUSTRE_VER_H_
-#define _LUSTRE_VER_H_
-
-#define LUSTRE_MAJOR 2
-#define LUSTRE_MINOR 6
-#define LUSTRE_PATCH 99
-#define LUSTRE_FIX 0
-#define LUSTRE_VERSION_STRING "2.6.99"
-
-#define OBD_OCD_VERSION(major, minor, patch, fix)			\
-	(((major) << 24) + ((minor) << 16) + ((patch) << 8) + (fix))
-
-#define OBD_OCD_VERSION_MAJOR(version)	((int)((version) >> 24) & 255)
-#define OBD_OCD_VERSION_MINOR(version)	((int)((version) >> 16) & 255)
-#define OBD_OCD_VERSION_PATCH(version)	((int)((version) >>  8) & 255)
-#define OBD_OCD_VERSION_FIX(version)	((int)((version) >>  0) & 255)
-
-#define LUSTRE_VERSION_CODE						\
-	OBD_OCD_VERSION(LUSTRE_MAJOR, LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX)
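-
-/*
- * For example, with the values above LUSTRE_VERSION_CODE expands to
- * (2 << 24) + (6 << 16) + (99 << 8) + 0 == 0x02066300.
- */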
-
-/*
- * If the Lustre version of a client and the servers it connects to differs
- * by more than this amount, the client will issue a warning.
- */
-#define LUSTRE_VERSION_OFFSET_WARN OBD_OCD_VERSION(0, 4, 0, 0)
-
-#endif

+ 0 - 46
drivers/staging/lustre/lnet/Kconfig

@@ -1,46 +0,0 @@
-config LNET
-	tristate "Lustre networking subsystem (LNet)"
-	depends on INET
-	help
-	  The Lustre network layer, also known as LNet, is a networking
-	  abstraction API that was initially created to allow the Lustre
-	  filesystem to use very different networks, such as TCP and IB verbs,
-	  in a uniform way. In the case of Lustre routers only the LNet layer
-	  is required. Lately, other projects have also been looking into
-	  using LNet as their networking API.
-
-config LNET_MAX_PAYLOAD
-	int "Lustre lnet max transfer payload (default 1MB)"
-	depends on LNET
-	default "1048576"
-	help
-	  This option defines the maximum size of payload in bytes that lnet
-	  can put into its transport.
-
-	  If unsure, use default.
-
-config LNET_SELFTEST
-	tristate "Lustre networking self testing"
-	depends on LNET
-	help
-	  Choose Y here if you want to do LNet self testing. To compile this
-	  as a module, choose M here: the module will be called lnet_selftest.
-
-	  If unsure, say N.
-
-	  See also http://wiki.lustre.org/
-
-config LNET_XPRT_IB
-	tristate "LNET infiniband support"
-	depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
-	default LNET && INFINIBAND
-	help
-	  This option allows LNet users to use InfiniBand as an
-	  RDMA-enabled transport.
-
-	  To compile this as a kernel module, choose M here and it will be
-	  called ko2iblnd.
-
-	  If unsure, say N.

+ 0 - 1
drivers/staging/lustre/lnet/Makefile

@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/

+ 0 - 1
drivers/staging/lustre/lnet/klnds/Makefile

@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += o2iblnd/  socklnd/

+ 0 - 5
drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile

@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o
-ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o

+ 0 - 2958
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c

@@ -1,2958 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <asm/div64.h>
-#include <asm/page.h>
-#include "o2iblnd.h"
-
-static struct lnet_lnd the_o2iblnd;
-
-struct kib_data kiblnd_data;
-
-static __u32 kiblnd_cksum(void *ptr, int nob)
-{
-	char *c = ptr;
-	__u32 sum = 0;
-
-	while (nob-- > 0)
-		sum = ((sum << 1) | (sum >> 31)) + *c++;
-
-	/* ensure I don't return 0 (== no checksum) */
-	return !sum ? 1 : sum;
-}
-
-static char *kiblnd_msgtype2str(int type)
-{
-	switch (type) {
-	case IBLND_MSG_CONNREQ:
-		return "CONNREQ";
-
-	case IBLND_MSG_CONNACK:
-		return "CONNACK";
-
-	case IBLND_MSG_NOOP:
-		return "NOOP";
-
-	case IBLND_MSG_IMMEDIATE:
-		return "IMMEDIATE";
-
-	case IBLND_MSG_PUT_REQ:
-		return "PUT_REQ";
-
-	case IBLND_MSG_PUT_NAK:
-		return "PUT_NAK";
-
-	case IBLND_MSG_PUT_ACK:
-		return "PUT_ACK";
-
-	case IBLND_MSG_PUT_DONE:
-		return "PUT_DONE";
-
-	case IBLND_MSG_GET_REQ:
-		return "GET_REQ";
-
-	case IBLND_MSG_GET_DONE:
-		return "GET_DONE";
-
-	default:
-		return "???";
-	}
-}
-
-static int kiblnd_msgtype2size(int type)
-{
-	const int hdr_size = offsetof(struct kib_msg, ibm_u);
-
-	switch (type) {
-	case IBLND_MSG_CONNREQ:
-	case IBLND_MSG_CONNACK:
-		return hdr_size + sizeof(struct kib_connparams);
-
-	case IBLND_MSG_NOOP:
-		return hdr_size;
-
-	case IBLND_MSG_IMMEDIATE:
-		return offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[0]);
-
-	case IBLND_MSG_PUT_REQ:
-		return hdr_size + sizeof(struct kib_putreq_msg);
-
-	case IBLND_MSG_PUT_ACK:
-		return hdr_size + sizeof(struct kib_putack_msg);
-
-	case IBLND_MSG_GET_REQ:
-		return hdr_size + sizeof(struct kib_get_msg);
-
-	case IBLND_MSG_PUT_NAK:
-	case IBLND_MSG_PUT_DONE:
-	case IBLND_MSG_GET_DONE:
-		return hdr_size + sizeof(struct kib_completion_msg);
-	default:
-		return -1;
-	}
-}
-
-static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
-{
-	struct kib_rdma_desc *rd;
-	int msg_size;
-	int nob;
-	int n;
-	int i;
-
-	LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ ||
-		msg->ibm_type == IBLND_MSG_PUT_ACK);
-
-	rd = msg->ibm_type == IBLND_MSG_GET_REQ ?
-			      &msg->ibm_u.get.ibgm_rd :
-			      &msg->ibm_u.putack.ibpam_rd;
-
-	if (flip) {
-		__swab32s(&rd->rd_key);
-		__swab32s(&rd->rd_nfrags);
-	}
-
-	n = rd->rd_nfrags;
-
-	nob = offsetof(struct kib_msg, ibm_u) +
-	      kiblnd_rd_msg_size(rd, msg->ibm_type, n);
-
-	if (msg->ibm_nob < nob) {
-		CERROR("Short %s: %d(%d)\n",
-		       kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob);
-		return 1;
-	}
-
-	msg_size = kiblnd_rd_size(rd);
-	if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
-		CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
-		       msg_size, LNET_MAX_PAYLOAD);
-		return 1;
-	}
-
-	if (!flip)
-		return 0;
-
-	for (i = 0; i < n; i++) {
-		__swab32s(&rd->rd_frags[i].rf_nob);
-		__swab64s(&rd->rd_frags[i].rf_addr);
-	}
-
-	return 0;
-}
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
-		     int credits, lnet_nid_t dstnid, __u64 dststamp)
-{
-	struct kib_net *net = ni->ni_data;
-
-	/*
-	 * CAVEAT EMPTOR! all message fields not set here should have been
-	 * initialised previously.
-	 */
-	msg->ibm_magic    = IBLND_MSG_MAGIC;
-	msg->ibm_version  = version;
-	/*   ibm_type */
-	msg->ibm_credits  = credits;
-	/*   ibm_nob */
-	msg->ibm_cksum    = 0;
-	msg->ibm_srcnid   = ni->ni_nid;
-	msg->ibm_srcstamp = net->ibn_incarnation;
-	msg->ibm_dstnid   = dstnid;
-	msg->ibm_dststamp = dststamp;
-
-	if (*kiblnd_tunables.kib_cksum) {
-		/* NB ibm_cksum zero while computing cksum */
-		msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
-	}
-}
-
-int kiblnd_unpack_msg(struct kib_msg *msg, int nob)
-{
-	const int hdr_size = offsetof(struct kib_msg, ibm_u);
-	__u32 msg_cksum;
-	__u16 version;
-	int msg_nob;
-	int flip;
-
-	/* 6 bytes are enough to have received magic + version */
-	if (nob < 6) {
-		CERROR("Short message: %d\n", nob);
-		return -EPROTO;
-	}
-
-	if (msg->ibm_magic == IBLND_MSG_MAGIC) {
-		flip = 0;
-	} else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
-		flip = 1;
-	} else {
-		CERROR("Bad magic: %08x\n", msg->ibm_magic);
-		return -EPROTO;
-	}
-
-	version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
-	if (version != IBLND_MSG_VERSION &&
-	    version != IBLND_MSG_VERSION_1) {
-		CERROR("Bad version: %x\n", version);
-		return -EPROTO;
-	}
-
-	if (nob < hdr_size) {
-		CERROR("Short message: %d\n", nob);
-		return -EPROTO;
-	}
-
-	msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
-	if (msg_nob > nob) {
-		CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
-		return -EPROTO;
-	}
-
-	/*
-	 * checksum must be computed with ibm_cksum zero and BEFORE anything
-	 * gets flipped
-	 */
-	msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
-	msg->ibm_cksum = 0;
-	if (msg_cksum &&
-	    msg_cksum != kiblnd_cksum(msg, msg_nob)) {
-		CERROR("Bad checksum\n");
-		return -EPROTO;
-	}
-
-	msg->ibm_cksum = msg_cksum;
-
-	if (flip) {
-		/* leave magic unflipped as a clue to peer endianness */
-		msg->ibm_version = version;
-		BUILD_BUG_ON(sizeof(msg->ibm_type) != 1);
-		BUILD_BUG_ON(sizeof(msg->ibm_credits) != 1);
-		msg->ibm_nob     = msg_nob;
-		__swab64s(&msg->ibm_srcnid);
-		__swab64s(&msg->ibm_srcstamp);
-		__swab64s(&msg->ibm_dstnid);
-		__swab64s(&msg->ibm_dststamp);
-	}
-
-	if (msg->ibm_srcnid == LNET_NID_ANY) {
-		CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
-		return -EPROTO;
-	}
-
-	if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) {
-		CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type),
-		       msg_nob, kiblnd_msgtype2size(msg->ibm_type));
-		return -EPROTO;
-	}
-
-	switch (msg->ibm_type) {
-	default:
-		CERROR("Unknown message type %x\n", msg->ibm_type);
-		return -EPROTO;
-
-	case IBLND_MSG_NOOP:
-	case IBLND_MSG_IMMEDIATE:
-	case IBLND_MSG_PUT_REQ:
-		break;
-
-	case IBLND_MSG_PUT_ACK:
-	case IBLND_MSG_GET_REQ:
-		if (kiblnd_unpack_rd(msg, flip))
-			return -EPROTO;
-		break;
-
-	case IBLND_MSG_PUT_NAK:
-	case IBLND_MSG_PUT_DONE:
-	case IBLND_MSG_GET_DONE:
-		if (flip)
-			__swab32s(&msg->ibm_u.completion.ibcm_status);
-		break;
-
-	case IBLND_MSG_CONNREQ:
-	case IBLND_MSG_CONNACK:
-		if (flip) {
-			__swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
-			__swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
-			__swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
-		}
-		break;
-	}
-	return 0;
-}
-
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
-		       lnet_nid_t nid)
-{
-	struct kib_peer *peer;
-	struct kib_net *net = ni->ni_data;
-	int cpt = lnet_cpt_of_nid(nid);
-	unsigned long flags;
-
-	LASSERT(net);
-	LASSERT(nid != LNET_NID_ANY);
-
-	peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
-	if (!peer) {
-		CERROR("Cannot allocate peer\n");
-		return -ENOMEM;
-	}
-
-	peer->ibp_ni = ni;
-	peer->ibp_nid = nid;
-	peer->ibp_error = 0;
-	peer->ibp_last_alive = 0;
-	peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
-	peer->ibp_queue_depth = ni->ni_peertxcredits;
-	atomic_set(&peer->ibp_refcount, 1);  /* 1 ref for caller */
-
-	INIT_LIST_HEAD(&peer->ibp_list);     /* not in the peer table yet */
-	INIT_LIST_HEAD(&peer->ibp_conns);
-	INIT_LIST_HEAD(&peer->ibp_tx_queue);
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	/* always called with a ref on ni, which prevents ni being shutdown */
-	LASSERT(!net->ibn_shutdown);
-
-	/* npeers only grows with the global lock held */
-	atomic_inc(&net->ibn_npeers);
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	*peerp = peer;
-	return 0;
-}
-
-void kiblnd_destroy_peer(struct kib_peer *peer)
-{
-	struct kib_net *net = peer->ibp_ni->ni_data;
-
-	LASSERT(net);
-	LASSERT(!atomic_read(&peer->ibp_refcount));
-	LASSERT(!kiblnd_peer_active(peer));
-	LASSERT(kiblnd_peer_idle(peer));
-	LASSERT(list_empty(&peer->ibp_tx_queue));
-
-	kfree(peer);
-
-	/*
-	 * NB a peer's connections keep a reference on their peer until
-	 * they are destroyed, so we can be assured that _all_ state to do
-	 * with this peer has been cleaned up when its refcount drops to
-	 * zero.
-	 */
-	atomic_dec(&net->ibn_npeers);
-}
-
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid)
-{
-	/*
-	 * the caller is responsible for accounting the additional reference
-	 * that this creates
-	 */
-	struct list_head *peer_list = kiblnd_nid2peerlist(nid);
-	struct list_head *tmp;
-	struct kib_peer *peer;
-
-	list_for_each(tmp, peer_list) {
-		peer = list_entry(tmp, struct kib_peer, ibp_list);
-		LASSERT(!kiblnd_peer_idle(peer));
-
-		if (peer->ibp_nid != nid)
-			continue;
-
-		CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
-		       peer, libcfs_nid2str(nid),
-		       atomic_read(&peer->ibp_refcount),
-		       peer->ibp_version);
-		return peer;
-	}
-	return NULL;
-}
-
-void kiblnd_unlink_peer_locked(struct kib_peer *peer)
-{
-	LASSERT(list_empty(&peer->ibp_conns));
-
-	LASSERT(kiblnd_peer_active(peer));
-	list_del_init(&peer->ibp_list);
-	/* lose peerlist's ref */
-	kiblnd_peer_decref(peer);
-}
-
-static int kiblnd_get_peer_info(struct lnet_ni *ni, int index,
-				lnet_nid_t *nidp, int *count)
-{
-	struct kib_peer *peer;
-	struct list_head *ptmp;
-	int i;
-	unsigned long flags;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
-		list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			if (index-- > 0)
-				continue;
-
-			*nidp = peer->ibp_nid;
-			*count = atomic_read(&peer->ibp_refcount);
-
-			read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
-					       flags);
-			return 0;
-		}
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-	return -ENOENT;
-}
-
-static void kiblnd_del_peer_locked(struct kib_peer *peer)
-{
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	struct kib_conn *conn;
-
-	if (list_empty(&peer->ibp_conns)) {
-		kiblnd_unlink_peer_locked(peer);
-	} else {
-		list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
-			conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-			kiblnd_close_conn_locked(conn, 0);
-		}
-		/* NB closing peer's last conn unlinked it. */
-	}
-	/*
-	 * NB peer now unlinked; might even be freed if the peer table had the
-	 * last ref on it.
-	 */
-}
-
-static int kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid)
-{
-	LIST_HEAD(zombies);
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	struct kib_peer *peer;
-	int lo;
-	int hi;
-	int i;
-	unsigned long flags;
-	int rc = -ENOENT;
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	if (nid != LNET_NID_ANY) {
-		lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-		hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-	} else {
-		lo = 0;
-		hi = kiblnd_data.kib_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
-				continue;
-
-			if (!list_empty(&peer->ibp_tx_queue)) {
-				LASSERT(list_empty(&peer->ibp_conns));
-
-				list_splice_init(&peer->ibp_tx_queue,
-						 &zombies);
-			}
-
-			kiblnd_del_peer_locked(peer);
-			rc = 0;	 /* matched something */
-		}
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	kiblnd_txlist_done(ni, &zombies, -EIO);
-
-	return rc;
-}
-
-static struct kib_conn *kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
-	struct kib_peer *peer;
-	struct list_head *ptmp;
-	struct kib_conn *conn;
-	struct list_head *ctmp;
-	int i;
-	unsigned long flags;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
-		list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			list_for_each(ctmp, &peer->ibp_conns) {
-				if (index-- > 0)
-					continue;
-
-				conn = list_entry(ctmp, struct kib_conn,
-						  ibc_list);
-				kiblnd_conn_addref(conn);
-				read_unlock_irqrestore(
-					&kiblnd_data.kib_global_lock,
-					flags);
-				return conn;
-			}
-		}
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-	return NULL;
-}
-
-int kiblnd_translate_mtu(int value)
-{
-	switch (value) {
-	default:
-		return -1;
-	case 0:
-		return 0;
-	case 256:
-		return IB_MTU_256;
-	case 512:
-		return IB_MTU_512;
-	case 1024:
-		return IB_MTU_1024;
-	case 2048:
-		return IB_MTU_2048;
-	case 4096:
-		return IB_MTU_4096;
-	}
-}
-
-static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
-{
-	int mtu;
-
-	/* XXX There is no path record for iWARP, set by netdev->change_mtu? */
-	if (!cmid->route.path_rec)
-		return;
-
-	mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
-	LASSERT(mtu >= 0);
-	if (mtu)
-		cmid->route.path_rec->mtu = mtu;
-}
-
-static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
-{
-	cpumask_var_t *mask;
-	int vectors;
-	int off;
-	int i;
-	lnet_nid_t nid = conn->ibc_peer->ibp_nid;
-
-	vectors = conn->ibc_cmid->device->num_comp_vectors;
-	if (vectors <= 1)
-		return 0;
-
-	mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt);
-	if (!mask)
-		return 0;
-
-	/* hash NID to CPU id in this partition... */
-	off = do_div(nid, cpumask_weight(*mask));
-	for_each_cpu(i, *mask) {
-		if (!off--)
-			return i % vectors;
-	}
-
-	LBUG();
-	return 1;
-}
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
-				    int state, int version)
-{
-	/*
-	 * CAVEAT EMPTOR:
-	 * If the new conn is created successfully it takes over the caller's
-	 * ref on 'peer'.  It also "owns" 'cmid' and destroys it when it itself
-	 * is destroyed.  On failure, the caller's ref on 'peer' remains and
-	 * she must dispose of 'cmid'.  (Actually I'd block forever if I tried
-	 * to destroy 'cmid' here since I'm called from the CM which still has
-	 * its ref on 'cmid').
-	 */
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_net *net = peer->ibp_ni->ni_data;
-	struct kib_dev *dev;
-	struct ib_qp_init_attr *init_qp_attr;
-	struct kib_sched_info *sched;
-	struct ib_cq_init_attr cq_attr = {};
-	struct kib_conn *conn;
-	struct ib_cq *cq;
-	unsigned long flags;
-	int cpt;
-	int rc;
-	int i;
-
-	LASSERT(net);
-	LASSERT(!in_interrupt());
-
-	dev = net->ibn_dev;
-
-	cpt = lnet_cpt_of_nid(peer->ibp_nid);
-	sched = kiblnd_data.kib_scheds[cpt];
-
-	LASSERT(sched->ibs_nthreads > 0);
-
-	init_qp_attr = kzalloc_cpt(sizeof(*init_qp_attr), GFP_NOFS, cpt);
-	if (!init_qp_attr) {
-		CERROR("Can't allocate qp_attr for %s\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		goto failed_0;
-	}
-
-	conn = kzalloc_cpt(sizeof(*conn), GFP_NOFS, cpt);
-	if (!conn) {
-		CERROR("Can't allocate connection for %s\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		goto failed_1;
-	}
-
-	conn->ibc_state = IBLND_CONN_INIT;
-	conn->ibc_version = version;
-	conn->ibc_peer = peer;		  /* I take the caller's ref */
-	cmid->context = conn;		   /* for future CM callbacks */
-	conn->ibc_cmid = cmid;
-	conn->ibc_max_frags = peer->ibp_max_frags;
-	conn->ibc_queue_depth = peer->ibp_queue_depth;
-
-	INIT_LIST_HEAD(&conn->ibc_early_rxs);
-	INIT_LIST_HEAD(&conn->ibc_tx_noops);
-	INIT_LIST_HEAD(&conn->ibc_tx_queue);
-	INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
-	INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
-	INIT_LIST_HEAD(&conn->ibc_active_txs);
-	spin_lock_init(&conn->ibc_lock);
-
-	conn->ibc_connvars = kzalloc_cpt(sizeof(*conn->ibc_connvars), GFP_NOFS, cpt);
-	if (!conn->ibc_connvars) {
-		CERROR("Can't allocate in-progress connection state\n");
-		goto failed_2;
-	}
-
-	write_lock_irqsave(glock, flags);
-	if (dev->ibd_failover) {
-		write_unlock_irqrestore(glock, flags);
-		CERROR("%s: failover in progress\n", dev->ibd_ifname);
-		goto failed_2;
-	}
-
-	if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
-		/* wakeup failover thread and teardown connection */
-		if (kiblnd_dev_can_failover(dev)) {
-			list_add_tail(&dev->ibd_fail_list,
-				      &kiblnd_data.kib_failed_devs);
-			wake_up(&kiblnd_data.kib_failover_waitq);
-		}
-
-		write_unlock_irqrestore(glock, flags);
-		CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
-		       cmid->device->name, dev->ibd_ifname);
-		goto failed_2;
-	}
-
-	kiblnd_hdev_addref_locked(dev->ibd_hdev);
-	conn->ibc_hdev = dev->ibd_hdev;
-
-	kiblnd_setup_mtu_locked(cmid);
-
-	write_unlock_irqrestore(glock, flags);
-
-	conn->ibc_rxs = kzalloc_cpt(IBLND_RX_MSGS(conn) * sizeof(struct kib_rx),
-				    GFP_NOFS, cpt);
-	if (!conn->ibc_rxs) {
-		CERROR("Cannot allocate RX buffers\n");
-		goto failed_2;
-	}
-
-	rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
-				IBLND_RX_MSG_PAGES(conn));
-	if (rc)
-		goto failed_2;
-
-	kiblnd_map_rx_descs(conn);
-
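-	/*
-	 * size the CQ to cover every send and receive WR this connection
-	 * can have outstanding at once, so completions can't be lost to
-	 * CQ overflow
-	 */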
-	cq_attr.cqe = IBLND_CQ_ENTRIES(conn);
-	cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt);
-	cq = ib_create_cq(cmid->device,
-			  kiblnd_cq_completion, kiblnd_cq_event, conn,
-			  &cq_attr);
-	if (IS_ERR(cq)) {
-		CERROR("Failed to create CQ with %d CQEs: %ld\n",
-		       IBLND_CQ_ENTRIES(conn), PTR_ERR(cq));
-		goto failed_2;
-	}
-
-	conn->ibc_cq = cq;
-
-	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-	if (rc) {
-		CERROR("Can't request completion notification: %d\n", rc);
-		goto failed_2;
-	}
-
-	init_qp_attr->event_handler = kiblnd_qp_event;
-	init_qp_attr->qp_context = conn;
-	init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn);
-	init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
-	init_qp_attr->cap.max_send_sge = 1;
-	init_qp_attr->cap.max_recv_sge = 1;
-	init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	init_qp_attr->qp_type = IB_QPT_RC;
-	init_qp_attr->send_cq = cq;
-	init_qp_attr->recv_cq = cq;
-
-	conn->ibc_sched = sched;
-
-	rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
-	if (rc) {
-		CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n",
-		       rc, init_qp_attr->cap.max_send_wr,
-		       init_qp_attr->cap.max_recv_wr);
-		goto failed_2;
-	}
-
-	kfree(init_qp_attr);
-
-	/* 1 ref for caller and each rxmsg */
-	atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
-	conn->ibc_nrx = IBLND_RX_MSGS(conn);
-
-	/* post receives */
-	for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
-		rc = kiblnd_post_rx(&conn->ibc_rxs[i],
-				    IBLND_POSTRX_NO_CREDIT);
-		if (rc) {
-			CERROR("Can't post rxmsg: %d\n", rc);
-
-			/* Make posted receives complete */
-			kiblnd_abort_receives(conn);
-
-			/*
-			 * correct the # of posted buffers
-			 * NB: locking is needed now that I'm racing with completions
-			 */
-			spin_lock_irqsave(&sched->ibs_lock, flags);
-			conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i;
-			spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-			/*
-			 * cmid will be destroyed by the CM (OFED) after cm_callback
-			 * returns, so we can't refer to it anymore
-			 * (from kiblnd_connd()->kiblnd_destroy_conn)
-			 */
-			rdma_destroy_qp(conn->ibc_cmid);
-			conn->ibc_cmid = NULL;
-
-			/* Drop my own and unused rxbuffer refcounts */
-			while (i++ <= IBLND_RX_MSGS(conn))
-				kiblnd_conn_decref(conn);
-
-			return NULL;
-		}
-	}
-
-	/* Init successful! */
-	LASSERT(state == IBLND_CONN_ACTIVE_CONNECT ||
-		state == IBLND_CONN_PASSIVE_WAIT);
-	conn->ibc_state = state;
-
-	/* 1 more conn */
-	atomic_inc(&net->ibn_nconns);
-	return conn;
-
- failed_2:
-	kiblnd_destroy_conn(conn);
-	kfree(conn);
- failed_1:
-	kfree(init_qp_attr);
- failed_0:
-	return NULL;
-}
-
-void kiblnd_destroy_conn(struct kib_conn *conn)
-{
-	struct rdma_cm_id *cmid = conn->ibc_cmid;
-	struct kib_peer *peer = conn->ibc_peer;
-	int rc;
-
-	LASSERT(!in_interrupt());
-	LASSERT(!atomic_read(&conn->ibc_refcount));
-	LASSERT(list_empty(&conn->ibc_early_rxs));
-	LASSERT(list_empty(&conn->ibc_tx_noops));
-	LASSERT(list_empty(&conn->ibc_tx_queue));
-	LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd));
-	LASSERT(list_empty(&conn->ibc_tx_queue_nocred));
-	LASSERT(list_empty(&conn->ibc_active_txs));
-	LASSERT(!conn->ibc_noops_posted);
-	LASSERT(!conn->ibc_nsends_posted);
-
-	switch (conn->ibc_state) {
-	default:
-		/* conn must be completely disengaged from the network */
-		LBUG();
-
-	case IBLND_CONN_DISCONNECTED:
-		/* connvars should have been freed already */
-		LASSERT(!conn->ibc_connvars);
-		break;
-
-	case IBLND_CONN_INIT:
-		break;
-	}
-
-	/* conn->ibc_cmid might be destroyed by CM already */
-	if (cmid && cmid->qp)
-		rdma_destroy_qp(cmid);
-
-	if (conn->ibc_cq) {
-		rc = ib_destroy_cq(conn->ibc_cq);
-		if (rc)
-			CWARN("Error destroying CQ: %d\n", rc);
-	}
-
-	if (conn->ibc_rx_pages)
-		kiblnd_unmap_rx_descs(conn);
-
-	kfree(conn->ibc_rxs);
-	kfree(conn->ibc_connvars);
-
-	if (conn->ibc_hdev)
-		kiblnd_hdev_decref(conn->ibc_hdev);
-
-	/* See CAVEAT EMPTOR above in kiblnd_create_conn */
-	if (conn->ibc_state != IBLND_CONN_INIT) {
-		struct kib_net *net = peer->ibp_ni->ni_data;
-
-		kiblnd_peer_decref(peer);
-		rdma_destroy_id(cmid);
-		atomic_dec(&net->ibn_nconns);
-	}
-}
-
-int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why)
-{
-	struct kib_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	int count = 0;
-
-	list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
-		conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-		CDEBUG(D_NET, "Closing conn -> %s, version: %x, reason: %d\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       conn->ibc_version, why);
-
-		kiblnd_close_conn_locked(conn, why);
-		count++;
-	}
-
-	return count;
-}
-
-int kiblnd_close_stale_conns_locked(struct kib_peer *peer,
-				    int version, __u64 incarnation)
-{
-	struct kib_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	int count = 0;
-
-	list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
-		conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-		if (conn->ibc_version     == version &&
-		    conn->ibc_incarnation == incarnation)
-			continue;
-
-		CDEBUG(D_NET,
-		       "Closing stale conn -> %s version: %x, incarnation:%#llx(%x, %#llx)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       conn->ibc_version, conn->ibc_incarnation,
-		       version, incarnation);
-
-		kiblnd_close_conn_locked(conn, -ESTALE);
-		count++;
-	}
-
-	return count;
-}
-
-static int kiblnd_close_matching_conns(struct lnet_ni *ni, lnet_nid_t nid)
-{
-	struct kib_peer *peer;
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	int lo;
-	int hi;
-	int i;
-	unsigned long flags;
-	int count = 0;
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
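-	/*
-	 * a specific NID hashes to exactly one peer list; a wildcard
-	 * means scanning every hash bucket
-	 */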
-	if (nid != LNET_NID_ANY) {
-		lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-		hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-	} else {
-		lo = 0;
-		hi = kiblnd_data.kib_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
-				continue;
-
-			count += kiblnd_close_peer_conns_locked(peer, 0);
-		}
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	/* wildcards always succeed */
-	if (nid == LNET_NID_ANY)
-		return 0;
-
-	return !count ? -ENOENT : 0;
-}
-
-static int kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
-	struct libcfs_ioctl_data *data = arg;
-	int rc = -EINVAL;
-
-	switch (cmd) {
-	case IOC_LIBCFS_GET_PEER: {
-		lnet_nid_t nid = 0;
-		int count = 0;
-
-		rc = kiblnd_get_peer_info(ni, data->ioc_count,
-					  &nid, &count);
-		data->ioc_nid   = nid;
-		data->ioc_count = count;
-		break;
-	}
-
-	case IOC_LIBCFS_DEL_PEER: {
-		rc = kiblnd_del_peer(ni, data->ioc_nid);
-		break;
-	}
-	case IOC_LIBCFS_GET_CONN: {
-		struct kib_conn *conn;
-
-		rc = 0;
-		conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
-		if (!conn) {
-			rc = -ENOENT;
-			break;
-		}
-
-		LASSERT(conn->ibc_cmid);
-		data->ioc_nid = conn->ibc_peer->ibp_nid;
-		if (!conn->ibc_cmid->route.path_rec)
-			data->ioc_u32[0] = 0; /* iWarp has no path MTU */
-		else
-			data->ioc_u32[0] =
-			ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
-		kiblnd_conn_decref(conn);
-		break;
-	}
-	case IOC_LIBCFS_CLOSE_CONNECTION: {
-		rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
-		break;
-	}
-
-	default:
-		break;
-	}
-
-	return rc;
-}
-
-static void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid,
-			 unsigned long *when)
-{
-	unsigned long last_alive = 0;
-	unsigned long now = jiffies;
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_peer *peer;
-	unsigned long flags;
-
-	read_lock_irqsave(glock, flags);
-
-	peer = kiblnd_find_peer_locked(nid);
-	if (peer)
-		last_alive = peer->ibp_last_alive;
-
-	read_unlock_irqrestore(glock, flags);
-
-	if (last_alive)
-		*when = last_alive;
-
-	/*
-	 * peer is not persistent in hash, trigger peer creation
-	 * and connection establishment with a NULL tx
-	 */
-	if (!peer)
-		kiblnd_launch_tx(ni, NULL, nid);
-
-	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
-	       libcfs_nid2str(nid), peer,
-	       last_alive ? (now - last_alive) / HZ : -1);
-}
-
-static void kiblnd_free_pages(struct kib_pages *p)
-{
-	int npages = p->ibp_npages;
-	int i;
-
-	for (i = 0; i < npages; i++) {
-		if (p->ibp_pages[i])
-			__free_page(p->ibp_pages[i]);
-	}
-
-	kfree(p);
-}
-
-int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages)
-{
-	struct kib_pages *p;
-	int i;
-
-	p = kzalloc_cpt(offsetof(struct kib_pages, ibp_pages[npages]),
-			GFP_NOFS, cpt);
-	if (!p) {
-		CERROR("Can't allocate descriptor for %d pages\n", npages);
-		return -ENOMEM;
-	}
-
-	p->ibp_npages = npages;
-
-	for (i = 0; i < npages; i++) {
-		p->ibp_pages[i] = alloc_pages_node(
-				    cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				    GFP_NOFS, 0);
-		if (!p->ibp_pages[i]) {
-			CERROR("Can't allocate page %d of %d\n", i, npages);
-			kiblnd_free_pages(p);
-			return -ENOMEM;
-		}
-	}
-
-	*pp = p;
-	return 0;
-}
-
-void kiblnd_unmap_rx_descs(struct kib_conn *conn)
-{
-	struct kib_rx *rx;
-	int i;
-
-	LASSERT(conn->ibc_rxs);
-	LASSERT(conn->ibc_hdev);
-
-	for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
-		rx = &conn->ibc_rxs[i];
-
-		LASSERT(rx->rx_nob >= 0); /* not posted */
-
-		kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev,
-					KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
-							  rx->rx_msgaddr),
-					IBLND_MSG_SIZE, DMA_FROM_DEVICE);
-	}
-
-	kiblnd_free_pages(conn->ibc_rx_pages);
-
-	conn->ibc_rx_pages = NULL;
-}
-
-void kiblnd_map_rx_descs(struct kib_conn *conn)
-{
-	struct kib_rx *rx;
-	struct page *pg;
-	int pg_off;
-	int ipg;
-	int i;
-
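-	/*
-	 * lay the RX message buffers out back to back in the RX pages;
-	 * IBLND_MSG_SIZE divides PAGE_SIZE, so a buffer never straddles
-	 * a page boundary
-	 */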
-	for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) {
-		pg = conn->ibc_rx_pages->ibp_pages[ipg];
-		rx = &conn->ibc_rxs[i];
-
-		rx->rx_conn = conn;
-		rx->rx_msg = (struct kib_msg *)(((char *)page_address(pg)) + pg_off);
-
-		rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev,
-						       rx->rx_msg,
-						       IBLND_MSG_SIZE,
-						       DMA_FROM_DEVICE);
-		LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev,
-						  rx->rx_msgaddr));
-		KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);
-
-		CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n",
-		       i, rx->rx_msg, rx->rx_msgaddr,
-		       (__u64)(page_to_phys(pg) + pg_off));
-
-		pg_off += IBLND_MSG_SIZE;
-		LASSERT(pg_off <= PAGE_SIZE);
-
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			ipg++;
-			LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn));
-		}
-	}
-}
-
-static void kiblnd_unmap_tx_pool(struct kib_tx_pool *tpo)
-{
-	struct kib_hca_dev *hdev = tpo->tpo_hdev;
-	struct kib_tx *tx;
-	int i;
-
-	LASSERT(!tpo->tpo_pool.po_allocated);
-
-	if (!hdev)
-		return;
-
-	for (i = 0; i < tpo->tpo_pool.po_size; i++) {
-		tx = &tpo->tpo_tx_descs[i];
-		kiblnd_dma_unmap_single(hdev->ibh_ibdev,
-					KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
-							  tx->tx_msgaddr),
-					IBLND_MSG_SIZE, DMA_TO_DEVICE);
-	}
-
-	kiblnd_hdev_decref(hdev);
-	tpo->tpo_hdev = NULL;
-}
-
-static struct kib_hca_dev *kiblnd_current_hdev(struct kib_dev *dev)
-{
-	struct kib_hca_dev *hdev;
-	unsigned long flags;
-	int i = 0;
-
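-	/*
-	 * block (polling every ~10ms) until any failover in progress
-	 * completes, then take a ref on the now-stable HCA descriptor
-	 */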
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	while (dev->ibd_failover) {
-		read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-		if (!(i++ % 50))
-			CDEBUG(D_NET, "%s: waiting for failover\n",
-			       dev->ibd_ifname);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ / 100);
-
-		read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	}
-
-	kiblnd_hdev_addref_locked(dev->ibd_hdev);
-	hdev = dev->ibd_hdev;
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	return hdev;
-}
-
-static void kiblnd_map_tx_pool(struct kib_tx_pool *tpo)
-{
-	struct kib_pages *txpgs = tpo->tpo_tx_pages;
-	struct kib_pool *pool = &tpo->tpo_pool;
-	struct kib_net *net = pool->po_owner->ps_net;
-	struct kib_dev *dev;
-	struct page *page;
-	struct kib_tx *tx;
-	int page_offset;
-	int ipage;
-	int i;
-
-	LASSERT(net);
-
-	dev = net->ibn_dev;
-
-	/* pre-mapped messages are not bigger than 1 page */
-	BUILD_BUG_ON(IBLND_MSG_SIZE > PAGE_SIZE);
-
-	/* No fancy arithmetic when we do the buffer calculations */
-	BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE);
-
-	tpo->tpo_hdev = kiblnd_current_hdev(dev);
-
-	for (ipage = page_offset = i = 0; i < pool->po_size; i++) {
-		page = txpgs->ibp_pages[ipage];
-		tx = &tpo->tpo_tx_descs[i];
-
-		tx->tx_msg = (struct kib_msg *)(((char *)page_address(page)) +
-					   page_offset);
-
-		tx->tx_msgaddr = kiblnd_dma_map_single(
-			tpo->tpo_hdev->ibh_ibdev, tx->tx_msg,
-			IBLND_MSG_SIZE, DMA_TO_DEVICE);
-		LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev,
-						  tx->tx_msgaddr));
-		KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);
-
-		list_add(&tx->tx_list, &pool->po_free_list);
-
-		page_offset += IBLND_MSG_SIZE;
-		LASSERT(page_offset <= PAGE_SIZE);
-
-		if (page_offset == PAGE_SIZE) {
-			page_offset = 0;
-			ipage++;
-			LASSERT(ipage <= txpgs->ibp_npages);
-		}
-	}
-}
-
-static void kiblnd_destroy_fmr_pool(struct kib_fmr_pool *fpo)
-{
-	LASSERT(!fpo->fpo_map_count);
-
-	if (fpo->fpo_is_fmr) {
-		if (fpo->fmr.fpo_fmr_pool)
-			ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
-	} else {
-		struct kib_fast_reg_descriptor *frd, *tmp;
-		int i = 0;
-
-		list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
-					 frd_list) {
-			list_del(&frd->frd_list);
-			ib_dereg_mr(frd->frd_mr);
-			kfree(frd);
-			i++;
-		}
-		if (i < fpo->fast_reg.fpo_pool_size)
-			CERROR("FastReg pool still has %d regions registered\n",
-			       fpo->fast_reg.fpo_pool_size - i);
-	}
-
-	if (fpo->fpo_hdev)
-		kiblnd_hdev_decref(fpo->fpo_hdev);
-
-	kfree(fpo);
-}
-
-static void kiblnd_destroy_fmr_pool_list(struct list_head *head)
-{
-	struct kib_fmr_pool *fpo, *tmp;
-
-	list_for_each_entry_safe(fpo, tmp, head, fpo_list) {
-		list_del(&fpo->fpo_list);
-		kiblnd_destroy_fmr_pool(fpo);
-	}
-}
-
-static int
-kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
-		     int ncpts)
-{
-	int size = tunables->lnd_fmr_pool_size / ncpts;
-
-	return max(IBLND_FMR_POOL, size);
-}
-
-static int
-kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
-			 int ncpts)
-{
-	int size = tunables->lnd_fmr_flush_trigger / ncpts;
-
-	return max(IBLND_FMR_POOL_FLUSH, size);
-}
-
-static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
-	struct ib_fmr_pool_param param = {
-		.max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE,
-		.page_shift        = PAGE_SHIFT,
-		.access            = (IB_ACCESS_LOCAL_WRITE |
-				      IB_ACCESS_REMOTE_WRITE),
-		.pool_size         = fps->fps_pool_size,
-		.dirty_watermark   = fps->fps_flush_trigger,
-		.flush_function    = NULL,
-		.flush_arg         = NULL,
-		.cache             = !!fps->fps_cache };
-	int rc = 0;
-
-	fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
-						   &param);
-	if (IS_ERR(fpo->fmr.fpo_fmr_pool)) {
-		rc = PTR_ERR(fpo->fmr.fpo_fmr_pool);
-		if (rc != -ENOSYS)
-			CERROR("Failed to create FMR pool: %d\n", rc);
-		else
-			CERROR("FMRs are not supported\n");
-	}
-
-	return rc;
-}
-
-static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
-	struct kib_fast_reg_descriptor *frd, *tmp;
-	int i, rc;
-
-	INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
-	fpo->fast_reg.fpo_pool_size = 0;
-	for (i = 0; i < fps->fps_pool_size; i++) {
-		frd = kzalloc_cpt(sizeof(*frd), GFP_NOFS, fps->fps_cpt);
-		if (!frd) {
-			CERROR("Failed to allocate a new fast_reg descriptor\n");
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd,
-					  IB_MR_TYPE_MEM_REG,
-					  LNET_MAX_PAYLOAD / PAGE_SIZE);
-		if (IS_ERR(frd->frd_mr)) {
-			rc = PTR_ERR(frd->frd_mr);
-			CERROR("ib_alloc_mr failed: %d\n", rc);
-			frd->frd_mr = NULL;
-			goto out_middle;
-		}
-
-		frd->frd_valid = true;
-
-		list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
-		fpo->fast_reg.fpo_pool_size++;
-	}
-
-	return 0;
-
-out_middle:
-	if (frd->frd_mr)
-		ib_dereg_mr(frd->frd_mr);
-	kfree(frd);
-
-out:
-	list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
-				 frd_list) {
-		list_del(&frd->frd_list);
-		ib_dereg_mr(frd->frd_mr);
-		kfree(frd);
-	}
-
-	return rc;
-}
-
-static int kiblnd_create_fmr_pool(struct kib_fmr_poolset *fps,
-				  struct kib_fmr_pool **pp_fpo)
-{
-	struct kib_dev *dev = fps->fps_net->ibn_dev;
-	struct ib_device_attr *dev_attr;
-	struct kib_fmr_pool *fpo;
-	int rc;
-
-	fpo = kzalloc_cpt(sizeof(*fpo), GFP_NOFS, fps->fps_cpt);
-	if (!fpo)
-		return -ENOMEM;
-
-	fpo->fpo_hdev = kiblnd_current_hdev(dev);
-	dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs;
-
-	/* Check for FMR or FastReg support */
-	fpo->fpo_is_fmr = 0;
-	if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
-	    fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
-	    fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
-	    fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
-		LCONSOLE_INFO("Using FMR for registration\n");
-		fpo->fpo_is_fmr = 1;
-	} else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-		LCONSOLE_INFO("Using FastReg for registration\n");
-	} else {
-		rc = -ENOSYS;
-		LCONSOLE_ERROR_MSG(rc, "IB device supports neither FMRs nor FastRegs, can't register memory\n");
-		goto out_fpo;
-	}
-
-	if (fpo->fpo_is_fmr)
-		rc = kiblnd_alloc_fmr_pool(fps, fpo);
-	else
-		rc = kiblnd_alloc_freg_pool(fps, fpo);
-	if (rc)
-		goto out_fpo;
-
-	fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-	fpo->fpo_owner = fps;
-	*pp_fpo = fpo;
-
-	return 0;
-
-out_fpo:
-	kiblnd_hdev_decref(fpo->fpo_hdev);
-	kfree(fpo);
-	return rc;
-}
-
-static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps,
-				    struct list_head *zombies)
-{
-	if (!fps->fps_net) /* initialized? */
-		return;
-
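-	/*
-	 * fail every pool: idle ones go straight onto the caller's
-	 * 'zombies' list for destruction, busy ones park on the failed
-	 * list until their last user drops them
-	 */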
-	spin_lock(&fps->fps_lock);
-
-	while (!list_empty(&fps->fps_pool_list)) {
-		struct kib_fmr_pool *fpo = list_entry(fps->fps_pool_list.next,
-						 struct kib_fmr_pool, fpo_list);
-		fpo->fpo_failed = 1;
-		list_del(&fpo->fpo_list);
-		if (!fpo->fpo_map_count)
-			list_add(&fpo->fpo_list, zombies);
-		else
-			list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
-	}
-
-	spin_unlock(&fps->fps_lock);
-}
-
-static void kiblnd_fini_fmr_poolset(struct kib_fmr_poolset *fps)
-{
-	if (fps->fps_net) { /* initialized? */
-		kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list);
-		kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list);
-	}
-}
-
-static int
-kiblnd_init_fmr_poolset(struct kib_fmr_poolset *fps, int cpt, int ncpts,
-			struct kib_net *net,
-			struct lnet_ioctl_config_o2iblnd_tunables *tunables)
-{
-	struct kib_fmr_pool *fpo;
-	int rc;
-
-	memset(fps, 0, sizeof(*fps));
-
-	fps->fps_net = net;
-	fps->fps_cpt = cpt;
-
-	fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
-	fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
-	fps->fps_cache = tunables->lnd_fmr_cache;
-
-	spin_lock_init(&fps->fps_lock);
-	INIT_LIST_HEAD(&fps->fps_pool_list);
-	INIT_LIST_HEAD(&fps->fps_failed_pool_list);
-
-	rc = kiblnd_create_fmr_pool(fps, &fpo);
-	if (!rc)
-		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
-
-	return rc;
-}
-
-static int kiblnd_fmr_pool_is_idle(struct kib_fmr_pool *fpo, unsigned long now)
-{
-	if (fpo->fpo_map_count) /* still in use */
-		return 0;
-	if (fpo->fpo_failed)
-		return 1;
-	return time_after_eq(now, fpo->fpo_deadline);
-}
-
-static int
-kiblnd_map_tx_pages(struct kib_tx *tx, struct kib_rdma_desc *rd)
-{
-	__u64 *pages = tx->tx_pages;
-	struct kib_hca_dev *hdev;
-	int npages;
-	int size;
-	int i;
-
-	hdev = tx->tx_pool->tpo_hdev;
-
-	for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
-		for (size = 0; size <  rd->rd_frags[i].rf_nob;
-		     size += hdev->ibh_page_size) {
-			pages[npages++] = (rd->rd_frags[i].rf_addr &
-					   hdev->ibh_page_mask) + size;
-		}
-	}
-
-	return npages;
-}
-
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status)
-{
-	LIST_HEAD(zombies);
-	struct kib_fmr_pool *fpo = fmr->fmr_pool;
-	struct kib_fmr_poolset *fps;
-	unsigned long now = jiffies;
-	struct kib_fmr_pool *tmp;
-	int rc;
-
-	if (!fpo)
-		return;
-
-	fps = fpo->fpo_owner;
-	if (fpo->fpo_is_fmr) {
-		if (fmr->fmr_pfmr) {
-			rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
-			LASSERT(!rc);
-			fmr->fmr_pfmr = NULL;
-		}
-
-		if (status) {
-			rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
-			LASSERT(!rc);
-		}
-	} else {
-		struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
-
-		if (frd) {
-			frd->frd_valid = false;
-			spin_lock(&fps->fps_lock);
-			list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
-			spin_unlock(&fps->fps_lock);
-			fmr->fmr_frd = NULL;
-		}
-	}
-	fmr->fmr_pool = NULL;
-
-	spin_lock(&fps->fps_lock);
-	fpo->fpo_map_count--;  /* decref the pool */
-
-	list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
-		/* the first pool is persistent */
-		if (fps->fps_pool_list.next == &fpo->fpo_list)
-			continue;
-
-		if (kiblnd_fmr_pool_is_idle(fpo, now)) {
-			list_move(&fpo->fpo_list, &zombies);
-			fps->fps_version++;
-		}
-	}
-	spin_unlock(&fps->fps_lock);
-
-	if (!list_empty(&zombies))
-		kiblnd_destroy_fmr_pool_list(&zombies);
-}
-
-int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
-			struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
-			struct kib_fmr *fmr)
-{
-	__u64 *pages = tx->tx_pages;
-	bool is_rx = (rd != tx->tx_rd);
-	bool tx_pages_mapped = false;
-	struct kib_fmr_pool *fpo;
-	int npages = 0;
-	__u64 version;
-	int rc;
-
- again:
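-	/*
-	 * try every pool in the set; on -EAGAIN grow the set (one thread
-	 * at a time) and retry until the mapping succeeds or fails hard
-	 */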
-	spin_lock(&fps->fps_lock);
-	version = fps->fps_version;
-	list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
-		fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-		fpo->fpo_map_count++;
-
-		if (fpo->fpo_is_fmr) {
-			struct ib_pool_fmr *pfmr;
-
-			spin_unlock(&fps->fps_lock);
-
-			if (!tx_pages_mapped) {
-				npages = kiblnd_map_tx_pages(tx, rd);
-				tx_pages_mapped = 1;
-			}
-
-			pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
-						    pages, npages, iov);
-			if (likely(!IS_ERR(pfmr))) {
-				fmr->fmr_key = is_rx ? pfmr->fmr->rkey :
-						       pfmr->fmr->lkey;
-				fmr->fmr_frd = NULL;
-				fmr->fmr_pfmr = pfmr;
-				fmr->fmr_pool = fpo;
-				return 0;
-			}
-			rc = PTR_ERR(pfmr);
-		} else {
-			if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
-				struct kib_fast_reg_descriptor *frd;
-				struct ib_reg_wr *wr;
-				struct ib_mr *mr;
-				int n;
-
-				frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
-						       struct kib_fast_reg_descriptor,
-						       frd_list);
-				list_del(&frd->frd_list);
-				spin_unlock(&fps->fps_lock);
-
-				mr = frd->frd_mr;
-
-				if (!frd->frd_valid) {
-					__u32 key = is_rx ? mr->rkey : mr->lkey;
-					struct ib_send_wr *inv_wr;
-
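-					/*
-					 * the MR was used before: prepare
-					 * a LOCAL_INV WR so the stale
-					 * registration is invalidated
-					 * before the MR is reused
-					 */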
-					inv_wr = &frd->frd_inv_wr;
-					memset(inv_wr, 0, sizeof(*inv_wr));
-					inv_wr->opcode = IB_WR_LOCAL_INV;
-					inv_wr->wr_id = IBLND_WID_MR;
-					inv_wr->ex.invalidate_rkey = key;
-
-					/* Bump the key */
-					key = ib_inc_rkey(key);
-					ib_update_fast_reg_key(mr, key);
-				}
-
-				n = ib_map_mr_sg(mr, tx->tx_frags,
-						 tx->tx_nfrags, NULL, PAGE_SIZE);
-				if (unlikely(n != tx->tx_nfrags)) {
-					CERROR("Failed to map mr %d/%d elements\n",
-					       n, tx->tx_nfrags);
-					return n < 0 ? n : -EINVAL;
-				}
-
-				mr->iova = iov;
-
-				/* Prepare FastReg WR */
-				wr = &frd->frd_fastreg_wr;
-				memset(wr, 0, sizeof(*wr));
-				wr->wr.opcode = IB_WR_REG_MR;
-				wr->wr.wr_id = IBLND_WID_MR;
-				wr->wr.num_sge = 0;
-				wr->wr.send_flags = 0;
-				wr->mr = mr;
-				wr->key = is_rx ? mr->rkey : mr->lkey;
-				wr->access = (IB_ACCESS_LOCAL_WRITE |
-					      IB_ACCESS_REMOTE_WRITE);
-
-				fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
-				fmr->fmr_frd = frd;
-				fmr->fmr_pfmr = NULL;
-				fmr->fmr_pool = fpo;
-				return 0;
-			}
-			spin_unlock(&fps->fps_lock);
-			rc = -EAGAIN;
-		}
-
-		spin_lock(&fps->fps_lock);
-		fpo->fpo_map_count--;
-		if (rc != -EAGAIN) {
-			spin_unlock(&fps->fps_lock);
-			return rc;
-		}
-
-		/* EAGAIN and ... */
-		if (version != fps->fps_version) {
-			spin_unlock(&fps->fps_lock);
-			goto again;
-		}
-	}
-
-	if (fps->fps_increasing) {
-		spin_unlock(&fps->fps_lock);
-		CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for it to complete\n");
-		schedule();
-		goto again;
-	}
-
-	if (time_before(jiffies, fps->fps_next_retry)) {
-		/* someone failed recently */
-		spin_unlock(&fps->fps_lock);
-		return -EAGAIN;
-	}
-
-	fps->fps_increasing = 1;
-	spin_unlock(&fps->fps_lock);
-
-	CDEBUG(D_NET, "Allocate new FMR pool\n");
-	rc = kiblnd_create_fmr_pool(fps, &fpo);
-	spin_lock(&fps->fps_lock);
-	fps->fps_increasing = 0;
-	if (!rc) {
-		fps->fps_version++;
-		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
-	} else {
-		fps->fps_next_retry = jiffies + IBLND_POOL_RETRY * HZ;
-	}
-	spin_unlock(&fps->fps_lock);
-
-	goto again;
-}
-
-static void kiblnd_fini_pool(struct kib_pool *pool)
-{
-	LASSERT(list_empty(&pool->po_free_list));
-	LASSERT(!pool->po_allocated);
-
-	CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
-}
-
-static void kiblnd_init_pool(struct kib_poolset *ps, struct kib_pool *pool, int size)
-{
-	CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);
-
-	memset(pool, 0, sizeof(*pool));
-	INIT_LIST_HEAD(&pool->po_free_list);
-	pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-	pool->po_owner    = ps;
-	pool->po_size     = size;
-}
-
-static void kiblnd_destroy_pool_list(struct list_head *head)
-{
-	struct kib_pool *pool;
-
-	while (!list_empty(head)) {
-		pool = list_entry(head->next, struct kib_pool, po_list);
-		list_del(&pool->po_list);
-
-		LASSERT(pool->po_owner);
-		pool->po_owner->ps_pool_destroy(pool);
-	}
-}
-
-static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies)
-{
-	if (!ps->ps_net) /* initialized? */
-		return;
-
-	spin_lock(&ps->ps_lock);
-	while (!list_empty(&ps->ps_pool_list)) {
-		struct kib_pool *po = list_entry(ps->ps_pool_list.next,
-					    struct kib_pool, po_list);
-		po->po_failed = 1;
-		list_del(&po->po_list);
-		if (!po->po_allocated)
-			list_add(&po->po_list, zombies);
-		else
-			list_add(&po->po_list, &ps->ps_failed_pool_list);
-	}
-	spin_unlock(&ps->ps_lock);
-}
-
-static void kiblnd_fini_poolset(struct kib_poolset *ps)
-{
-	if (ps->ps_net) { /* initialized? */
-		kiblnd_destroy_pool_list(&ps->ps_failed_pool_list);
-		kiblnd_destroy_pool_list(&ps->ps_pool_list);
-	}
-}
-
-static int kiblnd_init_poolset(struct kib_poolset *ps, int cpt,
-			       struct kib_net *net, char *name, int size,
-			       kib_ps_pool_create_t po_create,
-			       kib_ps_pool_destroy_t po_destroy,
-			       kib_ps_node_init_t nd_init,
-			       kib_ps_node_fini_t nd_fini)
-{
-	struct kib_pool *pool;
-	int rc;
-
-	memset(ps, 0, sizeof(*ps));
-
-	ps->ps_cpt          = cpt;
-	ps->ps_net          = net;
-	ps->ps_pool_create  = po_create;
-	ps->ps_pool_destroy = po_destroy;
-	ps->ps_node_init    = nd_init;
-	ps->ps_node_fini    = nd_fini;
-	ps->ps_pool_size    = size;
-	if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
-	    >= sizeof(ps->ps_name))
-		return -E2BIG;
-	spin_lock_init(&ps->ps_lock);
-	INIT_LIST_HEAD(&ps->ps_pool_list);
-	INIT_LIST_HEAD(&ps->ps_failed_pool_list);
-
-	rc = ps->ps_pool_create(ps, size, &pool);
-	if (!rc)
-		list_add(&pool->po_list, &ps->ps_pool_list);
-	else
-		CERROR("Failed to create the first pool for %s\n", ps->ps_name);
-
-	return rc;
-}
-
-static int kiblnd_pool_is_idle(struct kib_pool *pool, unsigned long now)
-{
-	if (pool->po_allocated) /* still in use */
-		return 0;
-	if (pool->po_failed)
-		return 1;
-	return time_after_eq(now, pool->po_deadline);
-}
-
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node)
-{
-	LIST_HEAD(zombies);
-	struct kib_poolset *ps = pool->po_owner;
-	struct kib_pool *tmp;
-	unsigned long now = jiffies;
-
-	spin_lock(&ps->ps_lock);
-
-	if (ps->ps_node_fini)
-		ps->ps_node_fini(pool, node);
-
-	LASSERT(pool->po_allocated > 0);
-	list_add(node, &pool->po_free_list);
-	pool->po_allocated--;
-
-	list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
-		/* the first pool is persistent */
-		if (ps->ps_pool_list.next == &pool->po_list)
-			continue;
-
-		if (kiblnd_pool_is_idle(pool, now))
-			list_move(&pool->po_list, &zombies);
-	}
-	spin_unlock(&ps->ps_lock);
-
-	if (!list_empty(&zombies))
-		kiblnd_destroy_pool_list(&zombies);
-}
-
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps)
-{
-	struct list_head *node;
-	struct kib_pool *pool;
-	unsigned int interval = 1;
-	unsigned long time_before;
-	unsigned int trips = 0;
-	int rc;
-
- again:
-	spin_lock(&ps->ps_lock);
-	list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
-		if (list_empty(&pool->po_free_list))
-			continue;
-
-		pool->po_allocated++;
-		pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-		node = pool->po_free_list.next;
-		list_del(node);
-
-		if (ps->ps_node_init) {
-			/* still hold the lock */
-			ps->ps_node_init(pool, node);
-		}
-		spin_unlock(&ps->ps_lock);
-		return node;
-	}
-
-	/* no available tx pool and ... */
-	if (ps->ps_increasing) {
-		/* another thread is allocating a new pool */
-		spin_unlock(&ps->ps_lock);
-		trips++;
-		CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d jiffies for it to complete. trips = %d\n",
-		       ps->ps_name, interval, trips);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(interval);
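-		/* exponential backoff, capped at roughly one second */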
-		if (interval < HZ)
-			interval *= 2;
-
-		goto again;
-	}
-
-	if (time_before(jiffies, ps->ps_next_retry)) {
-		/* someone failed recently */
-		spin_unlock(&ps->ps_lock);
-		return NULL;
-	}
-
-	ps->ps_increasing = 1;
-	spin_unlock(&ps->ps_lock);
-
-	CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);
-	time_before = jiffies;
-	rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
-	CDEBUG(D_NET, "ps_pool_create took %lu jiffies to complete\n",
-	       jiffies - time_before);
-
-	spin_lock(&ps->ps_lock);
-	ps->ps_increasing = 0;
-	if (!rc) {
-		list_add_tail(&pool->po_list, &ps->ps_pool_list);
-	} else {
-		ps->ps_next_retry = jiffies + IBLND_POOL_RETRY * HZ;
-		CERROR("Can't allocate new %s pool: out of memory\n",
-		       ps->ps_name);
-	}
-	spin_unlock(&ps->ps_lock);
-
-	goto again;
-}
-
-static void kiblnd_destroy_tx_pool(struct kib_pool *pool)
-{
-	struct kib_tx_pool *tpo = container_of(pool, struct kib_tx_pool, tpo_pool);
-	int i;
-
-	LASSERT(!pool->po_allocated);
-
-	if (tpo->tpo_tx_pages) {
-		kiblnd_unmap_tx_pool(tpo);
-		kiblnd_free_pages(tpo->tpo_tx_pages);
-	}
-
-	if (!tpo->tpo_tx_descs)
-		goto out;
-
-	for (i = 0; i < pool->po_size; i++) {
-		struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
-		list_del(&tx->tx_list);
-		kfree(tx->tx_pages);
-		kfree(tx->tx_frags);
-		kfree(tx->tx_wrq);
-		kfree(tx->tx_sge);
-		kfree(tx->tx_rd);
-	}
-
-	kfree(tpo->tpo_tx_descs);
-out:
-	kiblnd_fini_pool(pool);
-	kfree(tpo);
-}
-
-static int kiblnd_tx_pool_size(int ncpts)
-{
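-	/*
-	 * spread the configured tx descriptors across CPTs, but never
-	 * let a single pool fall below IBLND_TX_POOL entries
-	 */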
-	int ntx = *kiblnd_tunables.kib_ntx / ncpts;
-
-	return max(IBLND_TX_POOL, ntx);
-}
-
-static int kiblnd_create_tx_pool(struct kib_poolset *ps, int size,
-				 struct kib_pool **pp_po)
-{
-	int i;
-	int npg;
-	struct kib_pool *pool;
-	struct kib_tx_pool *tpo;
-
-	tpo = kzalloc_cpt(sizeof(*tpo), GFP_NOFS, ps->ps_cpt);
-	if (!tpo) {
-		CERROR("Failed to allocate TX pool\n");
-		return -ENOMEM;
-	}
-
-	pool = &tpo->tpo_pool;
-	kiblnd_init_pool(ps, pool, size);
-	tpo->tpo_tx_descs = NULL;
-	tpo->tpo_tx_pages = NULL;
-
-	npg = DIV_ROUND_UP(size * IBLND_MSG_SIZE, PAGE_SIZE);
-	if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) {
-		CERROR("Can't allocate tx pages: %d\n", npg);
-		kfree(tpo);
-		return -ENOMEM;
-	}
-
-	tpo->tpo_tx_descs = kzalloc_cpt(size * sizeof(struct kib_tx),
-					GFP_NOFS, ps->ps_cpt);
-	if (!tpo->tpo_tx_descs) {
-		CERROR("Can't allocate %d tx descriptors\n", size);
-		ps->ps_pool_destroy(pool);
-		return -ENOMEM;
-	}
-
-	memset(tpo->tpo_tx_descs, 0, size * sizeof(struct kib_tx));
-
-	for (i = 0; i < size; i++) {
-		struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
-		tx->tx_pool = tpo;
-		if (ps->ps_net->ibn_fmr_ps) {
-			tx->tx_pages = kzalloc_cpt(LNET_MAX_IOV * sizeof(*tx->tx_pages),
-						   GFP_NOFS, ps->ps_cpt);
-			if (!tx->tx_pages)
-				break;
-		}
-
-		tx->tx_frags = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
-					   sizeof(*tx->tx_frags),
-					   GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_frags)
-			break;
-
-		sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1);
-
-		tx->tx_wrq = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
-					 sizeof(*tx->tx_wrq),
-					 GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_wrq)
-			break;
-
-		tx->tx_sge = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
-					 sizeof(*tx->tx_sge),
-					 GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_sge)
-			break;
-
-		tx->tx_rd = kzalloc_cpt(offsetof(struct kib_rdma_desc,
-						 rd_frags[IBLND_MAX_RDMA_FRAGS]),
-					GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_rd)
-			break;
-	}
-
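-	/*
-	 * the pool is usable only if every descriptor was fully set up;
-	 * any allocation failure above broke out of the loop with
-	 * i < size
-	 */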
-	if (i == size) {
-		kiblnd_map_tx_pool(tpo);
-		*pp_po = pool;
-		return 0;
-	}
-
-	ps->ps_pool_destroy(pool);
-	return -ENOMEM;
-}
-
-static void kiblnd_tx_init(struct kib_pool *pool, struct list_head *node)
-{
-	struct kib_tx_poolset *tps = container_of(pool->po_owner,
-						  struct kib_tx_poolset,
-						  tps_poolset);
-	struct kib_tx *tx = list_entry(node, struct kib_tx, tx_list);
-
-	tx->tx_cookie = tps->tps_next_tx_cookie++;
-}
-
-static void kiblnd_net_fini_pools(struct kib_net *net)
-{
-	int i;
-
-	cfs_cpt_for_each(i, lnet_cpt_table()) {
-		struct kib_tx_poolset *tps;
-		struct kib_fmr_poolset *fps;
-
-		if (net->ibn_tx_ps) {
-			tps = net->ibn_tx_ps[i];
-			kiblnd_fini_poolset(&tps->tps_poolset);
-		}
-
-		if (net->ibn_fmr_ps) {
-			fps = net->ibn_fmr_ps[i];
-			kiblnd_fini_fmr_poolset(fps);
-		}
-	}
-
-	if (net->ibn_tx_ps) {
-		cfs_percpt_free(net->ibn_tx_ps);
-		net->ibn_tx_ps = NULL;
-	}
-
-	if (net->ibn_fmr_ps) {
-		cfs_percpt_free(net->ibn_fmr_ps);
-		net->ibn_fmr_ps = NULL;
-	}
-}
-
-static int kiblnd_net_init_pools(struct kib_net *net, struct lnet_ni *ni,
-				 __u32 *cpts, int ncpts)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	int cpt;
-	int rc;
-	int i;
-
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
-	if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
-		CERROR("Can't set fmr pool size (%d) < ntx / 4 (%d)\n",
-		       tunables->lnd_fmr_pool_size,
-		       *kiblnd_tunables.kib_ntx / 4);
-		rc = -EINVAL;
-		goto failed;
-	}
-
-	/*
-	 * TX pool must be created later than FMR, see LU-2268
-	 * for details
-	 */
-	LASSERT(!net->ibn_tx_ps);
-
-	/*
-	 * premapping can fail if ibd_nmr > 1, so we always create an
-	 * FMR pool and fall back to map-on-demand if premapping fails
-	 *
-	 * cfs_percpt_alloc creates an array of struct kib_fmr_poolset.
-	 * The number of struct kib_fmr_poolsets created is equal to the
-	 * number of CPTs that exist, i.e. net->ibn_fmr_ps[cpt].
-	 */
-	net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
-					   sizeof(struct kib_fmr_poolset));
-	if (!net->ibn_fmr_ps) {
-		CERROR("Failed to allocate FMR pool array\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	for (i = 0; i < ncpts; i++) {
-		cpt = !cpts ? i : cpts[i];
-		rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
-					     net, tunables);
-		if (rc) {
-			CERROR("Can't initialize FMR pool for CPT %d: %d\n",
-			       cpt, rc);
-			goto failed;
-		}
-	}
-
-	if (i > 0)
-		LASSERT(i == ncpts);
-
-	/*
-	 * cfs_percpt_alloc creates an array of struct kib_tx_poolset.
-	 * The number of struct kib_tx_poolsets created is equal to the
-	 * number of CPTs that exist, i.e. net->ibn_tx_ps[cpt].
-	 */
-	net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
-					  sizeof(struct kib_tx_poolset));
-	if (!net->ibn_tx_ps) {
-		CERROR("Failed to allocate tx pool array\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	for (i = 0; i < ncpts; i++) {
-		cpt = !cpts ? i : cpts[i];
-		rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
-					 cpt, net, "TX",
-					 kiblnd_tx_pool_size(ncpts),
-					 kiblnd_create_tx_pool,
-					 kiblnd_destroy_tx_pool,
-					 kiblnd_tx_init, NULL);
-		if (rc) {
-			CERROR("Can't initialize TX pool for CPT %d: %d\n",
-			       cpt, rc);
-			goto failed;
-		}
-	}
-
-	return 0;
- failed:
-	kiblnd_net_fini_pools(net);
-	LASSERT(rc);
-	return rc;
-}
-
-static int kiblnd_hdev_get_attr(struct kib_hca_dev *hdev)
-{
-	/*
-	 * It's safe to assume a HCA can handle a page size
-	 * matching that of the native system
-	 */
-	hdev->ibh_page_shift = PAGE_SHIFT;
-	hdev->ibh_page_size  = 1 << PAGE_SHIFT;
-	hdev->ibh_page_mask  = ~((__u64)hdev->ibh_page_size - 1);
-
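-	/*
-	 * only a device advertising an unlimited (all-ones) max MR size
-	 * is accepted; anything else is rejected below
-	 */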
-	hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
-	if (hdev->ibh_mr_size == ~0ULL) {
-		hdev->ibh_mr_shift = 64;
-		return 0;
-	}
-
-	CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size);
-	return -EINVAL;
-}
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev)
-{
-	if (hdev->ibh_pd)
-		ib_dealloc_pd(hdev->ibh_pd);
-
-	if (hdev->ibh_cmid)
-		rdma_destroy_id(hdev->ibh_cmid);
-
-	kfree(hdev);
-}
-
-/* DUMMY */
-static int kiblnd_dummy_callback(struct rdma_cm_id *cmid,
-				 struct rdma_cm_event *event)
-{
-	return 0;
-}
-
-static int kiblnd_dev_need_failover(struct kib_dev *dev)
-{
-	struct rdma_cm_id *cmid;
-	struct sockaddr_in srcaddr;
-	struct sockaddr_in dstaddr;
-	int rc;
-
-	if (!dev->ibd_hdev || /* initializing */
-	    !dev->ibd_hdev->ibh_cmid || /* listener is dead */
-	    *kiblnd_tunables.kib_dev_failover > 1) /* debugging */
-		return 1;
-
-	/*
-	 * XXX: it's UGLY, but I don't have a better way to detect
-	 * ib-bonding HCA failover because:
-	 *
-	 * a. no reliable CM event for HCA failover...
-	 * b. no OFED API to get ib_device for current net_device...
-	 *
-	 * We have only two choices at this point:
-	 *
-	 * a. rdma_bind_addr(), it will conflict with listener cmid
-	 * b. rdma_resolve_addr() to zero addr
-	 */
-	cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
-				     IB_QPT_RC);
-	if (IS_ERR(cmid)) {
-		rc = PTR_ERR(cmid);
-		CERROR("Failed to create cmid for failover: %d\n", rc);
-		return rc;
-	}
-
-	memset(&srcaddr, 0, sizeof(srcaddr));
-	srcaddr.sin_family = AF_INET;
-	srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
-	memset(&dstaddr, 0, sizeof(dstaddr));
-	dstaddr.sin_family = AF_INET;
-	rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr,
-			       (struct sockaddr *)&dstaddr, 1);
-	if (rc || !cmid->device) {
-		CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
-		       dev->ibd_ifname, &dev->ibd_ifip,
-		       cmid->device, rc);
-		rdma_destroy_id(cmid);
-		return rc;
-	}
-
-	rc = dev->ibd_hdev->ibh_ibdev != cmid->device; /* true for failover */
-	rdma_destroy_id(cmid);
-
-	return rc;
-}
-
-int kiblnd_dev_failover(struct kib_dev *dev)
-{
-	LIST_HEAD(zombie_tpo);
-	LIST_HEAD(zombie_ppo);
-	LIST_HEAD(zombie_fpo);
-	struct rdma_cm_id *cmid  = NULL;
-	struct kib_hca_dev *hdev  = NULL;
-	struct ib_pd *pd;
-	struct kib_net *net;
-	struct sockaddr_in addr;
-	unsigned long flags;
-	int rc = 0;
-	int i;
-
-	LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
-		dev->ibd_can_failover || !dev->ibd_hdev);
-
-	rc = kiblnd_dev_need_failover(dev);
-	if (rc <= 0)
-		goto out;
-
-	if (dev->ibd_hdev &&
-	    dev->ibd_hdev->ibh_cmid) {
-		/*
-		 * XXX it's not good to close the old listener here,
-		 * because we might then fail to create a new one.
-		 * But we have to close it now, otherwise rdma_bind_addr
-		 * will return EADDRINUSE... What a pain!
-		 */
-		write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-		cmid = dev->ibd_hdev->ibh_cmid;
-		/*
-		 * make the next call to kiblnd_dev_need_failover()
-		 * return 1 for me
-		 */
-		dev->ibd_hdev->ibh_cmid  = NULL;
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-		rdma_destroy_id(cmid);
-	}
-
-	cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
-				     IB_QPT_RC);
-	if (IS_ERR(cmid)) {
-		rc = PTR_ERR(cmid);
-		CERROR("Failed to create cmid for failover: %d\n", rc);
-		goto out;
-	}
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sin_family      = AF_INET;
-	addr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-	addr.sin_port	= htons(*kiblnd_tunables.kib_service);
-
-	/* Bind to failover device or port */
-	rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
-	if (rc || !cmid->device) {
-		CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
-		       dev->ibd_ifname, &dev->ibd_ifip,
-		       cmid->device, rc);
-		rdma_destroy_id(cmid);
-		goto out;
-	}
-
-	hdev = kzalloc(sizeof(*hdev), GFP_NOFS);
-	if (!hdev) {
-		CERROR("Failed to allocate kib_hca_dev\n");
-		rdma_destroy_id(cmid);
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	atomic_set(&hdev->ibh_ref, 1);
-	hdev->ibh_dev   = dev;
-	hdev->ibh_cmid  = cmid;
-	hdev->ibh_ibdev = cmid->device;
-
-	pd = ib_alloc_pd(cmid->device, 0);
-	if (IS_ERR(pd)) {
-		rc = PTR_ERR(pd);
-		CERROR("Can't allocate PD: %d\n", rc);
-		goto out;
-	}
-
-	hdev->ibh_pd = pd;
-
-	rc = rdma_listen(cmid, 0);
-	if (rc) {
-		CERROR("Can't start new listener: %d\n", rc);
-		goto out;
-	}
-
-	rc = kiblnd_hdev_get_attr(hdev);
-	if (rc) {
-		CERROR("Can't get device attributes: %d\n", rc);
-		goto out;
-	}
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	swap(dev->ibd_hdev, hdev); /* take over the refcount */
-
-	list_for_each_entry(net, &dev->ibd_nets, ibn_list) {
-		cfs_cpt_for_each(i, lnet_cpt_table()) {
-			kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset,
-					    &zombie_tpo);
-
-			if (net->ibn_fmr_ps)
-				kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i],
-							&zombie_fpo);
-		}
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- out:
-	if (!list_empty(&zombie_tpo))
-		kiblnd_destroy_pool_list(&zombie_tpo);
-	if (!list_empty(&zombie_ppo))
-		kiblnd_destroy_pool_list(&zombie_ppo);
-	if (!list_empty(&zombie_fpo))
-		kiblnd_destroy_fmr_pool_list(&zombie_fpo);
-	if (hdev)
-		kiblnd_hdev_decref(hdev);
-
-	if (rc)
-		dev->ibd_failed_failover++;
-	else
-		dev->ibd_failed_failover = 0;
-
-	return rc;
-}
-
-void kiblnd_destroy_dev(struct kib_dev *dev)
-{
-	LASSERT(!dev->ibd_nnets);
-	LASSERT(list_empty(&dev->ibd_nets));
-
-	list_del(&dev->ibd_fail_list);
-	list_del(&dev->ibd_list);
-
-	if (dev->ibd_hdev)
-		kiblnd_hdev_decref(dev->ibd_hdev);
-
-	kfree(dev);
-}
-
-static struct kib_dev *kiblnd_create_dev(char *ifname)
-{
-	struct net_device *netdev;
-	struct kib_dev *dev;
-	__u32 netmask;
-	__u32 ip;
-	int up;
-	int rc;
-
-	rc = lnet_ipif_query(ifname, &up, &ip, &netmask);
-	if (rc) {
-		CERROR("Can't query IPoIB interface %s: %d\n",
-		       ifname, rc);
-		return NULL;
-	}
-
-	if (!up) {
-		CERROR("Can't query IPoIB interface %s: it's down\n", ifname);
-		return NULL;
-	}
-
-	dev = kzalloc(sizeof(*dev), GFP_NOFS);
-	if (!dev)
-		return NULL;
-
-	netdev = dev_get_by_name(&init_net, ifname);
-	if (!netdev) {
-		dev->ibd_can_failover = 0;
-	} else {
-		dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER);
-		dev_put(netdev);
-	}
-
-	INIT_LIST_HEAD(&dev->ibd_nets);
-	INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */
-	INIT_LIST_HEAD(&dev->ibd_fail_list);
-	dev->ibd_ifip = ip;
-	strcpy(&dev->ibd_ifname[0], ifname);
-
-	/* initialize the device */
-	rc = kiblnd_dev_failover(dev);
-	if (rc) {
-		CERROR("Can't initialize device: %d\n", rc);
-		kfree(dev);
-		return NULL;
-	}
-
-	list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs);
-	return dev;
-}
-
-static void kiblnd_base_shutdown(void)
-{
-	struct kib_sched_info *sched;
-	int i;
-
-	LASSERT(list_empty(&kiblnd_data.kib_devs));
-
-	switch (kiblnd_data.kib_init) {
-	default:
-		LBUG();
-
-	case IBLND_INIT_ALL:
-	case IBLND_INIT_DATA:
-		LASSERT(kiblnd_data.kib_peers);
-		for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
-			LASSERT(list_empty(&kiblnd_data.kib_peers[i]));
-		LASSERT(list_empty(&kiblnd_data.kib_connd_zombies));
-		LASSERT(list_empty(&kiblnd_data.kib_connd_conns));
-		LASSERT(list_empty(&kiblnd_data.kib_reconn_list));
-		LASSERT(list_empty(&kiblnd_data.kib_reconn_wait));
-
-		/* flag threads to terminate; wake and wait for them to die */
-		kiblnd_data.kib_shutdown = 1;
-
-		/*
-		 * NB: we really want to stop scheduler threads net by net
-		 * instead of for the whole module; this should be improved
-		 * with dynamic LNet configuration
-		 */
-		cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds)
-			wake_up_all(&sched->ibs_waitq);
-
-		wake_up_all(&kiblnd_data.kib_connd_waitq);
-		wake_up_all(&kiblnd_data.kib_failover_waitq);
-
-		i = 2;
-		while (atomic_read(&kiblnd_data.kib_nthreads)) {
-			i++;
-			/* power of 2 ? */
-			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-			       "Waiting for %d threads to terminate\n",
-			       atomic_read(&kiblnd_data.kib_nthreads));
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-		}
-
-		/* fall through */
-
-	case IBLND_INIT_NOTHING:
-		break;
-	}
-
-	kvfree(kiblnd_data.kib_peers);
-
-	if (kiblnd_data.kib_scheds)
-		cfs_percpt_free(kiblnd_data.kib_scheds);
-
-	kiblnd_data.kib_init = IBLND_INIT_NOTHING;
-	module_put(THIS_MODULE);
-}
-
-static void kiblnd_shutdown(struct lnet_ni *ni)
-{
-	struct kib_net *net = ni->ni_data;
-	rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-	int i;
-	unsigned long flags;
-
-	LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
-
-	if (!net)
-		goto out;
-
-	write_lock_irqsave(g_lock, flags);
-	net->ibn_shutdown = 1;
-	write_unlock_irqrestore(g_lock, flags);
-
-	switch (net->ibn_init) {
-	default:
-		LBUG();
-
-	case IBLND_INIT_ALL:
-		/* nuke all existing peers within this net */
-		kiblnd_del_peer(ni, LNET_NID_ANY);
-
-		/* Wait for all peer state to clean up */
-		i = 2;
-		while (atomic_read(&net->ibn_npeers)) {
-			i++;
-			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
-			       "%s: waiting for %d peers to disconnect\n",
-			       libcfs_nid2str(ni->ni_nid),
-			       atomic_read(&net->ibn_npeers));
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-		}
-
-		kiblnd_net_fini_pools(net);
-
-		write_lock_irqsave(g_lock, flags);
-		LASSERT(net->ibn_dev->ibd_nnets > 0);
-		net->ibn_dev->ibd_nnets--;
-		list_del(&net->ibn_list);
-		write_unlock_irqrestore(g_lock, flags);
-
-		/* fall through */
-
-	case IBLND_INIT_NOTHING:
-		LASSERT(!atomic_read(&net->ibn_nconns));
-
-		if (net->ibn_dev && !net->ibn_dev->ibd_nnets)
-			kiblnd_destroy_dev(net->ibn_dev);
-
-		break;
-	}
-
-	net->ibn_init = IBLND_INIT_NOTHING;
-	ni->ni_data = NULL;
-
-	kfree(net);
-
-out:
-	if (list_empty(&kiblnd_data.kib_devs))
-		kiblnd_base_shutdown();
-}
-
-static int kiblnd_base_startup(void)
-{
-	struct kib_sched_info *sched;
-	int rc;
-	int i;
-
-	LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING);
-
-	try_module_get(THIS_MODULE);
-	/* zero pointers, flags etc */
-	memset(&kiblnd_data, 0, sizeof(kiblnd_data));
-
-	rwlock_init(&kiblnd_data.kib_global_lock);
-
-	INIT_LIST_HEAD(&kiblnd_data.kib_devs);
-	INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs);
-
-	kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
-	kiblnd_data.kib_peers = kvmalloc_array(kiblnd_data.kib_peer_hash_size,
-					       sizeof(struct list_head),
-					       GFP_KERNEL);
-	if (!kiblnd_data.kib_peers)
-		goto failed;
-	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
-		INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]);
-
-	spin_lock_init(&kiblnd_data.kib_connd_lock);
-	INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
-	INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
-	INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
-	INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
-
-	init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
-	init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
-
-	kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(),
-						  sizeof(*sched));
-	if (!kiblnd_data.kib_scheds)
-		goto failed;
-
-	cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) {
-		int nthrs;
-
-		spin_lock_init(&sched->ibs_lock);
-		INIT_LIST_HEAD(&sched->ibs_conns);
-		init_waitqueue_head(&sched->ibs_waitq);
-
-		nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
-		if (*kiblnd_tunables.kib_nscheds > 0) {
-			nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds);
-		} else {
-			/*
-			 * cap at half of the CPUs; the other half is
-			 * reserved for upper-layer modules
-			 */
-			nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
-		}
-
-		sched->ibs_nthreads_max = nthrs;
-		sched->ibs_cpt = i;
-	}
-
-	kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR;
-
-	/* lists/ptrs/locks initialised */
-	kiblnd_data.kib_init = IBLND_INIT_DATA;
-	/*****************************************************/
-
-	rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd");
-	if (rc) {
-		CERROR("Can't spawn o2iblnd connd: %d\n", rc);
-		goto failed;
-	}
-
-	if (*kiblnd_tunables.kib_dev_failover)
-		rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
-					 "kiblnd_failover");
-
-	if (rc) {
-		CERROR("Can't spawn o2iblnd failover thread: %d\n", rc);
-		goto failed;
-	}
-
-	/* flag everything initialised */
-	kiblnd_data.kib_init = IBLND_INIT_ALL;
-	/*****************************************************/
-
-	return 0;
-
- failed:
-	kiblnd_base_shutdown();
-	return -ENETDOWN;
-}
-
-static int kiblnd_start_schedulers(struct kib_sched_info *sched)
-{
-	int rc = 0;
-	int nthrs;
-	int i;
-
-	if (!sched->ibs_nthreads) {
-		if (*kiblnd_tunables.kib_nscheds > 0) {
-			nthrs = sched->ibs_nthreads_max;
-		} else {
-			nthrs = cfs_cpt_weight(lnet_cpt_table(),
-					       sched->ibs_cpt);
-			nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
-			nthrs = min(IBLND_N_SCHED_HIGH, nthrs);
-		}
-	} else {
-		LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max);
-		/* add one thread if there is a new interface */
-		nthrs = sched->ibs_nthreads < sched->ibs_nthreads_max;
-	}
-
-	for (i = 0; i < nthrs; i++) {
-		long id;
-		char name[20];
-
-		id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
-		snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
-			 KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
-		rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
-		if (!rc)
-			continue;
-
-		CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
-		       sched->ibs_cpt, sched->ibs_nthreads + i, rc);
-		break;
-	}
-
-	sched->ibs_nthreads += i;
-	return rc;
-}
-
-static int kiblnd_dev_start_threads(struct kib_dev *dev, int newdev, __u32 *cpts,
-				    int ncpts)
-{
-	int cpt;
-	int rc;
-	int i;
-
-	for (i = 0; i < ncpts; i++) {
-		struct kib_sched_info *sched;
-
-		cpt = !cpts ? i : cpts[i];
-		sched = kiblnd_data.kib_scheds[cpt];
-
-		if (!newdev && sched->ibs_nthreads > 0)
-			continue;
-
-		rc = kiblnd_start_schedulers(kiblnd_data.kib_scheds[cpt]);
-		if (rc) {
-			CERROR("Failed to start scheduler threads for %s\n",
-			       dev->ibd_ifname);
-			return rc;
-		}
-	}
-	return 0;
-}
-
-static struct kib_dev *kiblnd_dev_search(char *ifname)
-{
-	struct kib_dev *alias = NULL;
-	struct kib_dev *dev;
-	char *colon;
-	char *colon2;
-
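-	/*
-	 * an exact match wins; failing that, remember the first device
-	 * whose base name (up to any ':') matches, since o2iblnd can run
-	 * over an aliased interface
-	 */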
-	colon = strchr(ifname, ':');
-	list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
-		if (!strcmp(&dev->ibd_ifname[0], ifname))
-			return dev;
-
-		if (alias)
-			continue;
-
-		colon2 = strchr(dev->ibd_ifname, ':');
-		if (colon)
-			*colon = 0;
-		if (colon2)
-			*colon2 = 0;
-
-		if (!strcmp(&dev->ibd_ifname[0], ifname))
-			alias = dev;
-
-		if (colon)
-			*colon = ':';
-		if (colon2)
-			*colon2 = ':';
-	}
-	return alias;
-}
-
-static int kiblnd_startup(struct lnet_ni *ni)
-{
-	char *ifname;
-	struct kib_dev *ibdev = NULL;
-	struct kib_net *net;
-	struct timespec64 tv;
-	unsigned long flags;
-	int rc;
-	int newdev;
-
-	LASSERT(ni->ni_lnd == &the_o2iblnd);
-
-	if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
-		rc = kiblnd_base_startup();
-		if (rc)
-			return rc;
-	}
-
-	net = kzalloc(sizeof(*net), GFP_NOFS);
-	ni->ni_data = net;
-	if (!net)
-		goto net_failed;
-
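-	/*
-	 * stamp this net with the wall-clock time in microseconds; stale
-	 * connections from an earlier incarnation are detected by
-	 * comparing this value
-	 */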
-	ktime_get_real_ts64(&tv);
-	net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC +
-			       tv.tv_nsec / NSEC_PER_USEC;
-
-	rc = kiblnd_tunables_setup(ni);
-	if (rc)
-		goto net_failed;
-
-	if (ni->ni_interfaces[0]) {
-		/* Use the IPoIB interface specified in 'networks=' */
-
-		BUILD_BUG_ON(LNET_MAX_INTERFACES <= 1);
-		if (ni->ni_interfaces[1]) {
-			CERROR("Multiple interfaces not supported\n");
-			goto failed;
-		}
-
-		ifname = ni->ni_interfaces[0];
-	} else {
-		ifname = *kiblnd_tunables.kib_default_ipif;
-	}
-
-	if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
-		CERROR("IPoIB interface name too long: %s\n", ifname);
-		goto failed;
-	}
-
-	ibdev = kiblnd_dev_search(ifname);
-
-	newdev = !ibdev;
-	/* hmm...create kib_dev even for alias */
-	if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname))
-		ibdev = kiblnd_create_dev(ifname);
-
-	if (!ibdev)
-		goto failed;
-
-	net->ibn_dev = ibdev;
-	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
-
-	rc = kiblnd_dev_start_threads(ibdev, newdev,
-				      ni->ni_cpts, ni->ni_ncpts);
-	if (rc)
-		goto failed;
-
-	rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
-	if (rc) {
-		CERROR("Failed to initialize NI pools: %d\n", rc);
-		goto failed;
-	}
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	ibdev->ibd_nnets++;
-	list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	net->ibn_init = IBLND_INIT_ALL;
-
-	return 0;
-
-failed:
-	if (!net->ibn_dev && ibdev)
-		kiblnd_destroy_dev(ibdev);
-
-net_failed:
-	kiblnd_shutdown(ni);
-
-	CDEBUG(D_NET, "%s failed\n", __func__);
-	return -ENETDOWN;
-}
-
-static struct lnet_lnd the_o2iblnd = {
-	.lnd_type	= O2IBLND,
-	.lnd_startup	= kiblnd_startup,
-	.lnd_shutdown	= kiblnd_shutdown,
-	.lnd_ctl	= kiblnd_ctl,
-	.lnd_query	= kiblnd_query,
-	.lnd_send	= kiblnd_send,
-	.lnd_recv	= kiblnd_recv,
-};
-
-static void __exit ko2iblnd_exit(void)
-{
-	lnet_unregister_lnd(&the_o2iblnd);
-}
-
-static int __init ko2iblnd_init(void)
-{
-	int rc;
-
-	BUILD_BUG_ON(sizeof(struct kib_msg) > IBLND_MSG_SIZE);
-	BUILD_BUG_ON(offsetof(struct kib_msg,
-			  ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
-			  > IBLND_MSG_SIZE);
-	BUILD_BUG_ON(offsetof(struct kib_msg,
-			  ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
-			  > IBLND_MSG_SIZE);
-
-	kiblnd_tunables_init();
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	lnet_register_lnd(&the_o2iblnd);
-
-	return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ko2iblnd_init);
-module_exit(ko2iblnd_exit);

+ 0 - 1048
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h

@@ -1,1048 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.h
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-#include <linux/uaccess.h>
-
-#include <linux/io.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/pci.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#include <rdma/rdma_cm.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_fmr_pool.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/lnet/lib-lnet.h>
-
-#define IBLND_PEER_HASH_SIZE		101	/* # peer lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED			100
-
-#define IBLND_N_SCHED			2
-#define IBLND_N_SCHED_HIGH		4
-
-struct kib_tunables {
-	int *kib_dev_failover;           /* HCA failover */
-	unsigned int *kib_service;       /* IB service number */
-	int *kib_min_reconnect_interval; /* first failed connection retry... */
-	int *kib_max_reconnect_interval; /* exponentially increasing to this */
-	int *kib_cksum;                  /* checksum struct kib_msg? */
-	int *kib_timeout;                /* comms timeout (seconds) */
-	int *kib_keepalive;              /* keepalive timeout (seconds) */
-	int *kib_ntx;                    /* # tx descs */
-	char **kib_default_ipif;         /* default IPoIB interface */
-	int *kib_retry_count;
-	int *kib_rnr_retry_count;
-	int *kib_ib_mtu;                 /* IB MTU */
-	int *kib_require_priv_port;      /* accept only privileged ports */
-	int *kib_use_priv_port; /* use privileged port for active connect */
-	int *kib_nscheds;                /* # threads on each CPT */
-};
-
-extern struct kib_tunables  kiblnd_tunables;
-
-#define IBLND_MSG_QUEUE_SIZE_V1   8 /* V1 only : # messages/RDMAs in-flight */
-#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when to eagerly return credits */
-
-#define IBLND_CREDITS_DEFAULT     8 /* default # of peer credits */
-#define IBLND_CREDITS_MAX	  ((typeof(((struct kib_msg *)0)->ibm_credits)) - 1)  /* Max # of peer credits */
-
-/* when to eagerly return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v)	((v) == IBLND_MSG_VERSION_1 ? \
-					IBLND_CREDIT_HIGHWATER_V1 : \
-					t->lnd_peercredits_hiw)
-
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
-							       cb, dev, \
-							       ps, qpt)
-
-/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
-#define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
-#define IBLND_OOB_MSGS(v)	   (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-
-#define IBLND_FRAG_SHIFT	(PAGE_SHIFT - 12)	/* frag size on wire is in 4K units */
-#define IBLND_MSG_SIZE		(4 << 10)		/* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS	(LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */
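
IBLND_FRAG_SHIFT bridges the host page size and the 4 KiB units the wire protocol counts fragments in: with 4 KiB pages the shift is 0, with 64 KiB pages it is 4. A small sketch of the conversion, with the page shift passed in explicitly since userspace has no PAGE_SHIFT:

#include <stdio.h>

#define WIRE_UNIT_SHIFT 12		/* wire frags are in 4 KiB units */

static unsigned int pages_to_wire_frags(unsigned int npages,
					unsigned int page_shift)
{
	unsigned int frag_shift = page_shift - WIRE_UNIT_SHIFT;

	return npages << frag_shift;	/* one page covers 2^shift wire frags */
}

int main(void)
{
	printf("4K pages:  %u\n", pages_to_wire_frags(16, 12));	/* 16  */
	printf("64K pages: %u\n", pages_to_wire_frags(16, 16));	/* 256 */
	return 0;
}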
-
-/************************/
-/* derived constants... */
-/* Pools (shared by connections on each CPT) */
-/* These pools can grow at runtime, so there is no need to give a very large value */
-#define IBLND_TX_POOL			256
-#define IBLND_FMR_POOL			256
-#define IBLND_FMR_POOL_FLUSH		192
-
-#define IBLND_RX_MSGS(c)	\
-	((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
-#define IBLND_RX_MSG_BYTES(c)	(IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(c)	\
-	((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
-
-/* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(c)	IBLND_RX_MSGS(c)
-#define IBLND_SEND_WRS(c)	\
-	(((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
-	  kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
-#define IBLND_CQ_ENTRIES(c)	(IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
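
Worked through under assumed version-2 defaults (queue depth 8, 256 max frags from a 1 MiB LNET_MAX_PAYLOAD, 4 KiB pages, 8 concurrent sends), these macros size a connection's completion queue as follows:

#include <stdio.h>

int main(void)
{
	/* assumed example values, not authoritative defaults */
	int queue_depth      = 8;	/* negotiated peer credits	 */
	int oob_msgs         = 2;	/* IBLND_OOB_MSGS for version 2	 */
	int max_frags        = 256;	/* 1 MiB payload / 4 KiB frags	 */
	int frag_shift       = 0;	/* PAGE_SHIFT - 12 with 4K pages */
	int concurrent_sends = 8;

	int rx_msgs    = queue_depth * 2 + oob_msgs;
	int send_wrs   = ((max_frags + 1) << frag_shift) * concurrent_sends;
	int cq_entries = rx_msgs + send_wrs;

	printf("recv WRs %d, send WRs %d, CQ entries %d\n",
	       rx_msgs, send_wrs, cq_entries);	/* 18, 2056, 2074 */
	return 0;
}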
-
-struct kib_hca_dev;
-
-/* o2iblnd can run over aliased interface */
-#ifdef IFALIASZ
-#define KIB_IFNAME_SIZE	      IFALIASZ
-#else
-#define KIB_IFNAME_SIZE	      256
-#endif
-
-struct kib_dev {
-	struct list_head   ibd_list;            /* chain on kib_devs */
-	struct list_head   ibd_fail_list;       /* chain on kib_failed_devs */
-	__u32              ibd_ifip;            /* IPoIB interface IP */
-
-	/* IPoIB interface name */
-	char               ibd_ifname[KIB_IFNAME_SIZE];
-	int                ibd_nnets;           /* # nets extant */
-
-	unsigned long      ibd_next_failover;
-	int                ibd_failed_failover; /* # failover failures */
-	unsigned int       ibd_failover;        /* failover in progress */
-	unsigned int ibd_can_failover; /* IPoIB interface is a bonding master */
-	struct list_head   ibd_nets;
-	struct kib_hca_dev *ibd_hdev;
-};
-
-struct kib_hca_dev {
-	struct rdma_cm_id  *ibh_cmid;           /* listener cmid */
-	struct ib_device   *ibh_ibdev;          /* IB device */
-	int                ibh_page_shift;      /* page shift of current HCA */
-	int                ibh_page_size;       /* page size of current HCA */
-	__u64              ibh_page_mask;       /* page mask of current HCA */
-	int                ibh_mr_shift;        /* bits shift of max MR size */
-	__u64              ibh_mr_size;         /* size of MR */
-	struct ib_pd       *ibh_pd;             /* PD */
-	struct kib_dev	   *ibh_dev;		/* owner */
-	atomic_t           ibh_ref;             /* refcount */
-};
-
-/** # of seconds to keep pool alive */
-#define IBLND_POOL_DEADLINE     300
-/** # of seconds to retry if allocation failed */
-#define IBLND_POOL_RETRY	1
-
-struct kib_pages {
-	int                ibp_npages;          /* # pages */
-	struct page        *ibp_pages[0];       /* page array */
-};
-
-struct kib_pool;
-struct kib_poolset;
-
-typedef int  (*kib_ps_pool_create_t)(struct kib_poolset *ps,
-				     int inc, struct kib_pool **pp_po);
-typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po);
-typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node);
-typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node);
-
-struct kib_net;
-
-#define IBLND_POOL_NAME_LEN     32
-
-struct kib_poolset {
-	spinlock_t            ps_lock;            /* serialize */
-	struct kib_net        *ps_net;            /* network it belongs to */
-	char                  ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
-	struct list_head      ps_pool_list;       /* list of pools */
-	struct list_head      ps_failed_pool_list;/* failed pool list */
-	unsigned long         ps_next_retry;      /* timestamp for retry if */
-						  /* allocation failed */
-	int                   ps_increasing;      /* is allocating new pool */
-	int                   ps_pool_size;       /* new pool size */
-	int                   ps_cpt;             /* CPT id */
-
-	kib_ps_pool_create_t  ps_pool_create;     /* create a new pool */
-	kib_ps_pool_destroy_t ps_pool_destroy;    /* destroy a pool */
-	kib_ps_node_init_t    ps_node_init; /* initialize new allocated node */
-	kib_ps_node_fini_t    ps_node_fini;       /* finalize node */
-};
-
-struct kib_pool {
-	struct list_head      po_list;       /* chain on pool list */
-	struct list_head      po_free_list;  /* pre-allocated node */
-	struct kib_poolset	*po_owner;	/* pool_set of this pool */
-	unsigned long         po_deadline;   /* deadline of this pool */
-	int                   po_allocated;  /* # of elements in use */
-	int                   po_failed;     /* pool is created on failed HCA */
-	int                   po_size;       /* # of pre-allocated elements */
-};
-
-struct kib_tx_poolset {
-	struct kib_poolset	tps_poolset;		/* pool-set */
-	__u64                 tps_next_tx_cookie; /* cookie of TX */
-};
-
-struct kib_tx_pool {
-	struct kib_pool		 tpo_pool;	/* pool */
-	struct kib_hca_dev	*tpo_hdev;	/* device for this pool */
-	struct kib_tx		*tpo_tx_descs;	/* all the tx descriptors */
-	struct kib_pages	*tpo_tx_pages;	/* premapped tx msg pages */
-};
-
-struct kib_fmr_poolset {
-	spinlock_t            fps_lock;            /* serialize */
-	struct kib_net        *fps_net;            /* IB network */
-	struct list_head      fps_pool_list;       /* FMR pool list */
-	struct list_head      fps_failed_pool_list;/* FMR pool list */
-	__u64                 fps_version;         /* validity stamp */
-	int                   fps_cpt;             /* CPT id */
-	int                   fps_pool_size;
-	int                   fps_flush_trigger;
-	int		      fps_cache;
-	int                   fps_increasing;      /* is allocating new pool */
-	unsigned long         fps_next_retry;      /* timestamp for retry if */
-						   /* allocation failed */
-};
-
-struct kib_fast_reg_descriptor { /* For fast registration */
-	struct list_head		 frd_list;
-	struct ib_send_wr		 frd_inv_wr;
-	struct ib_reg_wr		 frd_fastreg_wr;
-	struct ib_mr			*frd_mr;
-	bool				 frd_valid;
-};
-
-struct kib_fmr_pool {
-	struct list_head	 fpo_list;	/* chain on pool list */
-	struct kib_hca_dev	*fpo_hdev;	/* device for this pool */
-	struct kib_fmr_poolset	*fpo_owner;	/* owner of this pool */
-	union {
-		struct {
-			struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
-		} fmr;
-		struct { /* For fast registration */
-			struct list_head    fpo_pool_list;
-			int		    fpo_pool_size;
-		} fast_reg;
-	};
-	unsigned long         fpo_deadline;        /* deadline of this pool */
-	int                   fpo_failed;          /* fmr pool is failed */
-	int                   fpo_map_count;       /* # of mapped FMR */
-	int		      fpo_is_fmr;
-};
-
-struct kib_fmr {
-	struct kib_fmr_pool		*fmr_pool;	/* pool of FMR */
-	struct ib_pool_fmr		*fmr_pfmr;	/* IB pool fmr */
-	struct kib_fast_reg_descriptor	*fmr_frd;
-	u32				 fmr_key;
-};
-
-struct kib_net {
-	struct list_head      ibn_list;       /* chain on struct kib_dev::ibd_nets */
-	__u64                 ibn_incarnation; /* my epoch */
-	int                   ibn_init;       /* initialisation state */
-	int                   ibn_shutdown;   /* shutting down? */
-
-	atomic_t              ibn_npeers;     /* # peers extant */
-	atomic_t              ibn_nconns;     /* # connections extant */
-
-	struct kib_tx_poolset	**ibn_tx_ps;	/* tx pool-set */
-	struct kib_fmr_poolset	**ibn_fmr_ps;	/* fmr pool-set */
-
-	struct kib_dev		*ibn_dev;	/* underlying IB device */
-};
-
-#define KIB_THREAD_SHIFT		16
-#define KIB_THREAD_ID(cpt, tid)		((cpt) << KIB_THREAD_SHIFT | (tid))
-#define KIB_THREAD_CPT(id)		((id) >> KIB_THREAD_SHIFT)
-#define KIB_THREAD_TID(id)		((id) & ((1UL << KIB_THREAD_SHIFT) - 1))
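
KIB_THREAD_ID packs a CPT (CPU partition) number and a per-partition thread index into one integer, and the two accessor macros below it undo the packing. A standalone round-trip check of the same scheme:

#include <assert.h>
#include <stdio.h>

#define THREAD_SHIFT 16
#define THREAD_ID(cpt, tid)	(((cpt) << THREAD_SHIFT) | (tid))
#define THREAD_CPT(id)		((id) >> THREAD_SHIFT)
#define THREAD_TID(id)		((id) & ((1UL << THREAD_SHIFT) - 1))

int main(void)
{
	unsigned long id = THREAD_ID(3UL, 42UL);

	assert(THREAD_CPT(id) == 3 && THREAD_TID(id) == 42);
	printf("id %#lx -> cpt %lu tid %lu\n",
	       id, THREAD_CPT(id), THREAD_TID(id));
	return 0;
}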
-
-struct kib_sched_info {
-	spinlock_t         ibs_lock;     /* serialise */
-	wait_queue_head_t  ibs_waitq;    /* schedulers sleep here */
-	struct list_head   ibs_conns;    /* conns to check for rx completions */
-	int                ibs_nthreads; /* number of scheduler threads */
-	int                ibs_nthreads_max; /* max allowed scheduler threads */
-	int                ibs_cpt;      /* CPT id */
-};
-
-struct kib_data {
-	int               kib_init;           /* initialisation state */
-	int               kib_shutdown;       /* shut down? */
-	struct list_head  kib_devs;           /* IB devices extant */
-	struct list_head  kib_failed_devs;    /* list head of failed devices */
-	wait_queue_head_t kib_failover_waitq; /* schedulers sleep here */
-	atomic_t kib_nthreads;                /* # live threads */
-	rwlock_t kib_global_lock;    /* stabilize net/dev/peer/conn ops */
-	struct list_head *kib_peers; /* hash table of all my known peers */
-	int  kib_peer_hash_size;     /* size of kib_peers */
-	void *kib_connd; /* the connd task (serialisation assertions) */
-	struct list_head kib_connd_conns;   /* connections to setup/teardown */
-	struct list_head kib_connd_zombies; /* connections with zero refcount */
-	/* connections to reconnect */
-	struct list_head	kib_reconn_list;
-	/* peers wait for reconnection */
-	struct list_head	kib_reconn_wait;
-	/**
-	 * The time (in seconds) at which peers are pulled off
-	 * \a kib_reconn_wait for reconnection.
-	 */
-	time64_t		kib_reconn_sec;
-
-	wait_queue_head_t kib_connd_waitq;  /* connection daemon sleeps here */
-	spinlock_t kib_connd_lock;          /* serialise */
-	struct ib_qp_attr kib_error_qpa;    /* QP->ERROR */
-	struct kib_sched_info **kib_scheds; /* percpt data for schedulers */
-};
-
-#define IBLND_INIT_NOTHING 0
-#define IBLND_INIT_DATA    1
-#define IBLND_INIT_ALL     2
-
-/************************************************************************
- * IB Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-struct kib_connparams {
-	__u16        ibcp_queue_depth;
-	__u16        ibcp_max_frags;
-	__u32        ibcp_max_msg_size;
-} WIRE_ATTR;
-
-struct kib_immediate_msg {
-	struct lnet_hdr	ibim_hdr;        /* portals header */
-	char         ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR;
-
-struct kib_rdma_frag {
-	__u32        rf_nob;          /* # bytes this frag */
-	__u64        rf_addr;         /* CAVEAT EMPTOR: misaligned!! */
-} WIRE_ATTR;
-
-struct kib_rdma_desc {
-	__u32           rd_key;       /* local/remote key */
-	__u32           rd_nfrags;    /* # fragments */
-	struct kib_rdma_frag	rd_frags[0];	/* buffer frags */
-} WIRE_ATTR;
-
-struct kib_putreq_msg {
-	struct lnet_hdr	ibprm_hdr;    /* portals header */
-	__u64           ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR;
-
-struct kib_putack_msg {
-	__u64           ibpam_src_cookie; /* reflected completion cookie */
-	__u64           ibpam_dst_cookie; /* opaque completion cookie */
-	struct kib_rdma_desc ibpam_rd;         /* sender's sink buffer */
-} WIRE_ATTR;
-
-struct kib_get_msg {
-	struct lnet_hdr ibgm_hdr;     /* portals header */
-	__u64           ibgm_cookie;  /* opaque completion cookie */
-	struct kib_rdma_desc ibgm_rd;      /* rdma descriptor */
-} WIRE_ATTR;
-
-struct kib_completion_msg {
-	__u64           ibcm_cookie;  /* opaque completion cookie */
-	__s32           ibcm_status;  /* < 0 failure; >= 0 length */
-} WIRE_ATTR;
-
-struct kib_msg {
-	/* First 2 fields fixed FOR ALL TIME */
-	__u32           ibm_magic;    /* I'm an ibnal message */
-	__u16           ibm_version;  /* this is my version number */
-
-	__u8            ibm_type;     /* msg type */
-	__u8            ibm_credits;  /* returned credits */
-	__u32           ibm_nob;      /* # bytes in whole message */
-	__u32           ibm_cksum;    /* checksum (0 == no checksum) */
-	__u64           ibm_srcnid;   /* sender's NID */
-	__u64           ibm_srcstamp; /* sender's incarnation */
-	__u64           ibm_dstnid;   /* destination's NID */
-	__u64           ibm_dststamp; /* destination's incarnation */
-
-	union {
-		struct kib_connparams		connparams;
-		struct kib_immediate_msg	immediate;
-		struct kib_putreq_msg		putreq;
-		struct kib_putack_msg		putack;
-		struct kib_get_msg		get;
-		struct kib_completion_msg	completion;
-	} WIRE_ATTR ibm_u;
-} WIRE_ATTR;
-
-#define IBLND_MSG_MAGIC     LNET_PROTO_IB_MAGIC /* unique magic */
-
-#define IBLND_MSG_VERSION_1 0x11
-#define IBLND_MSG_VERSION_2 0x12
-#define IBLND_MSG_VERSION   IBLND_MSG_VERSION_2
-
-#define IBLND_MSG_CONNREQ   0xc0	/* connection request */
-#define IBLND_MSG_CONNACK   0xc1	/* connection acknowledge */
-#define IBLND_MSG_NOOP      0xd0	/* nothing (just credits) */
-#define IBLND_MSG_IMMEDIATE 0xd1	/* immediate */
-#define IBLND_MSG_PUT_REQ   0xd2	/* putreq (src->sink) */
-#define IBLND_MSG_PUT_NAK   0xd3	/* completion (sink->src) */
-#define IBLND_MSG_PUT_ACK   0xd4	/* putack (sink->src) */
-#define IBLND_MSG_PUT_DONE  0xd5	/* completion (src->sink) */
-#define IBLND_MSG_GET_REQ   0xd6	/* getreq (sink->src) */
-#define IBLND_MSG_GET_DONE  0xd7	/* completion (src->sink: all OK) */
-
-struct kib_rej {
-	__u32            ibr_magic;       /* sender's magic */
-	__u16            ibr_version;     /* sender's version */
-	__u8             ibr_why;         /* reject reason */
-	__u8             ibr_padding;     /* padding */
-	__u64            ibr_incarnation; /* incarnation of peer */
-	struct kib_connparams ibr_cp;          /* connection parameters */
-} WIRE_ATTR;
-
-/* connection rejection reasons */
-#define IBLND_REJECT_CONN_RACE      1 /* You lost connection race */
-#define IBLND_REJECT_NO_RESOURCES   2 /* Out of memory/conns etc */
-#define IBLND_REJECT_FATAL          3 /* Anything else */
-#define IBLND_REJECT_CONN_UNCOMPAT  4 /* incompatible version peer */
-#define IBLND_REJECT_CONN_STALE     5 /* stale peer */
-/* peer's rdma frags don't match mine */
-#define IBLND_REJECT_RDMA_FRAGS	    6
-/* peer's msg queue size doesn't match mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE 7
-
-/***********************************************************************/
-
-struct kib_rx {					/* receive message */
-	struct list_head       rx_list;       /* queue for attention */
-	struct kib_conn        *rx_conn;      /* owning conn */
-	int                    rx_nob; /* # bytes received (-1 while posted) */
-	enum ib_wc_status      rx_status;     /* completion status */
-	struct kib_msg		*rx_msg;	/* message buffer (host vaddr) */
-	__u64                  rx_msgaddr;    /* message buffer (I/O addr) */
-	DECLARE_PCI_UNMAP_ADDR(rx_msgunmap);  /* for dma_unmap_single() */
-	struct ib_recv_wr      rx_wrq;        /* receive work item... */
-	struct ib_sge          rx_sge;        /* ...and its memory */
-};
-
-#define IBLND_POSTRX_DONT_POST    0 /* don't post */
-#define IBLND_POSTRX_NO_CREDIT    1 /* post: no credits */
-#define IBLND_POSTRX_PEER_CREDIT  2 /* post: give peer back 1 credit */
-#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give self back 1 reserved credit */
-
-struct kib_tx {					/* transmit message */
-	struct list_head      tx_list; /* queue on idle_txs, ibc_tx_queue, etc. */
-	struct kib_tx_pool	*tx_pool;	/* pool I'm from */
-	struct kib_conn       *tx_conn;       /* owning conn */
-	short                 tx_sending;     /* # tx callbacks outstanding */
-	short                 tx_queued;      /* queued for sending */
-	short                 tx_waiting;     /* waiting for peer */
-	int                   tx_status;      /* LNET completion status */
-	unsigned long         tx_deadline;    /* completion deadline */
-	__u64                 tx_cookie;      /* completion cookie */
-	struct lnet_msg		*tx_lntmsg[2];	/* lnet msgs to finalize on completion */
-	struct kib_msg	      *tx_msg;        /* message buffer (host vaddr) */
-	__u64                 tx_msgaddr;     /* message buffer (I/O addr) */
-	DECLARE_PCI_UNMAP_ADDR(tx_msgunmap);  /* for dma_unmap_single() */
-	int                   tx_nwrq;        /* # send work items */
-	struct ib_rdma_wr     *tx_wrq;        /* send work items... */
-	struct ib_sge         *tx_sge;        /* ...and their memory */
-	struct kib_rdma_desc  *tx_rd;         /* rdma descriptor */
-	int                   tx_nfrags;      /* # entries in... */
-	struct scatterlist    *tx_frags;      /* dma_map_sg descriptor */
-	__u64                 *tx_pages;      /* rdma phys page addrs */
-	struct kib_fmr        fmr;	      /* FMR */
-	int                   tx_dmadir;      /* dma direction */
-};
-
-struct kib_connvars {
-	struct kib_msg cv_msg; /* connection-in-progress variables */
-};
-
-struct kib_conn {
-	struct kib_sched_info *ibc_sched;      /* scheduler information */
-	struct kib_peer       *ibc_peer;       /* owning peer */
-	struct kib_hca_dev         *ibc_hdev;       /* HCA bound on */
-	struct list_head ibc_list;             /* stash on peer's conn list */
-	struct list_head      ibc_sched_list;  /* schedule for attention */
-	__u16                 ibc_version;     /* version of connection */
-	/* reconnect later */
-	__u16			ibc_reconnect:1;
-	__u64                 ibc_incarnation; /* which instance of the peer */
-	atomic_t              ibc_refcount;    /* # users */
-	int                   ibc_state;       /* what's happening */
-	int                   ibc_nsends_posted; /* # uncompleted sends */
-	int                   ibc_noops_posted;  /* # uncompleted NOOPs */
-	int                   ibc_credits;     /* # credits I have */
-	int                   ibc_outstanding_credits; /* # credits to return */
-	int                   ibc_reserved_credits; /* # ACK/DONE msg credits */
-	int                   ibc_comms_error; /* set on comms error */
-	/* connections queue depth */
-	__u16		      ibc_queue_depth;
-	/* connections max frags */
-	__u16		      ibc_max_frags;
-	unsigned int          ibc_nrx:16;      /* receive buffers owned */
-	unsigned int          ibc_scheduled:1; /* scheduled for attention */
-	unsigned int          ibc_ready:1;     /* CQ callback fired */
-	unsigned long         ibc_last_send;   /* time of last send */
-	struct list_head      ibc_connd_list;  /* link chain for */
-					       /* kiblnd_check_conns only */
-	struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
-	struct list_head ibc_tx_noops;         /* IBLND_MSG_NOOPs for */
-					       /* IBLND_MSG_VERSION_1 */
-	struct list_head ibc_tx_queue;         /* sends that need a credit */
-	struct list_head ibc_tx_queue_nocred;  /* sends that don't need a */
-					       /* credit */
-	struct list_head ibc_tx_queue_rsrvd;   /* sends that need to */
-					       /* reserve an ACK/DONE msg */
-	struct list_head ibc_active_txs; /* active tx awaiting completion */
-	spinlock_t            ibc_lock;        /* serialise */
-	struct kib_rx              *ibc_rxs;        /* the rx descs */
-	struct kib_pages           *ibc_rx_pages;   /* premapped rx msg pages */
-
-	struct rdma_cm_id     *ibc_cmid;       /* CM id */
-	struct ib_cq          *ibc_cq;         /* completion queue */
-
-	struct kib_connvars	*ibc_connvars;	/* in-progress connection state */
-};
-
-#define IBLND_CONN_INIT           0	 /* being initialised */
-#define IBLND_CONN_ACTIVE_CONNECT 1	 /* active sending req */
-#define IBLND_CONN_PASSIVE_WAIT   2	 /* passive waiting for rtu */
-#define IBLND_CONN_ESTABLISHED    3	 /* connection established */
-#define IBLND_CONN_CLOSING        4	 /* being closed */
-#define IBLND_CONN_DISCONNECTED   5	 /* disconnected */
-
-struct kib_peer {
-	struct list_head ibp_list;        /* stash on global peer list */
-	lnet_nid_t       ibp_nid;         /* who's on the other end(s) */
-	struct lnet_ni	*ibp_ni;         /* LNet interface */
-	struct list_head ibp_conns;       /* all active connections */
-	struct kib_conn	*ibp_next_conn;  /* next connection to send on for
-					  * round robin */
-	struct list_head ibp_tx_queue;    /* msgs waiting for a conn */
-	__u64            ibp_incarnation; /* incarnation of peer */
-	/* when (in jiffies) I was last alive */
-	unsigned long		ibp_last_alive;
-	/* # users */
-	atomic_t		ibp_refcount;
-	/* version of peer */
-	__u16			ibp_version;
-	/* current passive connection attempts */
-	unsigned short		ibp_accepting;
-	/* current active connection attempts */
-	unsigned short		ibp_connecting;
-	/* reconnect this peer later */
-	unsigned char		ibp_reconnecting;
-	/* counter of how many times we triggered a conn race */
-	unsigned char		ibp_races;
-	/* # consecutive reconnection attempts to this peer */
-	unsigned int		ibp_reconnected;
-	/* errno on closing this peer */
-	int              ibp_error;
-	/* max map_on_demand */
-	__u16		 ibp_max_frags;
-	/* max_peer_credits */
-	__u16		 ibp_queue_depth;
-};
-
-extern struct kib_data kiblnd_data;
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev);
-
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
-
-/* max # of fragments configured by user */
-static inline int
-kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	int mod;
-
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-	mod = tunables->lnd_map_on_demand;
-	return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
-}
-
-static inline int
-kiblnd_rdma_frags(int version, struct lnet_ni *ni)
-{
-	return version == IBLND_MSG_VERSION_1 ?
-			  (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
-			  kiblnd_cfg_rdma_frags(ni);
-}
-
-static inline int
-kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	int concurrent_sends;
-
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-	concurrent_sends = tunables->lnd_concurrent_sends;
-
-	if (version == IBLND_MSG_VERSION_1) {
-		if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
-			return IBLND_MSG_QUEUE_SIZE_V1 * 2;
-
-		if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
-			return IBLND_MSG_QUEUE_SIZE_V1 / 2;
-	}
-
-	return concurrent_sends;
-}
-
-static inline void
-kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev)
-{
-	LASSERT(atomic_read(&hdev->ibh_ref) > 0);
-	atomic_inc(&hdev->ibh_ref);
-}
-
-static inline void
-kiblnd_hdev_decref(struct kib_hca_dev *hdev)
-{
-	LASSERT(atomic_read(&hdev->ibh_ref) > 0);
-	if (atomic_dec_and_test(&hdev->ibh_ref))
-		kiblnd_hdev_destroy(hdev);
-}
-
-static inline int
-kiblnd_dev_can_failover(struct kib_dev *dev)
-{
-	if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
-		return 0;
-
-	if (!*kiblnd_tunables.kib_dev_failover) /* disabled */
-		return 0;
-
-	if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
-		return 1;
-
-	return dev->ibd_can_failover;
-}
-
-#define kiblnd_conn_addref(conn)				\
-do {							    \
-	CDEBUG(D_NET, "conn[%p] (%d)++\n",		      \
-	       (conn), atomic_read(&(conn)->ibc_refcount)); \
-	atomic_inc(&(conn)->ibc_refcount);		  \
-} while (0)
-
-#define kiblnd_conn_decref(conn)					\
-do {									\
-	unsigned long flags;						\
-									\
-	CDEBUG(D_NET, "conn[%p] (%d)--\n",				\
-	       (conn), atomic_read(&(conn)->ibc_refcount));		\
-	LASSERT_ATOMIC_POS(&(conn)->ibc_refcount);			\
-	if (atomic_dec_and_test(&(conn)->ibc_refcount)) {		\
-		spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);	\
-		list_add_tail(&(conn)->ibc_list,			\
-				  &kiblnd_data.kib_connd_zombies);	\
-		wake_up(&kiblnd_data.kib_connd_waitq);		\
-		spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
-	}								\
-} while (0)
-
-#define kiblnd_peer_addref(peer)				\
-do {							    \
-	CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n",		\
-	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
-	       atomic_read(&(peer)->ibp_refcount));	\
-	atomic_inc(&(peer)->ibp_refcount);		  \
-} while (0)
-
-#define kiblnd_peer_decref(peer)				\
-do {							    \
-	CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n",		\
-	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
-	       atomic_read(&(peer)->ibp_refcount));	\
-	LASSERT_ATOMIC_POS(&(peer)->ibp_refcount);	      \
-	if (atomic_dec_and_test(&(peer)->ibp_refcount))     \
-		kiblnd_destroy_peer(peer);		      \
-} while (0)
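
kiblnd_conn_decref() above shows a common kernel pattern: the holder of the last reference does not tear the object down inline but queues it on a zombie list for the connd thread, so destruction happens in a safe context. A minimal userspace sketch of just the drop-to-zero detection, using C11 atomics (the deferred-free list is elided here):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct conn {
	atomic_int refcount;
};

static void conn_decref(struct conn *c)
{
	/* fetch_sub returns the old value: 1 means we dropped the last ref */
	if (atomic_fetch_sub(&c->refcount, 1) == 1) {
		/* real code would queue c on a zombie list and wake connd */
		free(c);
		puts("last ref dropped, conn destroyed");
	}
}

int main(void)
{
	struct conn *c = malloc(sizeof(*c));

	atomic_init(&c->refcount, 2);
	conn_decref(c);	/* still one reference left */
	conn_decref(c);	/* destroys */
	return 0;
}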
-
-static inline bool
-kiblnd_peer_connecting(struct kib_peer *peer)
-{
-	return peer->ibp_connecting ||
-	       peer->ibp_reconnecting ||
-	       peer->ibp_accepting;
-}
-
-static inline bool
-kiblnd_peer_idle(struct kib_peer *peer)
-{
-	return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
-}
-
-static inline struct list_head *
-kiblnd_nid2peerlist(lnet_nid_t nid)
-{
-	unsigned int hash =
-		((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;
-
-	return &kiblnd_data.kib_peers[hash];
-}
-
-static inline int
-kiblnd_peer_active(struct kib_peer *peer)
-{
-	/* Am I in the peer hash table? */
-	return !list_empty(&peer->ibp_list);
-}
-
-static inline struct kib_conn *
-kiblnd_get_conn_locked(struct kib_peer *peer)
-{
-	struct list_head *next;
-
-	LASSERT(!list_empty(&peer->ibp_conns));
-
-	/* Advance to next connection, be sure to skip the head node */
-	if (!peer->ibp_next_conn ||
-	    peer->ibp_next_conn->ibc_list.next == &peer->ibp_conns)
-		next = peer->ibp_conns.next;
-	else
-		next = peer->ibp_next_conn->ibc_list.next;
-	peer->ibp_next_conn = list_entry(next, struct kib_conn, ibc_list);
-
-	return peer->ibp_next_conn;
-}
-
-static inline int
-kiblnd_send_keepalive(struct kib_conn *conn)
-{
-	return (*kiblnd_tunables.kib_keepalive > 0) &&
-		time_after(jiffies, conn->ibc_last_send +
-			   msecs_to_jiffies(*kiblnd_tunables.kib_keepalive *
-					    MSEC_PER_SEC));
-}
-
-static inline int
-kiblnd_need_noop(struct kib_conn *conn)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
-	if (conn->ibc_outstanding_credits <
-	    IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
-	    !kiblnd_send_keepalive(conn))
-		return 0; /* No need to send NOOP */
-
-	if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
-		if (!list_empty(&conn->ibc_tx_queue_nocred))
-			return 0; /* NOOP can be piggybacked */
-
-		/* No tx to piggyback NOOP onto or no credit to send a tx */
-		return (list_empty(&conn->ibc_tx_queue) ||
-			!conn->ibc_credits);
-	}
-
-	if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
-	    !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
-	    !conn->ibc_credits)		    /* no credit */
-		return 0;
-
-	if (conn->ibc_credits == 1 &&      /* last credit reserved for */
-	    !conn->ibc_outstanding_credits) /* giving back credits */
-		return 0;
-
-	/* No tx to piggyback NOOP onto or no credit to send a tx */
-	return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
-}
-
-static inline void
-kiblnd_abort_receives(struct kib_conn *conn)
-{
-	ib_modify_qp(conn->ibc_cmid->qp,
-		     &kiblnd_data.kib_error_qpa, IB_QP_STATE);
-}
-
-static inline const char *
-kiblnd_queue2str(struct kib_conn *conn, struct list_head *q)
-{
-	if (q == &conn->ibc_tx_queue)
-		return "tx_queue";
-
-	if (q == &conn->ibc_tx_queue_rsrvd)
-		return "tx_queue_rsrvd";
-
-	if (q == &conn->ibc_tx_queue_nocred)
-		return "tx_queue_nocred";
-
-	if (q == &conn->ibc_active_txs)
-		return "active_txs";
-
-	LBUG();
-	return NULL;
-}
-
-/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */
-/* lowest bits of the work request id to stash the work item type. */
-
-#define IBLND_WID_INVAL	0
-#define IBLND_WID_TX	1
-#define IBLND_WID_RX	2
-#define IBLND_WID_RDMA	3
-#define IBLND_WID_MR	4
-#define IBLND_WID_MASK	7UL
-
-static inline __u64
-kiblnd_ptr2wreqid(void *ptr, int type)
-{
-	unsigned long lptr = (unsigned long)ptr;
-
-	LASSERT(!(lptr & IBLND_WID_MASK));
-	LASSERT(!(type & ~IBLND_WID_MASK));
-	return (__u64)(lptr | type);
-}
-
-static inline void *
-kiblnd_wreqid2ptr(__u64 wreqid)
-{
-	return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
-}
-
-static inline int
-kiblnd_wreqid2type(__u64 wreqid)
-{
-	return wreqid & IBLND_WID_MASK;
-}
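
Because tx and rx descriptors are at least 8-byte aligned, their addresses have three low bits free; kiblnd_ptr2wreqid() stashes the work-item type there so a single 64-bit wr_id identifies both the object and the kind of work that completed. A standalone round trip of the same pointer-tagging trick:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WID_TX   1		/* stand-in for IBLND_WID_TX   */
#define WID_MASK 7UL		/* stand-in for IBLND_WID_MASK */

static uint64_t ptr2wreqid(void *ptr, int type)
{
	uintptr_t lptr = (uintptr_t)ptr;

	assert(!(lptr & WID_MASK));	/* needs 8-byte alignment */
	return (uint64_t)lptr | (unsigned int)type;
}

static void *wreqid2ptr(uint64_t wreqid)
{
	return (void *)(uintptr_t)(wreqid & ~(uint64_t)WID_MASK);
}

int main(void)
{
	_Alignas(8) long long obj = 0;
	uint64_t id = ptr2wreqid(&obj, WID_TX);

	assert(wreqid2ptr(id) == (void *)&obj);
	assert((id & WID_MASK) == WID_TX);
	printf("tagged wr_id %#llx\n", (unsigned long long)id);
	return 0;
}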
-
-static inline void
-kiblnd_set_conn_state(struct kib_conn *conn, int state)
-{
-	conn->ibc_state = state;
-	mb();
-}
-
-static inline void
-kiblnd_init_msg(struct kib_msg *msg, int type, int body_nob)
-{
-	msg->ibm_type = type;
-	msg->ibm_nob  = offsetof(struct kib_msg, ibm_u) + body_nob;
-}
-
-static inline int
-kiblnd_rd_size(struct kib_rdma_desc *rd)
-{
-	int   i;
-	int   size;
-
-	for (i = size = 0; i < rd->rd_nfrags; i++)
-		size += rd->rd_frags[i].rf_nob;
-
-	return size;
-}
-
-static inline __u64
-kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
-{
-	return rd->rd_frags[index].rf_addr;
-}
-
-static inline __u32
-kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
-{
-	return rd->rd_frags[index].rf_nob;
-}
-
-static inline __u32
-kiblnd_rd_frag_key(struct kib_rdma_desc *rd, int index)
-{
-	return rd->rd_key;
-}
-
-static inline int
-kiblnd_rd_consume_frag(struct kib_rdma_desc *rd, int index, __u32 nob)
-{
-	if (nob < rd->rd_frags[index].rf_nob) {
-		rd->rd_frags[index].rf_addr += nob;
-		rd->rd_frags[index].rf_nob  -= nob;
-	} else {
-		index++;
-	}
-
-	return index;
-}
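
kiblnd_rd_consume_frag() advances through a descriptor as an RDMA transfer drains it: a partially consumed fragment is shrunk in place, a fully consumed one bumps the index to the next fragment. A sketch driving a simplified copy of it over a two-fragment descriptor (types reduced from the header above):

#include <stdio.h>

struct frag { unsigned int nob; unsigned long long addr; };

/* simplified copy of kiblnd_rd_consume_frag() */
static int consume_frag(struct frag *frags, int index, unsigned int nob)
{
	if (nob < frags[index].nob) {
		frags[index].addr += nob;
		frags[index].nob  -= nob;
	} else {
		index++;
	}
	return index;
}

int main(void)
{
	struct frag frags[2] = { { 4096, 0x1000 }, { 4096, 0x8000 } };
	int idx = 0;

	idx = consume_frag(frags, idx, 1024);	/* partial: frag 0 shrinks */
	printf("idx %d, frag0 %u bytes @ %#llx\n",
	       idx, frags[0].nob, frags[0].addr);	/* 0, 3072 @ 0x1400 */
	idx = consume_frag(frags, idx, 3072);	/* exact: advance to frag 1 */
	printf("idx %d\n", idx);			/* 1 */
	return 0;
}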
-
-static inline int
-kiblnd_rd_msg_size(struct kib_rdma_desc *rd, int msgtype, int n)
-{
-	LASSERT(msgtype == IBLND_MSG_GET_REQ ||
-		msgtype == IBLND_MSG_PUT_ACK);
-
-	return msgtype == IBLND_MSG_GET_REQ ?
-	       offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]) :
-	       offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[n]);
-}
-
-static inline __u64
-kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-	return ib_dma_mapping_error(dev, dma_addr);
-}
-
-static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
-					  void *msg, size_t size,
-					  enum dma_data_direction direction)
-{
-	return ib_dma_map_single(dev, msg, size, direction);
-}
-
-static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
-					   __u64 addr, size_t size,
-					  enum dma_data_direction direction)
-{
-	ib_dma_unmap_single(dev, addr, size, direction);
-}
-
-#define KIBLND_UNMAP_ADDR_SET(p, m, a)  do {} while (0)
-#define KIBLND_UNMAP_ADDR(p, m, a)      (a)
-
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
-				    struct scatterlist *sg, int nents,
-				    enum dma_data_direction direction)
-{
-	return ib_dma_map_sg(dev, sg, nents, direction);
-}
-
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
-				       struct scatterlist *sg, int nents,
-				       enum dma_data_direction direction)
-{
-	ib_dma_unmap_sg(dev, sg, nents, direction);
-}
-
-static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
-					  struct scatterlist *sg)
-{
-	return ib_sg_dma_address(dev, sg);
-}
-
-static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
-					     struct scatterlist *sg)
-{
-	return ib_sg_dma_len(dev, sg);
-}
-
-/* XXX We use KIBLND_CONN_PARAM(e) as a writable buffer; that's not strictly */
-/* right because OFED 1.2 defines it as const, so to use it we have to add a */
-/* (void *) cast to overcome the "const" */
-
-#define KIBLND_CONN_PARAM(e)     ((e)->param.conn.private_data)
-#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-
-void kiblnd_map_rx_descs(struct kib_conn *conn);
-void kiblnd_unmap_rx_descs(struct kib_conn *conn);
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node);
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps);
-
-int  kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
-			 struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
-			 struct kib_fmr *fmr);
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status);
-
-int kiblnd_tunables_setup(struct lnet_ni *ni);
-void kiblnd_tunables_init(void);
-
-int  kiblnd_connd(void *arg);
-int  kiblnd_scheduler(void *arg);
-int  kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
-int  kiblnd_failover_thread(void *arg);
-
-int  kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages);
-
-int  kiblnd_cm_callback(struct rdma_cm_id *cmid,
-			struct rdma_cm_event *event);
-int  kiblnd_translate_mtu(int value);
-
-int  kiblnd_dev_failover(struct kib_dev *dev);
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
-		       lnet_nid_t nid);
-void kiblnd_destroy_peer(struct kib_peer *peer);
-bool kiblnd_reconnect_peer(struct kib_peer *peer);
-void kiblnd_destroy_dev(struct kib_dev *dev);
-void kiblnd_unlink_peer_locked(struct kib_peer *peer);
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid);
-int  kiblnd_close_stale_conns_locked(struct kib_peer *peer,
-				     int version, __u64 incarnation);
-int  kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why);
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer,
-				    struct rdma_cm_id *cmid,
-				    int state, int version);
-void kiblnd_destroy_conn(struct kib_conn *conn);
-void kiblnd_close_conn(struct kib_conn *conn, int error);
-void kiblnd_close_conn_locked(struct kib_conn *conn, int error);
-
-void kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid);
-void kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist,
-			int status);
-
-void kiblnd_qp_event(struct ib_event *event, void *arg);
-void kiblnd_cq_event(struct ib_event *event, void *arg);
-void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
-		     int credits, lnet_nid_t dstnid, __u64 dststamp);
-int  kiblnd_unpack_msg(struct kib_msg *msg, int nob);
-int  kiblnd_post_rx(struct kib_rx *rx, int credit);
-
-int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-		int delayed, struct iov_iter *to, unsigned int rlen);

+ 0 - 3763
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c

@@ -1,3763 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_cb.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/highmem.h>
-#include "o2iblnd.h"
-
-#define MAX_CONN_RACES_BEFORE_ABORT 20
-
-static void kiblnd_peer_alive(struct kib_peer *peer);
-static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx,
-			       int type, int body_nob);
-static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
-			    int resid, struct kib_rdma_desc *dstrd,
-			    __u64 dstcookie);
-static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_unmap_tx(struct kib_tx *tx);
-static void kiblnd_check_sends_locked(struct kib_conn *conn);
-
-static void
-kiblnd_tx_done(struct lnet_ni *ni, struct kib_tx *tx)
-{
-	struct lnet_msg *lntmsg[2];
-	struct kib_net *net = ni->ni_data;
-	int rc;
-	int i;
-
-	LASSERT(net);
-	LASSERT(!in_interrupt());
-	LASSERT(!tx->tx_queued);	       /* mustn't be queued for sending */
-	LASSERT(!tx->tx_sending);	  /* mustn't be awaiting sent callback */
-	LASSERT(!tx->tx_waiting);	      /* mustn't be awaiting peer response */
-	LASSERT(tx->tx_pool);
-
-	kiblnd_unmap_tx(tx);
-
-	/* tx may have up to 2 lnet msgs to finalise */
-	lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
-	lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
-	rc = tx->tx_status;
-
-	if (tx->tx_conn) {
-		LASSERT(ni == tx->tx_conn->ibc_peer->ibp_ni);
-
-		kiblnd_conn_decref(tx->tx_conn);
-		tx->tx_conn = NULL;
-	}
-
-	tx->tx_nwrq = 0;
-	tx->tx_status = 0;
-
-	kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
-
-	/* delay finalize until my descs have been freed */
-	for (i = 0; i < 2; i++) {
-		if (!lntmsg[i])
-			continue;
-
-		lnet_finalize(ni, lntmsg[i], rc);
-	}
-}
-
-void
-kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int status)
-{
-	struct kib_tx *tx;
-
-	while (!list_empty(txlist)) {
-		tx = list_entry(txlist->next, struct kib_tx, tx_list);
-
-		list_del(&tx->tx_list);
-		/* complete now */
-		tx->tx_waiting = 0;
-		tx->tx_status = status;
-		kiblnd_tx_done(ni, tx);
-	}
-}
-
-static struct kib_tx *
-kiblnd_get_idle_tx(struct lnet_ni *ni, lnet_nid_t target)
-{
-	struct kib_net *net = (struct kib_net *)ni->ni_data;
-	struct list_head *node;
-	struct kib_tx *tx;
-	struct kib_tx_poolset *tps;
-
-	tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
-	node = kiblnd_pool_alloc_node(&tps->tps_poolset);
-	if (!node)
-		return NULL;
-	tx = list_entry(node, struct kib_tx, tx_list);
-
-	LASSERT(!tx->tx_nwrq);
-	LASSERT(!tx->tx_queued);
-	LASSERT(!tx->tx_sending);
-	LASSERT(!tx->tx_waiting);
-	LASSERT(!tx->tx_status);
-	LASSERT(!tx->tx_conn);
-	LASSERT(!tx->tx_lntmsg[0]);
-	LASSERT(!tx->tx_lntmsg[1]);
-	LASSERT(!tx->tx_nfrags);
-
-	return tx;
-}
-
-static void
-kiblnd_drop_rx(struct kib_rx *rx)
-{
-	struct kib_conn *conn = rx->rx_conn;
-	struct kib_sched_info *sched = conn->ibc_sched;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sched->ibs_lock, flags);
-	LASSERT(conn->ibc_nrx > 0);
-	conn->ibc_nrx--;
-	spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-	kiblnd_conn_decref(conn);
-}
-
-int
-kiblnd_post_rx(struct kib_rx *rx, int credit)
-{
-	struct kib_conn *conn = rx->rx_conn;
-	struct kib_net *net = conn->ibc_peer->ibp_ni->ni_data;
-	struct ib_recv_wr *bad_wrq = NULL;
-	int rc;
-
-	LASSERT(net);
-	LASSERT(!in_interrupt());
-	LASSERT(credit == IBLND_POSTRX_NO_CREDIT ||
-		credit == IBLND_POSTRX_PEER_CREDIT ||
-		credit == IBLND_POSTRX_RSRVD_CREDIT);
-
-	rx->rx_sge.lkey   = conn->ibc_hdev->ibh_pd->local_dma_lkey;
-	rx->rx_sge.addr   = rx->rx_msgaddr;
-	rx->rx_sge.length = IBLND_MSG_SIZE;
-
-	rx->rx_wrq.next    = NULL;
-	rx->rx_wrq.sg_list = &rx->rx_sge;
-	rx->rx_wrq.num_sge = 1;
-	rx->rx_wrq.wr_id   = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
-
-	LASSERT(conn->ibc_state >= IBLND_CONN_INIT);
-	LASSERT(rx->rx_nob >= 0);	      /* not posted */
-
-	if (conn->ibc_state > IBLND_CONN_ESTABLISHED) {
-		kiblnd_drop_rx(rx);	     /* No more posts for this rx */
-		return 0;
-	}
-
-	rx->rx_nob = -1;			/* flag posted */
-
-	/* NB: need an extra reference after ib_post_recv because we don't
-	 * own this rx (and rx::rx_conn) anymore, LU-5678.
-	 */
-	kiblnd_conn_addref(conn);
-	rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq);
-	if (unlikely(rc)) {
-		CERROR("Can't post rx for %s: %d, bad_wrq: %p\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq);
-		rx->rx_nob = 0;
-	}
-
-	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */
-		goto out;
-
-	if (unlikely(rc)) {
-		kiblnd_close_conn(conn, rc);
-		kiblnd_drop_rx(rx);	     /* No more posts for this rx */
-		goto out;
-	}
-
-	if (credit == IBLND_POSTRX_NO_CREDIT)
-		goto out;
-
-	spin_lock(&conn->ibc_lock);
-	if (credit == IBLND_POSTRX_PEER_CREDIT)
-		conn->ibc_outstanding_credits++;
-	else
-		conn->ibc_reserved_credits++;
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-
-out:
-	kiblnd_conn_decref(conn);
-	return rc;
-}
-
-static struct kib_tx *
-kiblnd_find_waiting_tx_locked(struct kib_conn *conn, int txtype, __u64 cookie)
-{
-	struct list_head *tmp;
-
-	list_for_each(tmp, &conn->ibc_active_txs) {
-		struct kib_tx *tx = list_entry(tmp, struct kib_tx, tx_list);
-
-		LASSERT(!tx->tx_queued);
-		LASSERT(tx->tx_sending || tx->tx_waiting);
-
-		if (tx->tx_cookie != cookie)
-			continue;
-
-		if (tx->tx_waiting &&
-		    tx->tx_msg->ibm_type == txtype)
-			return tx;
-
-		CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
-		      tx->tx_waiting ? "" : "NOT ",
-		      tx->tx_msg->ibm_type, txtype);
-	}
-	return NULL;
-}
-
-static void
-kiblnd_handle_completion(struct kib_conn *conn, int txtype, int status, __u64 cookie)
-{
-	struct kib_tx *tx;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	int idle;
-
-	spin_lock(&conn->ibc_lock);
-
-	tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie);
-	if (!tx) {
-		spin_unlock(&conn->ibc_lock);
-
-		CWARN("Unmatched completion type %x cookie %#llx from %s\n",
-		      txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		kiblnd_close_conn(conn, -EPROTO);
-		return;
-	}
-
-	if (!tx->tx_status) {	       /* success so far */
-		if (status < 0) /* failed? */
-			tx->tx_status = status;
-		else if (txtype == IBLND_MSG_GET_REQ)
-			lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status);
-	}
-
-	tx->tx_waiting = 0;
-
-	idle = !tx->tx_queued && !tx->tx_sending;
-	if (idle)
-		list_del(&tx->tx_list);
-
-	spin_unlock(&conn->ibc_lock);
-
-	if (idle)
-		kiblnd_tx_done(ni, tx);
-}
-
-static void
-kiblnd_send_completion(struct kib_conn *conn, int type, int status, __u64 cookie)
-{
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	struct kib_tx *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-
-	if (!tx) {
-		CERROR("Can't get tx for completion %x for %s\n",
-		       type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		return;
-	}
-
-	tx->tx_msg->ibm_u.completion.ibcm_status = status;
-	tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
-	kiblnd_init_tx_msg(ni, tx, type, sizeof(struct kib_completion_msg));
-
-	kiblnd_queue_tx(tx, conn);
-}
-
-static void
-kiblnd_handle_rx(struct kib_rx *rx)
-{
-	struct kib_msg *msg = rx->rx_msg;
-	struct kib_conn *conn = rx->rx_conn;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	int credits = msg->ibm_credits;
-	struct kib_tx *tx;
-	int rc = 0;
-	int rc2;
-	int post_credit;
-
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	CDEBUG(D_NET, "Received %x[%d] from %s\n",
-	       msg->ibm_type, credits,
-	       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
-	if (credits) {
-		/* Have I received credits that will let me send? */
-		spin_lock(&conn->ibc_lock);
-
-		if (conn->ibc_credits + credits >
-		    conn->ibc_queue_depth) {
-			rc2 = conn->ibc_credits;
-			spin_unlock(&conn->ibc_lock);
-
-			CERROR("Bad credits from %s: %d + %d > %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       rc2, credits, conn->ibc_queue_depth);
-
-			kiblnd_close_conn(conn, -EPROTO);
-			kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
-			return;
-		}
-
-		conn->ibc_credits += credits;
-
-		/* This ensures the credit taken by NOOP can be returned */
-		if (msg->ibm_type == IBLND_MSG_NOOP &&
-		    !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
-			conn->ibc_outstanding_credits++;
-
-		kiblnd_check_sends_locked(conn);
-		spin_unlock(&conn->ibc_lock);
-	}
-
-	switch (msg->ibm_type) {
-	default:
-		CERROR("Bad IBLND message type %x from %s\n",
-		       msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		post_credit = IBLND_POSTRX_NO_CREDIT;
-		rc = -EPROTO;
-		break;
-
-	case IBLND_MSG_NOOP:
-		if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
-			post_credit = IBLND_POSTRX_NO_CREDIT;
-			break;
-		}
-
-		if (credits) /* credit already posted */
-			post_credit = IBLND_POSTRX_NO_CREDIT;
-		else	      /* a keepalive NOOP */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_IMMEDIATE:
-		post_credit = IBLND_POSTRX_DONT_POST;
-		rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr,
-				msg->ibm_srcnid, rx, 0);
-		if (rc < 0)		     /* repost on error */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_PUT_REQ:
-		post_credit = IBLND_POSTRX_DONT_POST;
-		rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr,
-				msg->ibm_srcnid, rx, 1);
-		if (rc < 0)		     /* repost on error */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_PUT_NAK:
-		CWARN("PUT_NACK from %s\n",
-		      libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-		kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ,
-					 msg->ibm_u.completion.ibcm_status,
-					 msg->ibm_u.completion.ibcm_cookie);
-		break;
-
-	case IBLND_MSG_PUT_ACK:
-		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-
-		spin_lock(&conn->ibc_lock);
-		tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ,
-						   msg->ibm_u.putack.ibpam_src_cookie);
-		if (tx)
-			list_del(&tx->tx_list);
-		spin_unlock(&conn->ibc_lock);
-
-		if (!tx) {
-			CERROR("Unmatched PUT_ACK from %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			rc = -EPROTO;
-			break;
-		}
-
-		LASSERT(tx->tx_waiting);
-		/*
-		 * CAVEAT EMPTOR: I could be racing with tx_complete, but...
-		 * (a) I can overwrite tx_msg since my peer has received it!
-		 * (b) tx_waiting set tells tx_complete() it's not done.
-		 */
-		tx->tx_nwrq = 0;		/* overwrite PUT_REQ */
-
-		rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE,
-				       kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd),
-				       &msg->ibm_u.putack.ibpam_rd,
-				       msg->ibm_u.putack.ibpam_dst_cookie);
-		if (rc2 < 0)
-			CERROR("Can't setup rdma for PUT to %s: %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
-		spin_lock(&conn->ibc_lock);
-		tx->tx_waiting = 0;	/* clear waiting and queue atomically */
-		kiblnd_queue_tx_locked(tx, conn);
-		spin_unlock(&conn->ibc_lock);
-		break;
-
-	case IBLND_MSG_PUT_DONE:
-		post_credit = IBLND_POSTRX_PEER_CREDIT;
-		kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK,
-					 msg->ibm_u.completion.ibcm_status,
-					 msg->ibm_u.completion.ibcm_cookie);
-		break;
-
-	case IBLND_MSG_GET_REQ:
-		post_credit = IBLND_POSTRX_DONT_POST;
-		rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr,
-				msg->ibm_srcnid, rx, 1);
-		if (rc < 0)		     /* repost on error */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_GET_DONE:
-		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-		kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ,
-					 msg->ibm_u.completion.ibcm_status,
-					 msg->ibm_u.completion.ibcm_cookie);
-		break;
-	}
-
-	if (rc < 0)			     /* protocol error */
-		kiblnd_close_conn(conn, rc);
-
-	if (post_credit != IBLND_POSTRX_DONT_POST)
-		kiblnd_post_rx(rx, post_credit);
-}
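
The credit check at the top of kiblnd_handle_rx() is the receive half of the flow-control protocol: a peer can never return more credits than the negotiated queue depth, so ibc_credits + credits exceeding it can only mean a protocol violation, hence the -EPROTO close. A small sketch of that accounting under assumed values:

#include <stdio.h>

static int credits = 6;		/* credits I currently hold	*/
static int queue_depth = 8;	/* negotiated at connect time	*/

/* returns 0 on success, -1 on protocol error (mirrors -EPROTO above) */
static int return_credits(int returned)
{
	if (credits + returned > queue_depth) {
		fprintf(stderr, "bad credits: %d + %d > %d\n",
			credits, returned, queue_depth);
		return -1;
	}
	credits += returned;
	return 0;
}

int main(void)
{
	return_credits(2);	/* fine: 6 + 2 == 8	      */
	return_credits(1);	/* protocol error: 8 + 1 > 8  */
	return 0;
}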
-
-static void
-kiblnd_rx_complete(struct kib_rx *rx, int status, int nob)
-{
-	struct kib_msg *msg = rx->rx_msg;
-	struct kib_conn *conn = rx->rx_conn;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	struct kib_net *net = ni->ni_data;
-	int rc;
-	int err = -EIO;
-
-	LASSERT(net);
-	LASSERT(rx->rx_nob < 0);	       /* was posted */
-	rx->rx_nob = 0;			 /* isn't now */
-
-	if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
-		goto ignore;
-
-	if (status != IB_WC_SUCCESS) {
-		CNETERR("Rx from %s failed: %d\n",
-			libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
-		goto failed;
-	}
-
-	LASSERT(nob >= 0);
-	rx->rx_nob = nob;
-
-	rc = kiblnd_unpack_msg(msg, rx->rx_nob);
-	if (rc) {
-		CERROR("Error %d unpacking rx from %s\n",
-		       rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		goto failed;
-	}
-
-	if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
-	    msg->ibm_dstnid != ni->ni_nid ||
-	    msg->ibm_srcstamp != conn->ibc_incarnation ||
-	    msg->ibm_dststamp != net->ibn_incarnation) {
-		CERROR("Stale rx from %s\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		err = -ESTALE;
-		goto failed;
-	}
-
-	/* set time last known alive */
-	kiblnd_peer_alive(conn->ibc_peer);
-
-	/* racing with connection establishment/teardown! */
-
-	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-		rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-		unsigned long flags;
-
-		write_lock_irqsave(g_lock, flags);
-		/* must check holding global lock to eliminate race */
-		if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-			list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
-			write_unlock_irqrestore(g_lock, flags);
-			return;
-		}
-		write_unlock_irqrestore(g_lock, flags);
-	}
-	kiblnd_handle_rx(rx);
-	return;
-
- failed:
-	CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
-	kiblnd_close_conn(conn, err);
- ignore:
-	kiblnd_drop_rx(rx);		     /* Don't re-post rx. */
-}
-
-static struct page *
-kiblnd_kvaddr_to_page(unsigned long vaddr)
-{
-	struct page *page;
-
-	if (is_vmalloc_addr((void *)vaddr)) {
-		page = vmalloc_to_page((void *)vaddr);
-		LASSERT(page);
-		return page;
-	}
-#ifdef CONFIG_HIGHMEM
-	if (vaddr >= PKMAP_BASE &&
-	    vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
-		/* Highmem pages are only used for bulk (kiov) I/O, so a */
-		/* kernel virtual address should never be in highmem */
-		CERROR("find page for address in highmem\n");
-		LBUG();
-	}
-#endif
-	page = virt_to_page(vaddr);
-	LASSERT(page);
-	return page;
-}
-
-static int
-kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx, struct kib_rdma_desc *rd, __u32 nob)
-{
-	struct kib_hca_dev *hdev;
-	struct kib_fmr_poolset *fps;
-	int cpt;
-	int rc;
-
-	LASSERT(tx->tx_pool);
-	LASSERT(tx->tx_pool->tpo_pool.po_owner);
-
-	hdev = tx->tx_pool->tpo_hdev;
-	cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
-
-	fps = net->ibn_fmr_ps[cpt];
-	rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr);
-	if (rc) {
-		CERROR("Can't map %u bytes: %d\n", nob, rc);
-		return rc;
-	}
-
-	/*
-	 * If rd is not tx_rd, it's going to get sent to a peer, who will need
-	 * the rkey
-	 */
-	rd->rd_key = tx->fmr.fmr_key;
-	rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
-	rd->rd_frags[0].rf_nob = nob;
-	rd->rd_nfrags = 1;
-
-	return 0;
-}
-
-static void kiblnd_unmap_tx(struct kib_tx *tx)
-{
-	if (tx->fmr.fmr_pfmr || tx->fmr.fmr_frd)
-		kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
-
-	if (tx->tx_nfrags) {
-		kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
-				    tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
-		tx->tx_nfrags = 0;
-	}
-}
-
-static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
-			 struct kib_rdma_desc *rd, int nfrags)
-{
-	struct kib_net *net = ni->ni_data;
-	struct kib_hca_dev *hdev = net->ibn_dev->ibd_hdev;
-	__u32 nob;
-	int i;
-
-	/*
-	 * If rd is not tx_rd, it's going to get sent to a peer and I'm the
-	 * RDMA sink
-	 */
-	tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	tx->tx_nfrags = nfrags;
-
-	rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags,
-					  tx->tx_nfrags, tx->tx_dmadir);
-
-	for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
-		rd->rd_frags[i].rf_nob  = kiblnd_sg_dma_len(
-			hdev->ibh_ibdev, &tx->tx_frags[i]);
-		rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
-			hdev->ibh_ibdev, &tx->tx_frags[i]);
-		nob += rd->rd_frags[i].rf_nob;
-	}
-
-	if (net->ibn_fmr_ps)
-		return kiblnd_fmr_map_tx(net, tx, rd, nob);
-
-	return -EINVAL;
-}
-
-static int
-kiblnd_setup_rd_iov(struct lnet_ni *ni, struct kib_tx *tx,
-		    struct kib_rdma_desc *rd, unsigned int niov,
-		    const struct kvec *iov, int offset, int nob)
-{
-	struct kib_net *net = ni->ni_data;
-	struct page *page;
-	struct scatterlist *sg;
-	unsigned long vaddr;
-	int fragnob;
-	int page_offset;
-
-	LASSERT(nob > 0);
-	LASSERT(niov > 0);
-	LASSERT(net);
-
-	while (offset >= iov->iov_len) {
-		offset -= iov->iov_len;
-		niov--;
-		iov++;
-		LASSERT(niov > 0);
-	}
-
-	sg = tx->tx_frags;
-	do {
-		LASSERT(niov > 0);
-
-		vaddr = ((unsigned long)iov->iov_base) + offset;
-		page_offset = vaddr & (PAGE_SIZE - 1);
-		page = kiblnd_kvaddr_to_page(vaddr);
-		if (!page) {
-			CERROR("Can't find page\n");
-			return -EFAULT;
-		}
-
-		fragnob = min((int)(iov->iov_len - offset), nob);
-		fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
-		sg_set_page(sg, page, fragnob, page_offset);
-		sg = sg_next(sg);
-		if (!sg) {
-			CERROR("lacking enough sg entries to map tx\n");
-			return -EFAULT;
-		}
-
-		if (offset + fragnob < iov->iov_len) {
-			offset += fragnob;
-		} else {
-			offset = 0;
-			iov++;
-			niov--;
-		}
-		nob -= fragnob;
-	} while (nob > 0);
-
-	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
-
-static int
-kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
-		     struct kib_rdma_desc *rd, int nkiov,
-		     const struct bio_vec *kiov, int offset, int nob)
-{
-	struct kib_net *net = ni->ni_data;
-	struct scatterlist *sg;
-	int fragnob;
-
-	CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-	LASSERT(nob > 0);
-	LASSERT(nkiov > 0);
-	LASSERT(net);
-
-	while (offset >= kiov->bv_len) {
-		offset -= kiov->bv_len;
-		nkiov--;
-		kiov++;
-		LASSERT(nkiov > 0);
-	}
-
-	sg = tx->tx_frags;
-	do {
-		LASSERT(nkiov > 0);
-
-		fragnob = min((int)(kiov->bv_len - offset), nob);
-
-		sg_set_page(sg, kiov->bv_page, fragnob,
-			    kiov->bv_offset + offset);
-		sg = sg_next(sg);
-		if (!sg) {
-			CERROR("lacking enough sg entries to map tx\n");
-			return -EFAULT;
-		}
-
-		offset = 0;
-		kiov++;
-		nkiov--;
-		nob -= fragnob;
-	} while (nob > 0);
-
-	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
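
Both setup routines above walk an I/O vector, clipping each scatterlist fragment to the bytes remaining (and, for kvec addresses, to a page boundary as well). A standalone sketch of the clipping loop over a plain array of segment lengths:

#include <stdio.h>

/* print the fragment sizes for `nob` bytes starting `offset` into segs[] */
static void walk(const int *segs, int nsegs, int offset, int nob)
{
	while (offset >= *segs) {	/* skip fully-consumed leading segs */
		offset -= *segs++;
		nsegs--;
	}
	while (nob > 0 && nsegs > 0) {
		int fragnob = *segs - offset;

		if (fragnob > nob)
			fragnob = nob;	/* clip the final fragment */
		printf("frag %d bytes\n", fragnob);
		nob -= fragnob;
		offset = 0;
		segs++;
		nsegs--;
	}
}

int main(void)
{
	int segs[] = { 4096, 4096, 2048 };

	walk(segs, 3, 1000, 8000);	/* 3096 + 4096 + 808 = 8000 */
	return 0;
}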
-
-static int
-kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
-	__must_hold(&conn->ibc_lock)
-{
-	struct kib_msg *msg = tx->tx_msg;
-	struct kib_peer *peer = conn->ibc_peer;
-	struct lnet_ni *ni = peer->ibp_ni;
-	int ver = conn->ibc_version;
-	int rc;
-	int done;
-
-	LASSERT(tx->tx_queued);
-	/* We rely on this for QP sizing */
-	LASSERT(tx->tx_nwrq > 0);
-
-	LASSERT(!credit || credit == 1);
-	LASSERT(conn->ibc_outstanding_credits >= 0);
-	LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth);
-	LASSERT(conn->ibc_credits >= 0);
-	LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
-
-	if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) {
-		/* tx completions outstanding... */
-		CDEBUG(D_NET, "%s: posted enough\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		return -EAGAIN;
-	}
-
-	if (credit && !conn->ibc_credits) {   /* no credits */
-		CDEBUG(D_NET, "%s: no credits\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		return -EAGAIN;
-	}
-
-	if (credit && !IBLND_OOB_CAPABLE(ver) &&
-	    conn->ibc_credits == 1 &&   /* last credit reserved */
-	    msg->ibm_type != IBLND_MSG_NOOP) {      /* for NOOP */
-		CDEBUG(D_NET, "%s: not using last credit\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		return -EAGAIN;
-	}
-
-	/* NB don't drop ibc_lock before bumping tx_sending */
-	list_del(&tx->tx_list);
-	tx->tx_queued = 0;
-
-	if (msg->ibm_type == IBLND_MSG_NOOP &&
-	    (!kiblnd_need_noop(conn) ||     /* redundant NOOP */
-	     (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */
-	      conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
-		/*
-		 * OK to drop when posted enough NOOPs, since
-		 * kiblnd_check_sends_locked will queue NOOP again when
-		 * posted NOOPs complete
-		 */
-		spin_unlock(&conn->ibc_lock);
-		kiblnd_tx_done(peer->ibp_ni, tx);
-		spin_lock(&conn->ibc_lock);
-		CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       conn->ibc_noops_posted);
-		return 0;
-	}
-
-	kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
-			peer->ibp_nid, conn->ibc_incarnation);
-
-	conn->ibc_credits -= credit;
-	conn->ibc_outstanding_credits = 0;
-	conn->ibc_nsends_posted++;
-	if (msg->ibm_type == IBLND_MSG_NOOP)
-		conn->ibc_noops_posted++;
-
-	/*
-	 * CAVEAT EMPTOR!  This tx could be the PUT_DONE of an RDMA
-	 * PUT.  If so, it was first queued here as a PUT_REQ, sent and
-	 * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
-	 * and then re-queued here.  It's (just) possible that
-	 * tx_sending is non-zero if we've not done the tx_complete()
-	 * from the first send; hence the ++ rather than = below.
-	 */
-	tx->tx_sending++;
-	list_add(&tx->tx_list, &conn->ibc_active_txs);
-
-	/* I'm still holding ibc_lock! */
-	if (conn->ibc_state != IBLND_CONN_ESTABLISHED) {
-		rc = -ECONNABORTED;
-	} else if (tx->tx_pool->tpo_pool.po_failed ||
-		 conn->ibc_hdev != tx->tx_pool->tpo_hdev) {
-		/* close_conn will launch failover */
-		rc = -ENETDOWN;
-	} else {
-		struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
-		struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
-		struct ib_send_wr *wrq = &tx->tx_wrq[0].wr;
-
-		if (frd) {
-			if (!frd->frd_valid) {
-				wrq = &frd->frd_inv_wr;
-				wrq->next = &frd->frd_fastreg_wr.wr;
-			} else {
-				wrq = &frd->frd_fastreg_wr.wr;
-			}
-			frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr;
-		}
-
-		LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
-			 "bad wr_id %llx, opc %d, flags %d, peer: %s\n",
-			 bad->wr_id, bad->opcode, bad->send_flags,
-			 libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		bad = NULL;
-		rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
-	}
-
-	conn->ibc_last_send = jiffies;
-
-	if (!rc)
-		return 0;
-
-	/*
-	 * NB credits are transferred in the actual
-	 * message, which can only be the last work item
-	 */
-	conn->ibc_credits += credit;
-	conn->ibc_outstanding_credits += msg->ibm_credits;
-	conn->ibc_nsends_posted--;
-	if (msg->ibm_type == IBLND_MSG_NOOP)
-		conn->ibc_noops_posted--;
-
-	tx->tx_status = rc;
-	tx->tx_waiting = 0;
-	tx->tx_sending--;
-
-	done = !tx->tx_sending;
-	if (done)
-		list_del(&tx->tx_list);
-
-	spin_unlock(&conn->ibc_lock);
-
-	if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
-		CERROR("Error %d posting transmit to %s\n",
-		       rc, libcfs_nid2str(peer->ibp_nid));
-	else
-		CDEBUG(D_NET, "Error %d posting transmit to %s\n",
-		       rc, libcfs_nid2str(peer->ibp_nid));
-
-	kiblnd_close_conn(conn, rc);
-
-	if (done)
-		kiblnd_tx_done(peer->ibp_ni, tx);
-
-	spin_lock(&conn->ibc_lock);
-
-	return -EIO;
-}
-
-static void
-kiblnd_check_sends_locked(struct kib_conn *conn)
-{
-	int ver = conn->ibc_version;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	struct kib_tx *tx;
-
-	/* Don't send anything until after the connection is established */
-	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-		CDEBUG(D_NET, "%s too soon\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		return;
-	}
-
-	LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
-	LASSERT(!IBLND_OOB_CAPABLE(ver) ||
-		conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
-	LASSERT(conn->ibc_reserved_credits >= 0);
-
-	while (conn->ibc_reserved_credits > 0 &&
-	       !list_empty(&conn->ibc_tx_queue_rsrvd)) {
-		tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
-				struct kib_tx, tx_list);
-		list_del(&tx->tx_list);
-		list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
-		conn->ibc_reserved_credits--;
-	}
-
-	if (kiblnd_need_noop(conn)) {
-		spin_unlock(&conn->ibc_lock);
-
-		tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-		if (tx)
-			kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0);
-
-		spin_lock(&conn->ibc_lock);
-		if (tx)
-			kiblnd_queue_tx_locked(tx, conn);
-	}
-
-	for (;;) {
-		int credit;
-
-		if (!list_empty(&conn->ibc_tx_queue_nocred)) {
-			credit = 0;
-			tx = list_entry(conn->ibc_tx_queue_nocred.next,
-					struct kib_tx, tx_list);
-		} else if (!list_empty(&conn->ibc_tx_noops)) {
-			LASSERT(!IBLND_OOB_CAPABLE(ver));
-			credit = 1;
-			tx = list_entry(conn->ibc_tx_noops.next,
-					struct kib_tx, tx_list);
-		} else if (!list_empty(&conn->ibc_tx_queue)) {
-			credit = 1;
-			tx = list_entry(conn->ibc_tx_queue.next,
-					struct kib_tx, tx_list);
-		} else {
-			break;
-		}
-
-		if (kiblnd_post_tx_locked(conn, tx, credit))
-			break;
-	}
-}
-
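-/*
- * Send completion callback.  A tx may still be waiting for a reply
- * (tx_waiting) or have been re-queued as a PUT_DONE (tx_queued), so
- * it is only finalised once it is neither sending, waiting nor
- * queued; whichever path clears the last of these frees it.
- */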
-static void
-kiblnd_tx_complete(struct kib_tx *tx, int status)
-{
-	int failed = (status != IB_WC_SUCCESS);
-	struct kib_conn *conn = tx->tx_conn;
-	int idle;
-
-	LASSERT(tx->tx_sending > 0);
-
-	if (failed) {
-		if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
-			CNETERR("Tx -> %s cookie %#llx sending %d waiting %d: failed %d\n",
-				libcfs_nid2str(conn->ibc_peer->ibp_nid),
-				tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
-				status);
-
-		kiblnd_close_conn(conn, -EIO);
-	} else {
-		kiblnd_peer_alive(conn->ibc_peer);
-	}
-
-	spin_lock(&conn->ibc_lock);
-
-	/*
-	 * I could be racing with rdma completion.  Whoever makes 'tx' idle
-	 * gets to free it, which also drops its ref on 'conn'.
-	 */
-	tx->tx_sending--;
-	conn->ibc_nsends_posted--;
-	if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP)
-		conn->ibc_noops_posted--;
-
-	if (failed) {
-		tx->tx_waiting = 0;	     /* don't wait for peer */
-		tx->tx_status = -EIO;
-	}
-
-	idle = !tx->tx_sending &&	 /* This is the final callback */
-	       !tx->tx_waiting &&	       /* Not waiting for peer */
-	       !tx->tx_queued;		  /* Not re-queued (PUT_DONE) */
-	if (idle)
-		list_del(&tx->tx_list);
-
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-
-	if (idle)
-		kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-}
-
-static void
-kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
-		   int body_nob)
-{
-	struct kib_hca_dev *hdev = tx->tx_pool->tpo_hdev;
-	struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
-	struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
-	int nob = offsetof(struct kib_msg, ibm_u) + body_nob;
-
-	LASSERT(tx->tx_nwrq >= 0);
-	LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
-	LASSERT(nob <= IBLND_MSG_SIZE);
-
-	kiblnd_init_msg(tx->tx_msg, type, body_nob);
-
-	sge->lkey   = hdev->ibh_pd->local_dma_lkey;
-	sge->addr   = tx->tx_msgaddr;
-	sge->length = nob;
-
-	memset(wrq, 0, sizeof(*wrq));
-
-	wrq->wr.next       = NULL;
-	wrq->wr.wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
-	wrq->wr.sg_list    = sge;
-	wrq->wr.num_sge    = 1;
-	wrq->wr.opcode     = IB_WR_SEND;
-	wrq->wr.send_flags = IB_SEND_SIGNALED;
-
-	tx->tx_nwrq++;
-}
-
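-/*
- * Build the work-request chain for a GET_DONE/PUT_DONE transfer:
- * one IB_WR_RDMA_WRITE per overlap of a source and destination
- * fragment, bounded by IBLND_MAX_RDMA_FRAGS.  The completion
- * message itself is appended by kiblnd_init_tx_msg() as the final,
- * signalled send.
- */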
-static int
-kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
-		 int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie)
-{
-	struct kib_msg *ibmsg = tx->tx_msg;
-	struct kib_rdma_desc *srcrd = tx->tx_rd;
-	struct ib_sge *sge = &tx->tx_sge[0];
-	struct ib_rdma_wr *wrq, *next;
-	int rc  = resid;
-	int srcidx = 0;
-	int dstidx = 0;
-	int wrknob;
-
-	LASSERT(!in_interrupt());
-	LASSERT(!tx->tx_nwrq);
-	LASSERT(type == IBLND_MSG_GET_DONE ||
-		type == IBLND_MSG_PUT_DONE);
-
-	if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
-		CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-		       conn->ibc_max_frags << PAGE_SHIFT,
-		       kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
-		rc = -EMSGSIZE;
-		goto too_big;
-	}
-
-	while (resid > 0) {
-		if (srcidx >= srcrd->rd_nfrags) {
-			CERROR("Src buffer exhausted: %d frags\n", srcidx);
-			rc = -EPROTO;
-			break;
-		}
-
-		if (dstidx == dstrd->rd_nfrags) {
-			CERROR("Dst buffer exhausted: %d frags\n", dstidx);
-			rc = -EPROTO;
-			break;
-		}
-
-		if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
-			CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       IBLND_MAX_RDMA_FRAGS,
-			       srcidx, srcrd->rd_nfrags,
-			       dstidx, dstrd->rd_nfrags);
-			rc = -EMSGSIZE;
-			break;
-		}
-
-		wrknob = min3(kiblnd_rd_frag_size(srcrd, srcidx),
-			      kiblnd_rd_frag_size(dstrd, dstidx),
-			      (__u32)resid);
-
-		sge = &tx->tx_sge[tx->tx_nwrq];
-		sge->addr   = kiblnd_rd_frag_addr(srcrd, srcidx);
-		sge->lkey   = kiblnd_rd_frag_key(srcrd, srcidx);
-		sge->length = wrknob;
-
-		wrq = &tx->tx_wrq[tx->tx_nwrq];
-		next = wrq + 1;
-
-		wrq->wr.next       = &next->wr;
-		wrq->wr.wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
-		wrq->wr.sg_list    = sge;
-		wrq->wr.num_sge    = 1;
-		wrq->wr.opcode     = IB_WR_RDMA_WRITE;
-		wrq->wr.send_flags = 0;
-
-		wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
-		wrq->rkey        = kiblnd_rd_frag_key(dstrd, dstidx);
-
-		srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
-		dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
-
-		resid -= wrknob;
-
-		tx->tx_nwrq++;
-		wrq++;
-		sge++;
-	}
-too_big:
-	if (rc < 0)			     /* no RDMA if completing with failure */
-		tx->tx_nwrq = 0;
-
-	ibmsg->ibm_u.completion.ibcm_status = rc;
-	ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
-	kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx,
-			   type, sizeof(struct kib_completion_msg));
-
-	return rc;
-}
-
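-/*
- * Pick the queue for a tx.  PUT_REQ/GET_REQ wait for a reserved
- * credit; completion messages (ACK/NAK/DONE) go on the no-credit
- * queue and are posted with credit == 0 (see
- * kiblnd_check_sends_locked), so a reply never stalls waiting for
- * send credits.
- */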
-static void
-kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn)
-{
-	struct list_head *q;
-
-	LASSERT(tx->tx_nwrq > 0);	      /* work items set up */
-	LASSERT(!tx->tx_queued);	       /* not queued for sending already */
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	tx->tx_queued = 1;
-	tx->tx_deadline = jiffies +
-			  msecs_to_jiffies(*kiblnd_tunables.kib_timeout *
-					   MSEC_PER_SEC);
-
-	if (!tx->tx_conn) {
-		kiblnd_conn_addref(conn);
-		tx->tx_conn = conn;
-		LASSERT(tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE);
-	} else {
-		/* PUT_DONE first attached to conn as a PUT_REQ */
-		LASSERT(tx->tx_conn == conn);
-		LASSERT(tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE);
-	}
-
-	switch (tx->tx_msg->ibm_type) {
-	default:
-		LBUG();
-
-	case IBLND_MSG_PUT_REQ:
-	case IBLND_MSG_GET_REQ:
-		q = &conn->ibc_tx_queue_rsrvd;
-		break;
-
-	case IBLND_MSG_PUT_NAK:
-	case IBLND_MSG_PUT_ACK:
-	case IBLND_MSG_PUT_DONE:
-	case IBLND_MSG_GET_DONE:
-		q = &conn->ibc_tx_queue_nocred;
-		break;
-
-	case IBLND_MSG_NOOP:
-		if (IBLND_OOB_CAPABLE(conn->ibc_version))
-			q = &conn->ibc_tx_queue_nocred;
-		else
-			q = &conn->ibc_tx_noops;
-		break;
-
-	case IBLND_MSG_IMMEDIATE:
-		q = &conn->ibc_tx_queue;
-		break;
-	}
-
-	list_add_tail(&tx->tx_list, q);
-}
-
-static void
-kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
-{
-	spin_lock(&conn->ibc_lock);
-	kiblnd_queue_tx_locked(tx, conn);
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-}
-
-static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
-			       struct sockaddr_in *srcaddr,
-			       struct sockaddr_in *dstaddr,
-			       int timeout_ms)
-{
-	unsigned short port;
-	int rc;
-
-	/* allow the port to be reused */
-	rc = rdma_set_reuseaddr(cmid, 1);
-	if (rc) {
-		CERROR("Unable to set reuse on cmid: %d\n", rc);
-		return rc;
-	}
-
-	/* look for a free privileged port */
-	for (port = PROT_SOCK - 1; port > 0; port--) {
-		srcaddr->sin_port = htons(port);
-		rc = rdma_resolve_addr(cmid,
-				       (struct sockaddr *)srcaddr,
-				       (struct sockaddr *)dstaddr,
-				       timeout_ms);
-		if (!rc) {
-			CDEBUG(D_NET, "bound to port %hu\n", port);
-			return 0;
-		} else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) {
-			CDEBUG(D_NET, "bind to port %hu failed: %d\n",
-			       port, rc);
-		} else {
-			return rc;
-		}
-	}
-
-	CERROR("Failed to bind to a free privileged port\n");
-	return rc;
-}
-
-static void
-kiblnd_connect_peer(struct kib_peer *peer)
-{
-	struct rdma_cm_id *cmid;
-	struct kib_dev *dev;
-	struct kib_net *net = peer->ibp_ni->ni_data;
-	struct sockaddr_in srcaddr;
-	struct sockaddr_in dstaddr;
-	int rc;
-
-	LASSERT(net);
-	LASSERT(peer->ibp_connecting > 0);
-
-	cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
-				     IB_QPT_RC);
-
-	if (IS_ERR(cmid)) {
-		CERROR("Can't create CMID for %s: %ld\n",
-		       libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
-		rc = PTR_ERR(cmid);
-		goto failed;
-	}
-
-	dev = net->ibn_dev;
-	memset(&srcaddr, 0, sizeof(srcaddr));
-	srcaddr.sin_family = AF_INET;
-	srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
-	memset(&dstaddr, 0, sizeof(dstaddr));
-	dstaddr.sin_family = AF_INET;
-	dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
-	dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
-
-	kiblnd_peer_addref(peer);	       /* cmid's ref */
-
-	if (*kiblnd_tunables.kib_use_priv_port) {
-		rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
-					 *kiblnd_tunables.kib_timeout * 1000);
-	} else {
-		rc = rdma_resolve_addr(cmid,
-				       (struct sockaddr *)&srcaddr,
-				       (struct sockaddr *)&dstaddr,
-				       *kiblnd_tunables.kib_timeout * 1000);
-	}
-	if (rc) {
-		/* Can't initiate address resolution */
-		CERROR("Can't resolve addr for %s: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc);
-		goto failed2;
-	}
-
-	return;
-
- failed2:
-	kiblnd_peer_connect_failed(peer, 1, rc);
-	kiblnd_peer_decref(peer);	       /* cmid's ref */
-	rdma_destroy_id(cmid);
-	return;
- failed:
-	kiblnd_peer_connect_failed(peer, 1, rc);
-}
-
-bool
-kiblnd_reconnect_peer(struct kib_peer *peer)
-{
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	char *reason = NULL;
-	struct list_head txs;
-	unsigned long flags;
-
-	INIT_LIST_HEAD(&txs);
-
-	write_lock_irqsave(glock, flags);
-	if (!peer->ibp_reconnecting) {
-		if (peer->ibp_accepting)
-			reason = "accepting";
-		else if (peer->ibp_connecting)
-			reason = "connecting";
-		else if (!list_empty(&peer->ibp_conns))
-			reason = "connected";
-		else /* connected then closed */
-			reason = "closed";
-
-		goto no_reconnect;
-	}
-
-	LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
-		list_empty(&peer->ibp_conns));
-	peer->ibp_reconnecting--;
-
-	if (!kiblnd_peer_active(peer)) {
-		list_splice_init(&peer->ibp_tx_queue, &txs);
-		reason = "unlinked";
-		goto no_reconnect;
-	}
-
-	peer->ibp_connecting++;
-	peer->ibp_reconnected++;
-	write_unlock_irqrestore(glock, flags);
-
-	kiblnd_connect_peer(peer);
-	return true;
-
-no_reconnect:
-	write_unlock_irqrestore(glock, flags);
-
-	CWARN("Abort reconnection of %s: %s\n",
-	      libcfs_nid2str(peer->ibp_nid), reason);
-	kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED);
-	return false;
-}
-
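-/*
- * Attach a tx to a connection to 'nid', creating the peer and
- * initiating the connection if necessary.  The common case (peer
- * already connected) runs under the read lock; queueing on a
- * connecting peer takes the write lock; a brand new peer is
- * allocated unlocked and then rechecked (peer2) in case another
- * thread created it first.
- */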
-void
-kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
-{
-	struct kib_peer *peer;
-	struct kib_peer *peer2;
-	struct kib_conn *conn;
-	rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-	unsigned long flags;
-	int rc;
-	int i;
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
-	/*
-	 * If I get here, I've committed to send, so I complete the tx with
-	 * failure on any problems
-	 */
-	LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */
-	LASSERT(!tx || tx->tx_nwrq > 0);     /* work items have been set up */
-
-	/*
-	 * First time, just use a read lock since I expect to find my peer
-	 * connected
-	 */
-	read_lock_irqsave(g_lock, flags);
-
-	peer = kiblnd_find_peer_locked(nid);
-	if (peer && !list_empty(&peer->ibp_conns)) {
-		/* Found a peer with an established connection */
-		conn = kiblnd_get_conn_locked(peer);
-		kiblnd_conn_addref(conn); /* 1 ref for me... */
-
-		read_unlock_irqrestore(g_lock, flags);
-
-		if (tx)
-			kiblnd_queue_tx(tx, conn);
-		kiblnd_conn_decref(conn); /* ...to here */
-		return;
-	}
-
-	read_unlock(g_lock);
-	/* Re-try with a write lock */
-	write_lock(g_lock);
-
-	peer = kiblnd_find_peer_locked(nid);
-	if (peer) {
-		if (list_empty(&peer->ibp_conns)) {
-			/* found a peer, but it's still connecting... */
-			LASSERT(kiblnd_peer_connecting(peer));
-			if (tx)
-				list_add_tail(&tx->tx_list,
-					      &peer->ibp_tx_queue);
-			write_unlock_irqrestore(g_lock, flags);
-		} else {
-			conn = kiblnd_get_conn_locked(peer);
-			kiblnd_conn_addref(conn); /* 1 ref for me... */
-
-			write_unlock_irqrestore(g_lock, flags);
-
-			if (tx)
-				kiblnd_queue_tx(tx, conn);
-			kiblnd_conn_decref(conn); /* ...to here */
-		}
-		return;
-	}
-
-	write_unlock_irqrestore(g_lock, flags);
-
-	/* Allocate a peer ready to add to the peer table and retry */
-	rc = kiblnd_create_peer(ni, &peer, nid);
-	if (rc) {
-		CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
-		if (tx) {
-			tx->tx_status = -EHOSTUNREACH;
-			tx->tx_waiting = 0;
-			kiblnd_tx_done(ni, tx);
-		}
-		return;
-	}
-
-	write_lock_irqsave(g_lock, flags);
-
-	peer2 = kiblnd_find_peer_locked(nid);
-	if (peer2) {
-		if (list_empty(&peer2->ibp_conns)) {
-			/* found a peer, but it's still connecting... */
-			LASSERT(kiblnd_peer_connecting(peer2));
-			if (tx)
-				list_add_tail(&tx->tx_list,
-					      &peer2->ibp_tx_queue);
-			write_unlock_irqrestore(g_lock, flags);
-		} else {
-			conn = kiblnd_get_conn_locked(peer2);
-			kiblnd_conn_addref(conn); /* 1 ref for me... */
-
-			write_unlock_irqrestore(g_lock, flags);
-
-			if (tx)
-				kiblnd_queue_tx(tx, conn);
-			kiblnd_conn_decref(conn); /* ...to here */
-		}
-
-		kiblnd_peer_decref(peer);
-		return;
-	}
-
-	/* Brand new peer */
-	LASSERT(!peer->ibp_connecting);
-	tunables = &peer->ibp_ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-	peer->ibp_connecting = tunables->lnd_conns_per_peer;
-
-	/* always called with a ref on ni, which prevents ni being shutdown */
-	LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown);
-
-	if (tx)
-		list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
-
-	kiblnd_peer_addref(peer);
-	list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
-	write_unlock_irqrestore(g_lock, flags);
-
-	for (i = 0; i < tunables->lnd_conns_per_peer; i++)
-		kiblnd_connect_peer(peer);
-	kiblnd_peer_decref(peer);
-}
-
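-/*
- * LND send entrypoint.  ACKs are always IMMEDIATE.  GETs to routers
- * and payloads that fit within IBLND_MSG_SIZE fall through to the
- * IMMEDIATE path, which copies the payload inline; anything larger
- * sets up an RDMA descriptor and waits for GET_DONE or
- * PUT_{ACK,NAK} respectively.
- */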
-int
-kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
-	struct lnet_hdr *hdr = &lntmsg->msg_hdr;
-	int type = lntmsg->msg_type;
-	struct lnet_process_id target = lntmsg->msg_target;
-	int target_is_router = lntmsg->msg_target_is_router;
-	int routing = lntmsg->msg_routing;
-	unsigned int payload_niov = lntmsg->msg_niov;
-	struct kvec *payload_iov = lntmsg->msg_iov;
-	struct bio_vec *payload_kiov = lntmsg->msg_kiov;
-	unsigned int payload_offset = lntmsg->msg_offset;
-	unsigned int payload_nob = lntmsg->msg_len;
-	struct iov_iter from;
-	struct kib_msg *ibmsg;
-	struct kib_rdma_desc  *rd;
-	struct kib_tx *tx;
-	int nob;
-	int rc;
-
-	/* NB 'private' differs depending on what we're sending... */
-
-	CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
-	       payload_nob, payload_niov, libcfs_id2str(target));
-
-	LASSERT(!payload_nob || payload_niov > 0);
-	LASSERT(payload_niov <= LNET_MAX_IOV);
-
-	/* Thread context */
-	LASSERT(!in_interrupt());
-	/* payload is either all vaddrs or all pages */
-	LASSERT(!(payload_kiov && payload_iov));
-
-	if (payload_kiov)
-		iov_iter_bvec(&from, ITER_BVEC | WRITE,
-			      payload_kiov, payload_niov,
-			      payload_nob + payload_offset);
-	else
-		iov_iter_kvec(&from, ITER_KVEC | WRITE,
-			      payload_iov, payload_niov,
-			      payload_nob + payload_offset);
-
-	iov_iter_advance(&from, payload_offset);
-
-	switch (type) {
-	default:
-		LBUG();
-		return -EIO;
-
-	case LNET_MSG_ACK:
-		LASSERT(!payload_nob);
-		break;
-
-	case LNET_MSG_GET:
-		if (routing || target_is_router)
-			break;		  /* send IMMEDIATE */
-
-		/* is the REPLY message too small for RDMA? */
-		nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
-		if (nob <= IBLND_MSG_SIZE)
-			break;		  /* send IMMEDIATE */
-
-		tx = kiblnd_get_idle_tx(ni, target.nid);
-		if (!tx) {
-			CERROR("Can't allocate txd for GET to %s\n",
-			       libcfs_nid2str(target.nid));
-			return -ENOMEM;
-		}
-
-		ibmsg = tx->tx_msg;
-		rd = &ibmsg->ibm_u.get.ibgm_rd;
-		if (!(lntmsg->msg_md->md_options & LNET_MD_KIOV))
-			rc = kiblnd_setup_rd_iov(ni, tx, rd,
-						 lntmsg->msg_md->md_niov,
-						 lntmsg->msg_md->md_iov.iov,
-						 0, lntmsg->msg_md->md_length);
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, rd,
-						  lntmsg->msg_md->md_niov,
-						  lntmsg->msg_md->md_iov.kiov,
-						  0, lntmsg->msg_md->md_length);
-		if (rc) {
-			CERROR("Can't setup GET sink for %s: %d\n",
-			       libcfs_nid2str(target.nid), rc);
-			kiblnd_tx_done(ni, tx);
-			return -EIO;
-		}
-
-		nob = offsetof(struct kib_get_msg, ibgm_rd.rd_frags[rd->rd_nfrags]);
-		ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
-		ibmsg->ibm_u.get.ibgm_hdr = *hdr;
-
-		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob);
-
-		tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
-		if (!tx->tx_lntmsg[1]) {
-			CERROR("Can't create reply for GET -> %s\n",
-			       libcfs_nid2str(target.nid));
-			kiblnd_tx_done(ni, tx);
-			return -EIO;
-		}
-
-		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg[0,1] on completion */
-		tx->tx_waiting = 1;	     /* waiting for GET_DONE */
-		kiblnd_launch_tx(ni, tx, target.nid);
-		return 0;
-
-	case LNET_MSG_REPLY:
-	case LNET_MSG_PUT:
-		/* Is the payload small enough not to need RDMA? */
-		nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]);
-		if (nob <= IBLND_MSG_SIZE)
-			break;		  /* send IMMEDIATE */
-
-		tx = kiblnd_get_idle_tx(ni, target.nid);
-		if (!tx) {
-			CERROR("Can't allocate %s txd for %s\n",
-			       type == LNET_MSG_PUT ? "PUT" : "REPLY",
-			       libcfs_nid2str(target.nid));
-			return -ENOMEM;
-		}
-
-		if (!payload_kiov)
-			rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
-						 payload_niov, payload_iov,
-						 payload_offset, payload_nob);
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
-						  payload_niov, payload_kiov,
-						  payload_offset, payload_nob);
-		if (rc) {
-			CERROR("Can't setup PUT src for %s: %d\n",
-			       libcfs_nid2str(target.nid), rc);
-			kiblnd_tx_done(ni, tx);
-			return -EIO;
-		}
-
-		ibmsg = tx->tx_msg;
-		ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
-		ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
-		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(struct kib_putreq_msg));
-
-		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg on completion */
-		tx->tx_waiting = 1;	     /* waiting for PUT_{ACK,NAK} */
-		kiblnd_launch_tx(ni, tx, target.nid);
-		return 0;
-	}
-
-	/* send IMMEDIATE */
-
-	LASSERT(offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob])
-		 <= IBLND_MSG_SIZE);
-
-	tx = kiblnd_get_idle_tx(ni, target.nid);
-	if (!tx) {
-		CERROR("Can't send %d to %s: tx descs exhausted\n",
-		       type, libcfs_nid2str(target.nid));
-		return -ENOMEM;
-	}
-
-	ibmsg = tx->tx_msg;
-	ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
-	rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob,
-			    &from);
-	if (rc != payload_nob) {
-		kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
-		return -EFAULT;
-	}
-
-	nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
-	kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
-
-	tx->tx_lntmsg[0] = lntmsg;	      /* finalise lntmsg on completion */
-	kiblnd_launch_tx(ni, tx, target.nid);
-	return 0;
-}
-
-static void
-kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg)
-{
-	struct lnet_process_id target = lntmsg->msg_target;
-	unsigned int niov = lntmsg->msg_niov;
-	struct kvec *iov = lntmsg->msg_iov;
-	struct bio_vec *kiov = lntmsg->msg_kiov;
-	unsigned int offset = lntmsg->msg_offset;
-	unsigned int nob = lntmsg->msg_len;
-	struct kib_tx *tx;
-	int rc;
-
-	tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid);
-	if (!tx) {
-		CERROR("Can't get tx for REPLY to %s\n",
-		       libcfs_nid2str(target.nid));
-		goto failed_0;
-	}
-
-	if (!nob)
-		rc = 0;
-	else if (!kiov)
-		rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
-					 niov, iov, offset, nob);
-	else
-		rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
-					  niov, kiov, offset, nob);
-
-	if (rc) {
-		CERROR("Can't setup GET src for %s: %d\n",
-		       libcfs_nid2str(target.nid), rc);
-		goto failed_1;
-	}
-
-	rc = kiblnd_init_rdma(rx->rx_conn, tx,
-			      IBLND_MSG_GET_DONE, nob,
-			      &rx->rx_msg->ibm_u.get.ibgm_rd,
-			      rx->rx_msg->ibm_u.get.ibgm_cookie);
-	if (rc < 0) {
-		CERROR("Can't setup rdma for GET from %s: %d\n",
-		       libcfs_nid2str(target.nid), rc);
-		goto failed_1;
-	}
-
-	if (!nob) {
-		/* No RDMA: local completion may happen now! */
-		lnet_finalize(ni, lntmsg, 0);
-	} else {
-		/* RDMA: lnet_finalize(lntmsg) when it completes */
-		tx->tx_lntmsg[0] = lntmsg;
-	}
-
-	kiblnd_queue_tx(tx, rx->rx_conn);
-	return;
-
- failed_1:
-	kiblnd_tx_done(ni, tx);
- failed_0:
-	lnet_finalize(ni, lntmsg, -EIO);
-}
-
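-/*
- * LND receive entrypoint, called once LNet has matched the message.
- * IMMEDIATE payloads are copied straight into the iov.  A PUT_REQ
- * gets a sink set up and a PUT_ACK sent back, with the rx buffer
- * kept reserved for the PUT_DONE.  A GET_REQ either RDMAs the
- * matched reply via kiblnd_reply() or completes with -ENODATA.
- */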
-int
-kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-	    int delayed, struct iov_iter *to, unsigned int rlen)
-{
-	struct kib_rx *rx = private;
-	struct kib_msg *rxmsg = rx->rx_msg;
-	struct kib_conn *conn = rx->rx_conn;
-	struct kib_tx *tx;
-	int nob;
-	int post_credit = IBLND_POSTRX_PEER_CREDIT;
-	int rc = 0;
-
-	LASSERT(iov_iter_count(to) <= rlen);
-	LASSERT(!in_interrupt());
-	/* Either all pages or all vaddrs */
-
-	switch (rxmsg->ibm_type) {
-	default:
-		LBUG();
-
-	case IBLND_MSG_IMMEDIATE:
-		nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[rlen]);
-		if (nob > rx->rx_nob) {
-			CERROR("Immediate message from %s too big: %d(%d)\n",
-			       libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
-			       nob, rx->rx_nob);
-			rc = -EPROTO;
-			break;
-		}
-
-		rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen,
-				  to);
-		if (rc != rlen) {
-			rc = -EFAULT;
-			break;
-		}
-
-		rc = 0;
-		lnet_finalize(ni, lntmsg, 0);
-		break;
-
-	case IBLND_MSG_PUT_REQ: {
-		struct kib_msg	*txmsg;
-		struct kib_rdma_desc *rd;
-
-		if (!iov_iter_count(to)) {
-			lnet_finalize(ni, lntmsg, 0);
-			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
-					       rxmsg->ibm_u.putreq.ibprm_cookie);
-			break;
-		}
-
-		tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-		if (!tx) {
-			CERROR("Can't allocate tx for %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			/* Not replying will break the connection */
-			rc = -ENOMEM;
-			break;
-		}
-
-		txmsg = tx->tx_msg;
-		rd = &txmsg->ibm_u.putack.ibpam_rd;
-		if (!(to->type & ITER_BVEC))
-			rc = kiblnd_setup_rd_iov(ni, tx, rd,
-						 to->nr_segs, to->kvec,
-						 to->iov_offset,
-						 iov_iter_count(to));
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, rd,
-						  to->nr_segs, to->bvec,
-						  to->iov_offset,
-						  iov_iter_count(to));
-		if (rc) {
-			CERROR("Can't setup PUT sink for %s: %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
-			kiblnd_tx_done(ni, tx);
-			/* tell peer it's over */
-			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
-					       rxmsg->ibm_u.putreq.ibprm_cookie);
-			break;
-		}
-
-		nob = offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[rd->rd_nfrags]);
-		txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
-		txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-
-		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob);
-
-		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg on completion */
-		tx->tx_waiting = 1;	     /* waiting for PUT_DONE */
-		kiblnd_queue_tx(tx, conn);
-
-		/* reposted buffer reserved for PUT_DONE */
-		post_credit = IBLND_POSTRX_NO_CREDIT;
-		break;
-		}
-
-	case IBLND_MSG_GET_REQ:
-		if (lntmsg) {
-			/* Optimized GET; RDMA lntmsg's payload */
-			kiblnd_reply(ni, rx, lntmsg);
-		} else {
-			/* GET didn't match anything */
-			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE,
-					       -ENODATA,
-					       rxmsg->ibm_u.get.ibgm_cookie);
-		}
-		break;
-	}
-
-	kiblnd_post_rx(rx, post_credit);
-	return rc;
-}
-
-int
-kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-	struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-
-	atomic_inc(&kiblnd_data.kib_nthreads);
-	return 0;
-}
-
-static void
-kiblnd_thread_fini(void)
-{
-	atomic_dec(&kiblnd_data.kib_nthreads);
-}
-
-static void
-kiblnd_peer_alive(struct kib_peer *peer)
-{
-	/* This is racy, but everyone's only writing jiffies */
-	peer->ibp_last_alive = jiffies;
-	mb();
-}
-
-static void
-kiblnd_peer_notify(struct kib_peer *peer)
-{
-	int error = 0;
-	unsigned long last_alive = 0;
-	unsigned long flags;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	if (kiblnd_peer_idle(peer) && peer->ibp_error) {
-		error = peer->ibp_error;
-		peer->ibp_error = 0;
-
-		last_alive = peer->ibp_last_alive;
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	if (error)
-		lnet_notify(peer->ibp_ni,
-			    peer->ibp_nid, 0, last_alive);
-}
-
-void
-kiblnd_close_conn_locked(struct kib_conn *conn, int error)
-{
-	/*
-	 * This just does the immediate housekeeping. 'error' is zero for a
-	 * normal shutdown which can happen only after the connection has been
-	 * established.  If the connection is established, schedule the
-	 * connection to be finished off by the connd. Otherwise the connd is
-	 * already dealing with it (either to set it up or tear it down).
-	 * Caller holds kib_global_lock exclusively in irq context
-	 */
-	struct kib_peer *peer = conn->ibc_peer;
-	struct kib_dev *dev;
-	unsigned long flags;
-
-	LASSERT(error || conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	if (error && !conn->ibc_comms_error)
-		conn->ibc_comms_error = error;
-
-	if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
-		return; /* already being handled  */
-
-	if (!error &&
-	    list_empty(&conn->ibc_tx_noops) &&
-	    list_empty(&conn->ibc_tx_queue) &&
-	    list_empty(&conn->ibc_tx_queue_rsrvd) &&
-	    list_empty(&conn->ibc_tx_queue_nocred) &&
-	    list_empty(&conn->ibc_active_txs)) {
-		CDEBUG(D_NET, "closing conn to %s\n",
-		       libcfs_nid2str(peer->ibp_nid));
-	} else {
-		CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
-			libcfs_nid2str(peer->ibp_nid), error,
-			list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
-			list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
-			list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
-			list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
-			list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
-	}
-
-	dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev;
-	if (peer->ibp_next_conn == conn)
-		/* clear next_conn so it won't be used */
-		peer->ibp_next_conn = NULL;
-	list_del(&conn->ibc_list);
-	/* connd (see below) takes over ibc_list's ref */
-
-	if (list_empty(&peer->ibp_conns) &&    /* no more conns */
-	    kiblnd_peer_active(peer)) {	 /* still in peer table */
-		kiblnd_unlink_peer_locked(peer);
-
-		/* set/clear error on last conn */
-		peer->ibp_error = conn->ibc_comms_error;
-	}
-
-	kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
-
-	if (error &&
-	    kiblnd_dev_can_failover(dev)) {
-		list_add_tail(&dev->ibd_fail_list,
-			      &kiblnd_data.kib_failed_devs);
-		wake_up(&kiblnd_data.kib_failover_waitq);
-	}
-
-	spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
-
-	list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns);
-	wake_up(&kiblnd_data.kib_connd_waitq);
-
-	spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
-}
-
-void
-kiblnd_close_conn(struct kib_conn *conn, int error)
-{
-	unsigned long flags;
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	kiblnd_close_conn_locked(conn, error);
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-static void
-kiblnd_handle_early_rxs(struct kib_conn *conn)
-{
-	unsigned long flags;
-	struct kib_rx *rx;
-
-	LASSERT(!in_interrupt());
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	while (!list_empty(&conn->ibc_early_rxs)) {
-		rx = list_entry(conn->ibc_early_rxs.next,
-				struct kib_rx, rx_list);
-		list_del(&rx->rx_list);
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-		kiblnd_handle_rx(rx);
-
-		write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	}
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
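-/*
- * Fail every tx on 'txs' with -ECONNABORTED.  A tx whose send is
- * still in flight (tx_sending) must stay on its list until the
- * completion fires; only already-idle txs move to the zombie list
- * and are finalised here.
- */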
-static void
-kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
-{
-	LIST_HEAD(zombies);
-	struct list_head *tmp;
-	struct list_head *nxt;
-	struct kib_tx *tx;
-
-	spin_lock(&conn->ibc_lock);
-
-	list_for_each_safe(tmp, nxt, txs) {
-		tx = list_entry(tmp, struct kib_tx, tx_list);
-
-		if (txs == &conn->ibc_active_txs) {
-			LASSERT(!tx->tx_queued);
-			LASSERT(tx->tx_waiting || tx->tx_sending);
-		} else {
-			LASSERT(tx->tx_queued);
-		}
-
-		tx->tx_status = -ECONNABORTED;
-		tx->tx_waiting = 0;
-
-		if (!tx->tx_sending) {
-			tx->tx_queued = 0;
-			list_del(&tx->tx_list);
-			list_add(&tx->tx_list, &zombies);
-		}
-	}
-
-	spin_unlock(&conn->ibc_lock);
-
-	kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED);
-}
-
-static void
-kiblnd_finalise_conn(struct kib_conn *conn)
-{
-	LASSERT(!in_interrupt());
-	LASSERT(conn->ibc_state > IBLND_CONN_INIT);
-
-	kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED);
-
-	/*
-	 * abort_receives moves QP state to IB_QPS_ERR.  This is only required
-	 * for connections that didn't get as far as being connected, because
-	 * rdma_disconnect() does this for free.
-	 */
-	kiblnd_abort_receives(conn);
-
-	/*
-	 * Complete all tx descs not waiting for sends to complete.
-	 * NB we should be safe from RDMA now that the QP has changed state
-	 */
-	kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
-	kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
-	kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
-	kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
-	kiblnd_abort_txs(conn, &conn->ibc_active_txs);
-
-	kiblnd_handle_early_rxs(conn);
-}
-
-static void
-kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error)
-{
-	LIST_HEAD(zombies);
-	unsigned long flags;
-
-	LASSERT(error);
-	LASSERT(!in_interrupt());
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	if (active) {
-		LASSERT(peer->ibp_connecting > 0);
-		peer->ibp_connecting--;
-	} else {
-		LASSERT(peer->ibp_accepting > 0);
-		peer->ibp_accepting--;
-	}
-
-	if (kiblnd_peer_connecting(peer)) {
-		/* another connection attempt under way... */
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
-					flags);
-		return;
-	}
-
-	peer->ibp_reconnected = 0;
-	if (list_empty(&peer->ibp_conns)) {
-		/* Take peer's blocked transmits to complete with error */
-		list_add(&zombies, &peer->ibp_tx_queue);
-		list_del_init(&peer->ibp_tx_queue);
-
-		if (kiblnd_peer_active(peer))
-			kiblnd_unlink_peer_locked(peer);
-
-		peer->ibp_error = error;
-	} else {
-		/* Can't have blocked transmits if there are connections */
-		LASSERT(list_empty(&peer->ibp_tx_queue));
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	kiblnd_peer_notify(peer);
-
-	if (list_empty(&zombies))
-		return;
-
-	CNETERR("Deleting messages for %s: connection failed\n",
-		libcfs_nid2str(peer->ibp_nid));
-
-	kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
-}
-
-static void
-kiblnd_connreq_done(struct kib_conn *conn, int status)
-{
-	struct kib_peer *peer = conn->ibc_peer;
-	struct kib_tx *tx;
-	struct kib_tx *tmp;
-	struct list_head txs;
-	unsigned long flags;
-	int active;
-
-	active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
-	CDEBUG(D_NET, "%s: active(%d), version(%x), status(%d)\n",
-	       libcfs_nid2str(peer->ibp_nid), active,
-	       conn->ibc_version, status);
-
-	LASSERT(!in_interrupt());
-	LASSERT((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
-		 peer->ibp_connecting > 0) ||
-		 (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
-		 peer->ibp_accepting > 0));
-
-	kfree(conn->ibc_connvars);
-	conn->ibc_connvars = NULL;
-
-	if (status) {
-		/* failed to establish connection */
-		kiblnd_peer_connect_failed(peer, active, status);
-		kiblnd_finalise_conn(conn);
-		return;
-	}
-
-	/* connection established */
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	conn->ibc_last_send = jiffies;
-	kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
-	kiblnd_peer_alive(peer);
-
-	/*
-	 * Add conn to peer's list and nuke any dangling conns from a different
-	 * peer instance...
-	 */
-	kiblnd_conn_addref(conn);	       /* +1 ref for ibc_list */
-	list_add(&conn->ibc_list, &peer->ibp_conns);
-	peer->ibp_reconnected = 0;
-	if (active)
-		peer->ibp_connecting--;
-	else
-		peer->ibp_accepting--;
-
-	if (!peer->ibp_version) {
-		peer->ibp_version     = conn->ibc_version;
-		peer->ibp_incarnation = conn->ibc_incarnation;
-	}
-
-	if (peer->ibp_version     != conn->ibc_version ||
-	    peer->ibp_incarnation != conn->ibc_incarnation) {
-		kiblnd_close_stale_conns_locked(peer, conn->ibc_version,
-						conn->ibc_incarnation);
-		peer->ibp_version     = conn->ibc_version;
-		peer->ibp_incarnation = conn->ibc_incarnation;
-	}
-
-	/* grab pending txs while I have the lock */
-	list_add(&txs, &peer->ibp_tx_queue);
-	list_del_init(&peer->ibp_tx_queue);
-
-	if (!kiblnd_peer_active(peer) ||	/* peer has been deleted */
-	    conn->ibc_comms_error) {       /* error has happened already */
-		struct lnet_ni *ni = peer->ibp_ni;
-
-		/* start to shut down connection */
-		kiblnd_close_conn_locked(conn, -ECONNABORTED);
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-		kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
-
-		return;
-	}
-
-	/*
-	 * +1 ref for myself, this connection is visible to other threads
-	 * now, refcount of peer:ibp_conns can be released by connection
-	 * close from either a different thread, or the calling of
-	 * kiblnd_check_sends_locked() below. See bz21911 for details.
-	 */
-	kiblnd_conn_addref(conn);
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	/* Schedule blocked txs
-	 * Note: if we are running with conns_per_peer > 1, these blocked
-	 * txs will all get scheduled to the first connection which gets
-	 * scheduled.  We won't be using round robin on this first batch.
-	 */
-	spin_lock(&conn->ibc_lock);
-	list_for_each_entry_safe(tx, tmp, &txs, tx_list) {
-		list_del(&tx->tx_list);
-
-		kiblnd_queue_tx_locked(tx, conn);
-	}
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-
-	/* schedule blocked rxs */
-	kiblnd_handle_early_rxs(conn);
-
-	kiblnd_conn_decref(conn);
-}
-
-static void
-kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej)
-{
-	int rc;
-
-	rc = rdma_reject(cmid, rej, sizeof(*rej));
-
-	if (rc)
-		CWARN("Error %d sending reject\n", rc);
-}
-
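-/*
- * Handle an incoming connection request.  Magic and version are
- * checked before anything else so that an incompatible peer is
- * rejected with a reason it can understand; the request must then
- * match this NI's NID, device and incarnation, and the peer's
- * queue depth and max_frags are validated before being adopted.
- */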
-static int
-kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
-{
-	rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-	struct kib_msg *reqmsg = priv;
-	struct kib_msg *ackmsg;
-	struct kib_dev *ibdev;
-	struct kib_peer *peer;
-	struct kib_peer *peer2;
-	struct kib_conn *conn;
-	struct lnet_ni *ni  = NULL;
-	struct kib_net *net = NULL;
-	lnet_nid_t nid;
-	struct rdma_conn_param cp;
-	struct kib_rej rej;
-	int version = IBLND_MSG_VERSION;
-	unsigned long flags;
-	int max_frags;
-	int rc;
-	struct sockaddr_in *peer_addr;
-
-	LASSERT(!in_interrupt());
-
-	/* cmid inherits 'context' from the corresponding listener id */
-	ibdev = (struct kib_dev *)cmid->context;
-	LASSERT(ibdev);
-
-	memset(&rej, 0, sizeof(rej));
-	rej.ibr_magic = IBLND_MSG_MAGIC;
-	rej.ibr_why = IBLND_REJECT_FATAL;
-	rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
-	peer_addr = (struct sockaddr_in *)&cmid->route.addr.dst_addr;
-	if (*kiblnd_tunables.kib_require_priv_port &&
-	    ntohs(peer_addr->sin_port) >= PROT_SOCK) {
-		__u32 ip = ntohl(peer_addr->sin_addr.s_addr);
-
-		CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
-		       &ip, ntohs(peer_addr->sin_port));
-		goto failed;
-	}
-
-	if (priv_nob < offsetof(struct kib_msg, ibm_type)) {
-		CERROR("Short connection request\n");
-		goto failed;
-	}
-
-	/*
-	 * Future protocol version compatibility support!  If the
-	 * o2iblnd-specific protocol changes, or when LNET unifies
-	 * protocols over all LNDs, the initial connection will
-	 * negotiate a protocol version.  I trap this here to avoid
-	 * console errors; the reject tells the peer which protocol I
-	 * speak.
-	 */
-	if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
-	    reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
-		goto failed;
-	if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
-	    reqmsg->ibm_version != IBLND_MSG_VERSION &&
-	    reqmsg->ibm_version != IBLND_MSG_VERSION_1)
-		goto failed;
-	if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
-	    reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
-	    reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
-		goto failed;
-
-	rc = kiblnd_unpack_msg(reqmsg, priv_nob);
-	if (rc) {
-		CERROR("Can't parse connection request: %d\n", rc);
-		goto failed;
-	}
-
-	nid = reqmsg->ibm_srcnid;
-	ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
-
-	if (ni) {
-		net = (struct kib_net *)ni->ni_data;
-		rej.ibr_incarnation = net->ibn_incarnation;
-	}
-
-	if (!ni ||			 /* no matching net */
-	    ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
-	    net->ibn_dev != ibdev) {	      /* wrong device */
-		CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
-		       libcfs_nid2str(nid),
-		       !ni ? "NA" : libcfs_nid2str(ni->ni_nid),
-		       ibdev->ibd_ifname, ibdev->ibd_nnets,
-		       &ibdev->ibd_ifip,
-		       libcfs_nid2str(reqmsg->ibm_dstnid));
-
-		goto failed;
-	}
-
-	/* check the timestamp as soon as possible */
-	if (reqmsg->ibm_dststamp &&
-	    reqmsg->ibm_dststamp != net->ibn_incarnation) {
-		CWARN("Stale connection request\n");
-		rej.ibr_why = IBLND_REJECT_CONN_STALE;
-		goto failed;
-	}
-
-	/* I can accept peer's version */
-	version = reqmsg->ibm_version;
-
-	if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
-		CERROR("Unexpected connreq msg type: %x from %s\n",
-		       reqmsg->ibm_type, libcfs_nid2str(nid));
-		goto failed;
-	}
-
-	if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
-	    kiblnd_msg_queue_size(version, ni)) {
-		CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
-		       libcfs_nid2str(nid),
-		       reqmsg->ibm_u.connparams.ibcp_queue_depth,
-		       kiblnd_msg_queue_size(version, ni));
-
-		if (version == IBLND_MSG_VERSION)
-			rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
-
-		goto failed;
-	}
-
-	max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
-	if (max_frags > kiblnd_rdma_frags(version, ni)) {
-		CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
-		      libcfs_nid2str(nid), version, max_frags,
-		      kiblnd_rdma_frags(version, ni));
-
-		if (version >= IBLND_MSG_VERSION)
-			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
-		goto failed;
-	} else if (max_frags < kiblnd_rdma_frags(version, ni) &&
-		   !net->ibn_fmr_ps) {
-		CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
-		      libcfs_nid2str(nid), version, max_frags,
-		      kiblnd_rdma_frags(version, ni));
-
-		if (version == IBLND_MSG_VERSION)
-			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
-		goto failed;
-	}
-
-	if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
-		CERROR("Can't accept %s: message size %d too big (%d max)\n",
-		       libcfs_nid2str(nid),
-		       reqmsg->ibm_u.connparams.ibcp_max_msg_size,
-		       IBLND_MSG_SIZE);
-		goto failed;
-	}
-
-	/* assume 'nid' is a new peer; create  */
-	rc = kiblnd_create_peer(ni, &peer, nid);
-	if (rc) {
-		CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
-		rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
-		goto failed;
-	}
-
-	/* We have validated the peer's parameters so use those */
-	peer->ibp_max_frags = max_frags;
-	peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
-
-	write_lock_irqsave(g_lock, flags);
-
-	peer2 = kiblnd_find_peer_locked(nid);
-	if (peer2) {
-		if (!peer2->ibp_version) {
-			peer2->ibp_version     = version;
-			peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
-		}
-
-		/* not the guy I've talked with */
-		if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
-		    peer2->ibp_version     != version) {
-			kiblnd_close_peer_conns_locked(peer2, -ESTALE);
-
-			if (kiblnd_peer_active(peer2)) {
-				peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
-				peer2->ibp_version = version;
-			}
-			write_unlock_irqrestore(g_lock, flags);
-
-			CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n",
-			      libcfs_nid2str(nid), peer2->ibp_version, version,
-			      peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
-
-			kiblnd_peer_decref(peer);
-			rej.ibr_why = IBLND_REJECT_CONN_STALE;
-			goto failed;
-		}
-
-		/*
-		 * Tie-break connection race in favour of the higher NID.
-		 * If we keep running into a race condition multiple times,
-		 * we have to assume that the connection attempt with the
-		 * higher NID is stuck in a connecting state and will never
-		 * recover.  As such, we pass through this if-block and let
-		 * the lower NID connection win so we can move forward.
-		 */
-		if (peer2->ibp_connecting &&
-		    nid < ni->ni_nid && peer2->ibp_races <
-		    MAX_CONN_RACES_BEFORE_ABORT) {
-			peer2->ibp_races++;
-			write_unlock_irqrestore(g_lock, flags);
-
-			CDEBUG(D_NET, "Conn race %s\n",
-			       libcfs_nid2str(peer2->ibp_nid));
-
-			kiblnd_peer_decref(peer);
-			rej.ibr_why = IBLND_REJECT_CONN_RACE;
-			goto failed;
-		}
-		if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
-			CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
-				libcfs_nid2str(peer2->ibp_nid),
-				MAX_CONN_RACES_BEFORE_ABORT);
-		/*
-		 * A passive connection is allowed even when this peer is
-		 * waiting for reconnection.
-		 */
-		peer2->ibp_reconnecting = 0;
-		peer2->ibp_races = 0;
-		peer2->ibp_accepting++;
-		kiblnd_peer_addref(peer2);
-
-		/*
-		 * We raced with kiblnd_launch_tx (active connect) to create
-		 * this peer, so copy the validated parameters now that we
-		 * know the peer's limits.
-		 */
-		peer2->ibp_max_frags = peer->ibp_max_frags;
-		peer2->ibp_queue_depth = peer->ibp_queue_depth;
-
-		write_unlock_irqrestore(g_lock, flags);
-		kiblnd_peer_decref(peer);
-		peer = peer2;
-	} else {
-		/* Brand new peer */
-		LASSERT(!peer->ibp_accepting);
-		LASSERT(!peer->ibp_version &&
-			!peer->ibp_incarnation);
-
-		peer->ibp_accepting   = 1;
-		peer->ibp_version     = version;
-		peer->ibp_incarnation = reqmsg->ibm_srcstamp;
-
-		/* I have a ref on ni that prevents it being shutdown */
-		LASSERT(!net->ibn_shutdown);
-
-		kiblnd_peer_addref(peer);
-		list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
-		write_unlock_irqrestore(g_lock, flags);
-	}
-
-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT,
-				  version);
-	if (!conn) {
-		kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
-		kiblnd_peer_decref(peer);
-		rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
-		goto failed;
-	}
-
-	/*
-	 * conn now "owns" cmid, so I return success from here on to ensure the
-	 * CM callback doesn't destroy cmid.
-	 */
-	conn->ibc_incarnation      = reqmsg->ibm_srcstamp;
-	conn->ibc_credits          = conn->ibc_queue_depth;
-	conn->ibc_reserved_credits = conn->ibc_queue_depth;
-	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
-		IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
-
-	ackmsg = &conn->ibc_connvars->cv_msg;
-	memset(ackmsg, 0, sizeof(*ackmsg));
-
-	kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
-			sizeof(ackmsg->ibm_u.connparams));
-	ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
-	ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
-	ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
-	kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
-
-	memset(&cp, 0, sizeof(cp));
-	cp.private_data	= ackmsg;
-	cp.private_data_len = ackmsg->ibm_nob;
-	cp.responder_resources = 0;	     /* No atomic ops or RDMA reads */
-	cp.initiator_depth = 0;
-	cp.flow_control	= 1;
-	cp.retry_count = *kiblnd_tunables.kib_retry_count;
-	cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
-	CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
-
-	rc = rdma_accept(cmid, &cp);
-	if (rc) {
-		CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
-		rej.ibr_version = version;
-		rej.ibr_why     = IBLND_REJECT_FATAL;
-
-		kiblnd_reject(cmid, &rej);
-		kiblnd_connreq_done(conn, rc);
-		kiblnd_conn_decref(conn);
-	}
-
-	lnet_ni_decref(ni);
-	return 0;
-
- failed:
-	if (ni) {
-		rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
-		rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
-		lnet_ni_decref(ni);
-	}
-
-	rej.ibr_version             = version;
-	kiblnd_reject(cmid, &rej);
-
-	return -ECONNREFUSED;
-}
-
-static void
-kiblnd_check_reconnect(struct kib_conn *conn, int version,
-		       __u64 incarnation, int why, struct kib_connparams *cp)
-{
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_peer *peer = conn->ibc_peer;
-	char *reason;
-	int msg_size = IBLND_MSG_SIZE;
-	int frag_num = -1;
-	int queue_dep = -1;
-	bool reconnect;
-	unsigned long flags;
-
-	LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-	LASSERT(peer->ibp_connecting > 0);     /* 'conn' at least */
-
-	if (cp) {
-		msg_size = cp->ibcp_max_msg_size;
-		frag_num	= cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
-		queue_dep = cp->ibcp_queue_depth;
-	}
-
-	write_lock_irqsave(glock, flags);
-	/*
-	 * retry connection if it's still needed and no other connection
-	 * attempts (active or passive) are in progress
-	 * NB: reconnect is still needed even when ibp_tx_queue is
-	 * empty if ibp_version != version because reconnect may be
-	 * initiated by kiblnd_query()
-	 */
-	reconnect = (!list_empty(&peer->ibp_tx_queue) ||
-		     peer->ibp_version != version) &&
-		    peer->ibp_connecting &&
-		    !peer->ibp_accepting;
-	if (!reconnect) {
-		reason = "no need";
-		goto out;
-	}
-
-	switch (why) {
-	default:
-		reason = "Unknown";
-		break;
-
-	case IBLND_REJECT_RDMA_FRAGS: {
-		struct lnet_ioctl_config_lnd_tunables *tunables;
-
-		if (!cp) {
-			reason = "can't negotiate max frags";
-			goto out;
-		}
-		tunables = peer->ibp_ni->ni_lnd_tunables;
-		if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
-			reason = "map_on_demand must be enabled";
-			goto out;
-		}
-		if (conn->ibc_max_frags <= frag_num) {
-			reason = "unsupported max frags";
-			goto out;
-		}
-
-		peer->ibp_max_frags = frag_num;
-		reason = "rdma fragments";
-		break;
-	}
-	case IBLND_REJECT_MSG_QUEUE_SIZE:
-		if (!cp) {
-			reason = "can't negotiate queue depth";
-			goto out;
-		}
-		if (conn->ibc_queue_depth <= queue_dep) {
-			reason = "unsupported queue depth";
-			goto out;
-		}
-
-		peer->ibp_queue_depth = queue_dep;
-		reason = "queue depth";
-		break;
-
-	case IBLND_REJECT_CONN_STALE:
-		reason = "stale";
-		break;
-
-	case IBLND_REJECT_CONN_RACE:
-		reason = "conn race";
-		break;
-
-	case IBLND_REJECT_CONN_UNCOMPAT:
-		reason = "version negotiation";
-		break;
-	}
-
-	conn->ibc_reconnect = 1;
-	peer->ibp_reconnecting++;
-	peer->ibp_version = version;
-	if (incarnation)
-		peer->ibp_incarnation = incarnation;
-out:
-	write_unlock_irqrestore(glock, flags);
-
-	CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n",
-		libcfs_nid2str(peer->ibp_nid),
-		reconnect ? "reconnect" : "don't reconnect",
-		reason, IBLND_MSG_VERSION, version, msg_size,
-		conn->ibc_queue_depth, queue_dep,
-		conn->ibc_max_frags, frag_num);
-	/*
-	 * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer
-	 * while destroying the zombie
-	 */
-}
-
-static void
-kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
-{
-	struct kib_peer *peer = conn->ibc_peer;
-
-	LASSERT(!in_interrupt());
-	LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
-	switch (reason) {
-	case IB_CM_REJ_STALE_CONN:
-		kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0,
-				       IBLND_REJECT_CONN_STALE, NULL);
-		break;
-
-	case IB_CM_REJ_INVALID_SERVICE_ID:
-		CNETERR("%s rejected: no listener at %d\n",
-			libcfs_nid2str(peer->ibp_nid),
-			*kiblnd_tunables.kib_service);
-		break;
-
-	case IB_CM_REJ_CONSUMER_DEFINED:
-		if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) {
-			struct kib_rej *rej = priv;
-			struct kib_connparams *cp = NULL;
-			int flip = 0;
-			__u64 incarnation = -1;
-
-			/* NB. the default incarnation is -1 because:
-			 * a) V1 ignores the dst incarnation in the connreq.
-			 * b) V2 provides an incarnation when rejecting me,
-			 *    overwriting the -1.
-			 *
-			 * If I connect to a V1 peer with the V2 protocol, it
-			 * rejects me and then upgrades to V2; knowing nothing
-			 * of the upgrade, I retry with V1.  The upgraded V2
-			 * peer can then tell I'm talking to the old version
-			 * and reject me (incarnation is -1).
-			 */
-
-			if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) ||
-			    rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) {
-				__swab32s(&rej->ibr_magic);
-				__swab16s(&rej->ibr_version);
-				flip = 1;
-			}
-
-			if (priv_nob >= sizeof(struct kib_rej) &&
-			    rej->ibr_version > IBLND_MSG_VERSION_1) {
-				/*
-				 * priv_nob is always 148 (the define of
-				 * IB_CM_REJ_PRIVATE_DATA_SIZE) in current
-				 * versions of OFED, so we still need to
-				 * check the version.
-				 */
-				cp = &rej->ibr_cp;
-
-				if (flip) {
-					__swab64s(&rej->ibr_incarnation);
-					__swab16s(&cp->ibcp_queue_depth);
-					__swab16s(&cp->ibcp_max_frags);
-					__swab32s(&cp->ibcp_max_msg_size);
-				}
-
-				incarnation = rej->ibr_incarnation;
-			}
-
-			if (rej->ibr_magic != IBLND_MSG_MAGIC &&
-			    rej->ibr_magic != LNET_PROTO_MAGIC) {
-				CERROR("%s rejected: consumer defined fatal error\n",
-				       libcfs_nid2str(peer->ibp_nid));
-				break;
-			}
-
-			if (rej->ibr_version != IBLND_MSG_VERSION &&
-			    rej->ibr_version != IBLND_MSG_VERSION_1) {
-				CERROR("%s rejected: o2iblnd version %x error\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       rej->ibr_version);
-				break;
-			}
-
-			if (rej->ibr_why     == IBLND_REJECT_FATAL &&
-			    rej->ibr_version == IBLND_MSG_VERSION_1) {
-				CDEBUG(D_NET, "rejected by old version peer %s: %x\n",
-				       libcfs_nid2str(peer->ibp_nid), rej->ibr_version);
-
-				if (conn->ibc_version != IBLND_MSG_VERSION_1)
-					rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT;
-			}
-
-			switch (rej->ibr_why) {
-			case IBLND_REJECT_CONN_RACE:
-			case IBLND_REJECT_CONN_STALE:
-			case IBLND_REJECT_CONN_UNCOMPAT:
-			case IBLND_REJECT_MSG_QUEUE_SIZE:
-			case IBLND_REJECT_RDMA_FRAGS:
-				kiblnd_check_reconnect(conn, rej->ibr_version,
-						       incarnation,
-						       rej->ibr_why, cp);
-				break;
-
-			case IBLND_REJECT_NO_RESOURCES:
-				CERROR("%s rejected: o2iblnd no resources\n",
-				       libcfs_nid2str(peer->ibp_nid));
-				break;
-
-			case IBLND_REJECT_FATAL:
-				CERROR("%s rejected: o2iblnd fatal error\n",
-				       libcfs_nid2str(peer->ibp_nid));
-				break;
-
-			default:
-				CERROR("%s rejected: o2iblnd reason %d\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       rej->ibr_why);
-				break;
-			}
-			break;
-		}
-		/* fall through */
-	default:
-		CNETERR("%s rejected: reason %d, size %d\n",
-			libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
-		break;
-	}
-
-	kiblnd_connreq_done(conn, -ECONNREFUSED);
-}
-
-static void
-kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
-{
-	struct kib_peer *peer = conn->ibc_peer;
-	struct lnet_ni *ni = peer->ibp_ni;
-	struct kib_net *net = ni->ni_data;
-	struct kib_msg *msg = priv;
-	int ver = conn->ibc_version;
-	int rc = kiblnd_unpack_msg(msg, priv_nob);
-	unsigned long flags;
-
-	LASSERT(net);
-
-	if (rc) {
-		CERROR("Can't unpack connack from %s: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc);
-		goto failed;
-	}
-
-	if (msg->ibm_type != IBLND_MSG_CONNACK) {
-		CERROR("Unexpected message %d from %s\n",
-		       msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if (ver != msg->ibm_version) {
-		CERROR("%s replied with version %x which differs from requested version %x\n",
-		       libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if (msg->ibm_u.connparams.ibcp_queue_depth >
-	    conn->ibc_queue_depth) {
-		CERROR("%s has incompatible queue depth %d (<=%d wanted)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       msg->ibm_u.connparams.ibcp_queue_depth,
-		       conn->ibc_queue_depth);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
-	    conn->ibc_max_frags) {
-		CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
-		       conn->ibc_max_frags);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
-		CERROR("%s max message size %d too big (%d max)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       msg->ibm_u.connparams.ibcp_max_msg_size,
-		       IBLND_MSG_SIZE);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	if (msg->ibm_dstnid == ni->ni_nid &&
-	    msg->ibm_dststamp == net->ibn_incarnation)
-		rc = 0;
-	else
-		rc = -ESTALE;
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	if (rc) {
-		CERROR("Bad connection reply from %s, rc = %d, version: %x max_frags: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc,
-		       msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags);
-		goto failed;
-	}
-
-	conn->ibc_incarnation = msg->ibm_srcstamp;
-	conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
-	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
-		IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
-
-	kiblnd_connreq_done(conn, 0);
-	return;
-
- failed:
-	/*
-	 * NB My QP has already established itself, so I handle anything going
-	 * wrong here by setting ibc_comms_error.
-	 * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then
-	 * immediately tears it down.
-	 */
-	LASSERT(rc);
-	conn->ibc_comms_error = rc;
-	kiblnd_connreq_done(conn, 0);
-}
-
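A side note on the connack checks above: ibcp_max_frags travels on the wire scaled by IBLND_FRAG_SHIFT, which is why both the bounds check and the final assignment shift it back down. My reading is that the shift normalizes fragment counts to 4 KiB wire units across hosts with different page sizes; a standalone sketch under that assumption (all values illustrative):

#include <stdio.h>

/*
 * Sketch of the max_frags wire scaling in kiblnd_check_connreply().
 * ASSUMPTION: frag_shift = page_shift - 12, so the wire unit is 4 KiB
 * regardless of the local page size.
 */
int main(void)
{
	const int page_shift = 16;		/* e.g. a 64 KiB-page host */
	const int frag_shift = page_shift - 12;	/* = 4 */
	unsigned int local_frags = 16;
	unsigned int wire = local_frags << frag_shift;	/* encode: 256 */
	unsigned int back = wire >> frag_shift;		/* decode: 16 */

	printf("wire=%u decoded=%u\n", wire, back);
	return 0;
}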
-static int
-kiblnd_active_connect(struct rdma_cm_id *cmid)
-{
-	struct kib_peer *peer = (struct kib_peer *)cmid->context;
-	struct kib_conn *conn;
-	struct kib_msg *msg;
-	struct rdma_conn_param cp;
-	int version;
-	__u64 incarnation;
-	unsigned long flags;
-	int rc;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	incarnation = peer->ibp_incarnation;
-	version = !peer->ibp_version ? IBLND_MSG_VERSION :
-				       peer->ibp_version;
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
-				  version);
-	if (!conn) {
-		kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
-		kiblnd_peer_decref(peer); /* lose cmid's ref */
-		return -ENOMEM;
-	}
-
-	/*
-	 * conn "owns" cmid now, so I return success from here on to ensure the
-	 * CM callback doesn't destroy cmid. conn also takes over cmid's ref
-	 * on peer
-	 */
-	msg = &conn->ibc_connvars->cv_msg;
-
-	memset(msg, 0, sizeof(*msg));
-	kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
-	msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
-	msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
-	msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
-	kiblnd_pack_msg(peer->ibp_ni, msg, version,
-			0, peer->ibp_nid, incarnation);
-
-	memset(&cp, 0, sizeof(cp));
-	cp.private_data	= msg;
-	cp.private_data_len    = msg->ibm_nob;
-	cp.responder_resources = 0;	     /* No atomic ops or RDMA reads */
-	cp.initiator_depth     = 0;
-	cp.flow_control        = 1;
-	cp.retry_count         = *kiblnd_tunables.kib_retry_count;
-	cp.rnr_retry_count     = *kiblnd_tunables.kib_rnr_retry_count;
-
-	LASSERT(cmid->context == (void *)conn);
-	LASSERT(conn->ibc_cmid == cmid);
-
-	rc = rdma_connect(cmid, &cp);
-	if (rc) {
-		CERROR("Can't connect to %s: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc);
-		kiblnd_connreq_done(conn, rc);
-		kiblnd_conn_decref(conn);
-	}
-
-	return 0;
-}
-
-int
-kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
-{
-	struct kib_peer *peer;
-	struct kib_conn *conn;
-	int rc;
-
-	switch (event->event) {
-	default:
-		CERROR("Unexpected event: %d, status: %d\n",
-		       event->event, event->status);
-		LBUG();
-
-	case RDMA_CM_EVENT_CONNECT_REQUEST:
-		/* destroy cmid on failure */
-		rc = kiblnd_passive_connect(cmid,
-					    (void *)KIBLND_CONN_PARAM(event),
-					    KIBLND_CONN_PARAM_LEN(event));
-		CDEBUG(D_NET, "connreq: %d\n", rc);
-		return rc;
-
-	case RDMA_CM_EVENT_ADDR_ERROR:
-		peer = (struct kib_peer *)cmid->context;
-		CNETERR("%s: ADDR ERROR %d\n",
-			libcfs_nid2str(peer->ibp_nid), event->status);
-		kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
-		kiblnd_peer_decref(peer);
-		return -EHOSTUNREACH;      /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_ADDR_RESOLVED:
-		peer = (struct kib_peer *)cmid->context;
-
-		CDEBUG(D_NET, "%s Addr resolved: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), event->status);
-
-		if (event->status) {
-			CNETERR("Can't resolve address for %s: %d\n",
-				libcfs_nid2str(peer->ibp_nid), event->status);
-			rc = event->status;
-		} else {
-			rc = rdma_resolve_route(
-				cmid, *kiblnd_tunables.kib_timeout * 1000);
-			if (!rc) {
-				struct kib_net *net = peer->ibp_ni->ni_data;
-				struct kib_dev *dev = net->ibn_dev;
-
-				CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       dev->ibd_ifname,
-				       &dev->ibd_ifip, cmid->device->name);
-
-				return 0;
-			}
-
-			/* Can't initiate route resolution */
-			CERROR("Can't resolve route for %s: %d\n",
-			       libcfs_nid2str(peer->ibp_nid), rc);
-		}
-		kiblnd_peer_connect_failed(peer, 1, rc);
-		kiblnd_peer_decref(peer);
-		return rc;		      /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_ROUTE_ERROR:
-		peer = (struct kib_peer *)cmid->context;
-		CNETERR("%s: ROUTE ERROR %d\n",
-			libcfs_nid2str(peer->ibp_nid), event->status);
-		kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
-		kiblnd_peer_decref(peer);
-		return -EHOSTUNREACH;	   /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_ROUTE_RESOLVED:
-		peer = (struct kib_peer *)cmid->context;
-		CDEBUG(D_NET, "%s Route resolved: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), event->status);
-
-		if (!event->status)
-			return kiblnd_active_connect(cmid);
-
-		CNETERR("Can't resolve route for %s: %d\n",
-			libcfs_nid2str(peer->ibp_nid), event->status);
-		kiblnd_peer_connect_failed(peer, 1, event->status);
-		kiblnd_peer_decref(peer);
-		return event->status;	   /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_UNREACHABLE:
-		conn = (struct kib_conn *)cmid->context;
-		LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
-			conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
-		CNETERR("%s: UNREACHABLE %d\n",
-			libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
-		kiblnd_connreq_done(conn, -ENETDOWN);
-		kiblnd_conn_decref(conn);
-		return 0;
-
-	case RDMA_CM_EVENT_CONNECT_ERROR:
-		conn = (struct kib_conn *)cmid->context;
-		LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
-			conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
-		CNETERR("%s: CONNECT ERROR %d\n",
-			libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
-		kiblnd_connreq_done(conn, -ENOTCONN);
-		kiblnd_conn_decref(conn);
-		return 0;
-
-	case RDMA_CM_EVENT_REJECTED:
-		conn = (struct kib_conn *)cmid->context;
-		switch (conn->ibc_state) {
-		default:
-			LBUG();
-
-		case IBLND_CONN_PASSIVE_WAIT:
-			CERROR("%s: REJECTED %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       event->status);
-			kiblnd_connreq_done(conn, -ECONNRESET);
-			break;
-
-		case IBLND_CONN_ACTIVE_CONNECT:
-			kiblnd_rejected(conn, event->status,
-					(void *)KIBLND_CONN_PARAM(event),
-					KIBLND_CONN_PARAM_LEN(event));
-			break;
-		}
-		kiblnd_conn_decref(conn);
-		return 0;
-
-	case RDMA_CM_EVENT_ESTABLISHED:
-		conn = (struct kib_conn *)cmid->context;
-		switch (conn->ibc_state) {
-		default:
-			LBUG();
-
-		case IBLND_CONN_PASSIVE_WAIT:
-			CDEBUG(D_NET, "ESTABLISHED (passive): %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			kiblnd_connreq_done(conn, 0);
-			break;
-
-		case IBLND_CONN_ACTIVE_CONNECT:
-			CDEBUG(D_NET, "ESTABLISHED(active): %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			kiblnd_check_connreply(conn,
-					       (void *)KIBLND_CONN_PARAM(event),
-					       KIBLND_CONN_PARAM_LEN(event));
-			break;
-		}
-		/* net keeps its ref on conn! */
-		return 0;
-
-	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n");
-		return 0;
-	case RDMA_CM_EVENT_DISCONNECTED:
-		conn = (struct kib_conn *)cmid->context;
-		if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-			CERROR("%s DISCONNECTED\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			kiblnd_connreq_done(conn, -ECONNRESET);
-		} else {
-			kiblnd_close_conn(conn, 0);
-		}
-		kiblnd_conn_decref(conn);
-		cmid->context = NULL;
-		return 0;
-
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		LCONSOLE_ERROR_MSG(0x131,
-				   "Received notification of device removal\n"
-				   "Please shut down LNET to allow this to proceed\n");
-		/*
-		 * Can't remove network from underneath LNET for now, so I have
-		 * to ignore this
-		 */
-		return 0;
-
-	case RDMA_CM_EVENT_ADDR_CHANGE:
-		LCONSOLE_INFO("Physical link changed (e.g. HCA/port)\n");
-		return 0;
-	}
-}
-
-static int
-kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs)
-{
-	struct kib_tx *tx;
-	struct list_head *ttmp;
-
-	list_for_each(ttmp, txs) {
-		tx = list_entry(ttmp, struct kib_tx, tx_list);
-
-		if (txs != &conn->ibc_active_txs) {
-			LASSERT(tx->tx_queued);
-		} else {
-			LASSERT(!tx->tx_queued);
-			LASSERT(tx->tx_waiting || tx->tx_sending);
-		}
-
-		if (time_after_eq(jiffies, tx->tx_deadline)) {
-			CERROR("Timed out tx: %s, %lu seconds\n",
-			       kiblnd_queue2str(conn, txs),
-			       (jiffies - tx->tx_deadline) / HZ);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int
-kiblnd_conn_timed_out_locked(struct kib_conn *conn)
-{
-	return  kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_active_txs);
-}
-
-static void
-kiblnd_check_conns(int idx)
-{
-	LIST_HEAD(closes);
-	LIST_HEAD(checksends);
-	struct list_head *peers = &kiblnd_data.kib_peers[idx];
-	struct list_head *ptmp;
-	struct kib_peer *peer;
-	struct kib_conn *conn;
-	struct kib_conn *temp;
-	struct kib_conn *tmp;
-	struct list_head *ctmp;
-	unsigned long flags;
-
-	/*
-	 * NB. We expect to have a look at all the peers and not find any
-	 * RDMAs to time out, so we just use a shared lock while we
-	 * take a look...
-	 */
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	list_for_each(ptmp, peers) {
-		peer = list_entry(ptmp, struct kib_peer, ibp_list);
-
-		list_for_each(ctmp, &peer->ibp_conns) {
-			int timedout;
-			int sendnoop;
-
-			conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-			LASSERT(conn->ibc_state == IBLND_CONN_ESTABLISHED);
-
-			spin_lock(&conn->ibc_lock);
-
-			sendnoop = kiblnd_need_noop(conn);
-			timedout = kiblnd_conn_timed_out_locked(conn);
-			if (!sendnoop && !timedout) {
-				spin_unlock(&conn->ibc_lock);
-				continue;
-			}
-
-			if (timedout) {
-				CERROR("Timed out RDMA with %s (%lu): c: %u, oc: %u, rc: %u\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       (jiffies - peer->ibp_last_alive) / HZ,
-				       conn->ibc_credits,
-				       conn->ibc_outstanding_credits,
-				       conn->ibc_reserved_credits);
-				list_add(&conn->ibc_connd_list, &closes);
-			} else {
-				list_add(&conn->ibc_connd_list, &checksends);
-			}
-			/* +ref for 'closes' or 'checksends' */
-			kiblnd_conn_addref(conn);
-
-			spin_unlock(&conn->ibc_lock);
-		}
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	/*
-	 * Handle timeout by closing the whole
-	 * connection. We can only be sure RDMA activity
-	 * has ceased once the QP has been modified.
-	 */
-	list_for_each_entry_safe(conn, tmp, &closes, ibc_connd_list) {
-		list_del(&conn->ibc_connd_list);
-		kiblnd_close_conn(conn, -ETIMEDOUT);
-		kiblnd_conn_decref(conn);
-	}
-
-	/*
-	 * In case we have enough credits to return via a
-	 * NOOP, but there were no non-blocking tx descs
-	 * free to do it last time...
-	 */
-	list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
-		list_del(&conn->ibc_connd_list);
-
-		spin_lock(&conn->ibc_lock);
-		kiblnd_check_sends_locked(conn);
-		spin_unlock(&conn->ibc_lock);
-
-		kiblnd_conn_decref(conn);
-	}
-}
-
-static void
-kiblnd_disconnect_conn(struct kib_conn *conn)
-{
-	LASSERT(!in_interrupt());
-	LASSERT(current == kiblnd_data.kib_connd);
-	LASSERT(conn->ibc_state == IBLND_CONN_CLOSING);
-
-	rdma_disconnect(conn->ibc_cmid);
-	kiblnd_finalise_conn(conn);
-
-	kiblnd_peer_notify(conn->ibc_peer);
-}
-
-/**
- * High-water mark for reconnections to the same peer; reconnection attempts
- * should be delayed after more than KIB_RECONN_HIGH_RACE tries.
- */
-#define KIB_RECONN_HIGH_RACE	10
-/**
- * Allow connd to take a break and handle other things after consecutive
- * reconnection attempts.
- */
-#define KIB_RECONN_BREAK	100
-
-int
-kiblnd_connd(void *arg)
-{
-	spinlock_t *lock = &kiblnd_data.kib_connd_lock;
-	wait_queue_entry_t wait;
-	unsigned long flags;
-	struct kib_conn *conn;
-	int timeout;
-	int i;
-	int dropped_lock;
-	int peer_index = 0;
-	unsigned long deadline = jiffies;
-
-	init_waitqueue_entry(&wait, current);
-	kiblnd_data.kib_connd = current;
-
-	spin_lock_irqsave(lock, flags);
-
-	while (!kiblnd_data.kib_shutdown) {
-		int reconn = 0;
-
-		dropped_lock = 0;
-
-		if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
-			struct kib_peer *peer = NULL;
-
-			conn = list_entry(kiblnd_data.kib_connd_zombies.next,
-					  struct kib_conn, ibc_list);
-			list_del(&conn->ibc_list);
-			if (conn->ibc_reconnect) {
-				peer = conn->ibc_peer;
-				kiblnd_peer_addref(peer);
-			}
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			kiblnd_destroy_conn(conn);
-
-			spin_lock_irqsave(lock, flags);
-			if (!peer) {
-				kfree(conn);
-				continue;
-			}
-
-			conn->ibc_peer = peer;
-			if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
-				list_add_tail(&conn->ibc_list,
-					      &kiblnd_data.kib_reconn_list);
-			else
-				list_add_tail(&conn->ibc_list,
-					      &kiblnd_data.kib_reconn_wait);
-		}
-
-		if (!list_empty(&kiblnd_data.kib_connd_conns)) {
-			conn = list_entry(kiblnd_data.kib_connd_conns.next,
-					  struct kib_conn, ibc_list);
-			list_del(&conn->ibc_list);
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			kiblnd_disconnect_conn(conn);
-			kiblnd_conn_decref(conn);
-
-			spin_lock_irqsave(lock, flags);
-		}
-
-		while (reconn < KIB_RECONN_BREAK) {
-			if (kiblnd_data.kib_reconn_sec !=
-			    ktime_get_real_seconds()) {
-				kiblnd_data.kib_reconn_sec = ktime_get_real_seconds();
-				list_splice_init(&kiblnd_data.kib_reconn_wait,
-						 &kiblnd_data.kib_reconn_list);
-			}
-
-			if (list_empty(&kiblnd_data.kib_reconn_list))
-				break;
-
-			conn = list_entry(kiblnd_data.kib_reconn_list.next,
-					  struct kib_conn, ibc_list);
-			list_del(&conn->ibc_list);
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			reconn += kiblnd_reconnect_peer(conn->ibc_peer);
-			kiblnd_peer_decref(conn->ibc_peer);
-			kfree(conn);
-
-			spin_lock_irqsave(lock, flags);
-		}
-
-		/* careful with the jiffy wrap... */
-		timeout = (int)(deadline - jiffies);
-		if (timeout <= 0) {
-			const int n = 4;
-			const int p = 1;
-			int chunk = kiblnd_data.kib_peer_hash_size;
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			/*
-			 * Time to check for RDMA timeouts on a few more
-			 * peers: I do checks every 'p' seconds on a
-			 * proportion of the peer table and I need to check
-			 * every connection 'n' times within a timeout
-			 * interval, to ensure I detect a timeout on any
-			 * connection within (n+1)/n times the timeout
-			 * interval.
-			 */
-			if (*kiblnd_tunables.kib_timeout > n * p)
-				chunk = (chunk * n * p) /
-					*kiblnd_tunables.kib_timeout;
-			if (!chunk)
-				chunk = 1;
-
-			for (i = 0; i < chunk; i++) {
-				kiblnd_check_conns(peer_index);
-				peer_index = (peer_index + 1) %
-					     kiblnd_data.kib_peer_hash_size;
-			}
-
-			deadline += msecs_to_jiffies(p * MSEC_PER_SEC);
-			spin_lock_irqsave(lock, flags);
-		}
-
-		if (dropped_lock)
-			continue;
-
-		/* Nothing to do for 'timeout'  */
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
-		spin_unlock_irqrestore(lock, flags);
-
-		schedule_timeout(timeout);
-
-		remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
-		spin_lock_irqsave(lock, flags);
-	}
-
-	spin_unlock_irqrestore(lock, flags);
-
-	kiblnd_thread_fini();
-	return 0;
-}
-
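For scale, here is the scan-sizing arithmetic from kiblnd_connd() above worked through with the module defaults (timeout = 50 s from o2iblnd_modparams.c below; the 101-bucket peer hash size is an assumption for illustration):

#include <stdio.h>

/* Worked instance of the connd chunk computation: n = 4 sweeps per
 * timeout interval, p = 1 s between scans, hash_size assumed 101.
 */
int main(void)
{
	const int hash_size = 101, timeout = 50, n = 4, p = 1;
	int chunk = hash_size;

	if (timeout > n * p)
		chunk = (hash_size * n * p) / timeout;	/* = 8 buckets/scan */
	if (!chunk)
		chunk = 1;

	/* 8 buckets/s -> full sweep every ~13 s, i.e. each connection is
	 * examined roughly four times per 50 s timeout window.
	 */
	printf("chunk=%d, sweep=~%d s\n", chunk, (hash_size + chunk - 1) / chunk);
	return 0;
}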
-void
-kiblnd_qp_event(struct ib_event *event, void *arg)
-{
-	struct kib_conn *conn = arg;
-
-	switch (event->event) {
-	case IB_EVENT_COMM_EST:
-		CDEBUG(D_NET, "%s established\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		/*
-		 * We received a packet but the connection isn't established
-		 * yet; the handshake packet was probably lost, so tell the
-		 * CM to force the connection into the established state.
-		 */
-		rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
-		return;
-
-	default:
-		CERROR("%s: Async QP event type %d\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
-		return;
-	}
-}
-
-static void
-kiblnd_complete(struct ib_wc *wc)
-{
-	switch (kiblnd_wreqid2type(wc->wr_id)) {
-	default:
-		LBUG();
-
-	case IBLND_WID_MR:
-		if (wc->status != IB_WC_SUCCESS &&
-		    wc->status != IB_WC_WR_FLUSH_ERR)
-			CNETERR("FastReg failed: %d\n", wc->status);
-		break;
-
-	case IBLND_WID_RDMA:
-		/*
-		 * We only get RDMA completion notification if it fails.  All
-		 * subsequent work items, including the final SEND, will fail
-		 * too.  However we can't print out any more info about the
-		 * failing RDMA because 'tx' might be back on the idle list or
-		 * even reused already if we didn't manage to post all our work
-		 * items
-		 */
-		CNETERR("RDMA (tx: %p) failed: %d\n",
-			kiblnd_wreqid2ptr(wc->wr_id), wc->status);
-		return;
-
-	case IBLND_WID_TX:
-		kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status);
-		return;
-
-	case IBLND_WID_RX:
-		kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status,
-				   wc->byte_len);
-		return;
-	}
-}
-
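The kiblnd_wreqid2type()/kiblnd_wreqid2ptr() pair that kiblnd_complete() above decodes are tag-in-pointer helpers: the 64-bit ib_wc.wr_id carries both an object pointer and a small event type. A generic sketch of the technique (names and the 3-bit mask are assumptions, not the driver's header):

#include <stdint.h>

/* Tag-in-pointer packing: objects are at least 8-byte aligned, so the
 * low three bits of the pointer are free to hold one of up to eight
 * work-request types.
 */
#define WID_MASK 0x7ULL

static inline uint64_t wreqid_pack(void *ptr, unsigned int type)
{
	return (uint64_t)(uintptr_t)ptr | (type & WID_MASK);
}

static inline unsigned int wreqid2type(uint64_t wr_id)
{
	return (unsigned int)(wr_id & WID_MASK);
}

static inline void *wreqid2ptr(uint64_t wr_id)
{
	return (void *)(uintptr_t)(wr_id & ~WID_MASK);
}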
-void
-kiblnd_cq_completion(struct ib_cq *cq, void *arg)
-{
-	/*
-	 * NB I'm not allowed to schedule this conn once its refcount has
-	 * reached 0.  Since fundamentally I'm racing with scheduler threads
-	 * consuming my CQ I could be called after all completions have
-	 * occurred.  But in this case, !ibc_nrx && !ibc_nsends_posted
-	 * and this CQ is about to be destroyed so I NOOP.
-	 */
-	struct kib_conn *conn = arg;
-	struct kib_sched_info *sched = conn->ibc_sched;
-	unsigned long flags;
-
-	LASSERT(cq == conn->ibc_cq);
-
-	spin_lock_irqsave(&sched->ibs_lock, flags);
-
-	conn->ibc_ready = 1;
-
-	if (!conn->ibc_scheduled &&
-	    (conn->ibc_nrx > 0 ||
-	     conn->ibc_nsends_posted > 0)) {
-		kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
-		conn->ibc_scheduled = 1;
-		list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns);
-
-		if (waitqueue_active(&sched->ibs_waitq))
-			wake_up(&sched->ibs_waitq);
-	}
-
-	spin_unlock_irqrestore(&sched->ibs_lock, flags);
-}
-
-void
-kiblnd_cq_event(struct ib_event *event, void *arg)
-{
-	struct kib_conn *conn = arg;
-
-	CERROR("%s: async CQ event type %d\n",
-	       libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
-}
-
-int
-kiblnd_scheduler(void *arg)
-{
-	long id = (long)arg;
-	struct kib_sched_info *sched;
-	struct kib_conn *conn;
-	wait_queue_entry_t wait;
-	unsigned long flags;
-	struct ib_wc wc;
-	int did_something;
-	int busy_loops = 0;
-	int rc;
-
-	init_waitqueue_entry(&wait, current);
-
-	sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)];
-
-	rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt);
-	if (rc) {
-		CWARN("Unable to bind on CPU partition %d; please verify that all CPUs are healthy and reload modules if necessary, otherwise your system might be at risk of low performance\n",
-		      sched->ibs_cpt);
-	}
-
-	spin_lock_irqsave(&sched->ibs_lock, flags);
-
-	while (!kiblnd_data.kib_shutdown) {
-		if (busy_loops++ >= IBLND_RESCHED) {
-			spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-			cond_resched();
-			busy_loops = 0;
-
-			spin_lock_irqsave(&sched->ibs_lock, flags);
-		}
-
-		did_something = 0;
-
-		if (!list_empty(&sched->ibs_conns)) {
-			conn = list_entry(sched->ibs_conns.next, struct kib_conn,
-					  ibc_sched_list);
-			/* take over kib_sched_conns' ref on conn... */
-			LASSERT(conn->ibc_scheduled);
-			list_del(&conn->ibc_sched_list);
-			conn->ibc_ready = 0;
-
-			spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-			wc.wr_id = IBLND_WID_INVAL;
-
-			rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
-			if (!rc) {
-				rc = ib_req_notify_cq(conn->ibc_cq,
-						      IB_CQ_NEXT_COMP);
-				if (rc < 0) {
-					CWARN("%s: ib_req_notify_cq failed: %d, closing connection\n",
-					      libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
-					kiblnd_close_conn(conn, -EIO);
-					kiblnd_conn_decref(conn);
-					spin_lock_irqsave(&sched->ibs_lock,
-							  flags);
-					continue;
-				}
-
-				rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
-			}
-
-			if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) {
-				LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n",
-					       rc, wc.opcode, wc.status,
-					       wc.vendor_err,
-					       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-					       conn->ibc_state);
-				rc = -EINVAL;
-			}
-
-			if (rc < 0) {
-				CWARN("%s: ib_poll_cq failed: %d, closing connection\n",
-				      libcfs_nid2str(conn->ibc_peer->ibp_nid),
-				      rc);
-				kiblnd_close_conn(conn, -EIO);
-				kiblnd_conn_decref(conn);
-				spin_lock_irqsave(&sched->ibs_lock, flags);
-				continue;
-			}
-
-			spin_lock_irqsave(&sched->ibs_lock, flags);
-
-			if (rc || conn->ibc_ready) {
-				/*
-				 * There may be another completion waiting; get
-				 * another scheduler to check while I handle
-				 * this one...
-				 */
-				/* +1 ref for sched_conns */
-				kiblnd_conn_addref(conn);
-				list_add_tail(&conn->ibc_sched_list,
-					      &sched->ibs_conns);
-				if (waitqueue_active(&sched->ibs_waitq))
-					wake_up(&sched->ibs_waitq);
-			} else {
-				conn->ibc_scheduled = 0;
-			}
-
-			if (rc) {
-				spin_unlock_irqrestore(&sched->ibs_lock, flags);
-				kiblnd_complete(&wc);
-
-				spin_lock_irqsave(&sched->ibs_lock, flags);
-			}
-
-			kiblnd_conn_decref(conn); /* ...drop my ref from above */
-			did_something = 1;
-		}
-
-		if (did_something)
-			continue;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue_exclusive(&sched->ibs_waitq, &wait);
-		spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-		schedule();
-		busy_loops = 0;
-
-		remove_wait_queue(&sched->ibs_waitq, &wait);
-		spin_lock_irqsave(&sched->ibs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-	kiblnd_thread_fini();
-	return 0;
-}
-
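One detail of kiblnd_scheduler() above deserves a callout: ib_req_notify_cq() only arms notification for future completions, so a completion landing between an empty poll and the re-arm would never raise an event. Hence the second ib_poll_cq() after arming. A condensed sketch of just that pattern (kernel context; error handling reduced to a return code the caller would treat as fatal):

#include <rdma/ib_verbs.h>

/* Drain-then-rearm: poll, and if the CQ looks empty, arm it and poll
 * once more to close the race with a completion that arrived between
 * the two calls.
 */
static int poll_one(struct ib_cq *cq, struct ib_wc *wc)
{
	int rc = ib_poll_cq(cq, 1, wc);

	if (!rc) {
		rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
		if (rc < 0)
			return rc;	/* caller closes the connection */
		rc = ib_poll_cq(cq, 1, wc);
	}
	return rc;	/* > 0 means *wc holds one completion to dispatch */
}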
-int
-kiblnd_failover_thread(void *arg)
-{
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_dev *dev;
-	wait_queue_entry_t wait;
-	unsigned long flags;
-	int rc;
-
-	LASSERT(*kiblnd_tunables.kib_dev_failover);
-
-	init_waitqueue_entry(&wait, current);
-	write_lock_irqsave(glock, flags);
-
-	while (!kiblnd_data.kib_shutdown) {
-		int do_failover = 0;
-		int long_sleep;
-
-		list_for_each_entry(dev, &kiblnd_data.kib_failed_devs,
-				    ibd_fail_list) {
-			if (time_before(jiffies,
-					dev->ibd_next_failover))
-				continue;
-			do_failover = 1;
-			break;
-		}
-
-		if (do_failover) {
-			list_del_init(&dev->ibd_fail_list);
-			dev->ibd_failover = 1;
-			write_unlock_irqrestore(glock, flags);
-
-			rc = kiblnd_dev_failover(dev);
-
-			write_lock_irqsave(glock, flags);
-
-			LASSERT(dev->ibd_failover);
-			dev->ibd_failover = 0;
-			if (rc >= 0) { /* Device is OK or failover succeeded */
-				dev->ibd_next_failover = jiffies + 3 * HZ;
-				continue;
-			}
-
-			/* failed to failover, retry later */
-			dev->ibd_next_failover =
-				jiffies + min(dev->ibd_failed_failover, 10) * HZ;
-			if (kiblnd_dev_can_failover(dev)) {
-				list_add_tail(&dev->ibd_fail_list,
-					      &kiblnd_data.kib_failed_devs);
-			}
-
-			continue;
-		}
-
-		/* long sleep if no more pending failover */
-		long_sleep = list_empty(&kiblnd_data.kib_failed_devs);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
-		write_unlock_irqrestore(glock, flags);
-
-		rc = schedule_timeout(long_sleep ? 10 * HZ :
-						   HZ);
-		remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
-		write_lock_irqsave(glock, flags);
-
-		if (!long_sleep || rc)
-			continue;
-
-		/*
-		 * After a long sleep, routinely check all active devices.
-		 * We need a check like this because, with no active
-		 * connection on a dev and no local SENDs, we may keep
-		 * listening on the wrong HCA forever after a bonding failover.
-		 */
-		list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
-			if (kiblnd_dev_can_failover(dev)) {
-				list_add_tail(&dev->ibd_fail_list,
-					      &kiblnd_data.kib_failed_devs);
-			}
-		}
-	}
-
-	write_unlock_irqrestore(glock, flags);
-
-	kiblnd_thread_fini();
-	return 0;
-}

+ 0 - 296
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c

@@ -1,296 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_modparams.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "o2iblnd.h"
-
-static int service = 987;
-module_param(service, int, 0444);
-MODULE_PARM_DESC(service, "service number (within RDMA_PS_TCP)");
-
-static int cksum;
-module_param(cksum, int, 0644);
-MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-module_param(timeout, int, 0644);
-MODULE_PARM_DESC(timeout, "timeout (seconds)");
-
-/*
- * Number of threads in each scheduler pool (which is per-CPT);
- * if set to zero, a reasonable value is estimated from the number of CPUs.
- */
-static int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool");
-
-static unsigned int conns_per_peer = 1;
-module_param(conns_per_peer, uint, 0444);
-MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int ntx = 512;
-module_param(ntx, int, 0444);
-MODULE_PARM_DESC(ntx, "# of message descriptors allocated for each pool");
-
-/* NB: this value is shared by all CPTs */
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_credits_hiw;
-module_param(peer_credits_hiw, int, 0444);
-MODULE_PARM_DESC(peer_credits_hiw, "high-water mark at which to eagerly return credits");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
-
-static char *ipif_name = "ib0";
-module_param(ipif_name, charp, 0444);
-MODULE_PARM_DESC(ipif_name, "IPoIB interface name");
-
-static int retry_count = 5;
-module_param(retry_count, int, 0644);
-MODULE_PARM_DESC(retry_count, "Retransmissions when no ACK received");
-
-static int rnr_retry_count = 6;
-module_param(rnr_retry_count, int, 0644);
-MODULE_PARM_DESC(rnr_retry_count, "RNR retransmissions");
-
-static int keepalive = 100;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "Idle time in seconds before sending a keepalive");
-
-static int ib_mtu;
-module_param(ib_mtu, int, 0444);
-MODULE_PARM_DESC(ib_mtu, "IB MTU 256/512/1024/2048/4096");
-
-static int concurrent_sends;
-module_param(concurrent_sends, int, 0444);
-MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
-
-#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS
-static int map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
-module_param(map_on_demand, int, 0444);
-MODULE_PARM_DESC(map_on_demand, "map on demand");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_pool_size = 512;
-module_param(fmr_pool_size, int, 0444);
-MODULE_PARM_DESC(fmr_pool_size, "size of fmr pool on each CPT (>= ntx / 4)");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_flush_trigger = 384;
-module_param(fmr_flush_trigger, int, 0444);
-MODULE_PARM_DESC(fmr_flush_trigger, "# dirty FMRs that triggers pool flush");
-
-static int fmr_cache = 1;
-module_param(fmr_cache, int, 0444);
-MODULE_PARM_DESC(fmr_cache, "non-zero to enable FMR caching");
-
-/*
- * 0: disable failover
- * 1: enable failover if necessary
- * 2: force to failover (for debug)
- */
-static int dev_failover;
-module_param(dev_failover, int, 0444);
-MODULE_PARM_DESC(dev_failover, "HCA failover for bonding (0 off, 1 on, other values reserved)");
-
-static int require_privileged_port;
-module_param(require_privileged_port, int, 0644);
-MODULE_PARM_DESC(require_privileged_port, "require privileged port when accepting connection");
-
-static int use_privileged_port = 1;
-module_param(use_privileged_port, int, 0644);
-MODULE_PARM_DESC(use_privileged_port, "use privileged port when initiating connection");
-
-struct kib_tunables kiblnd_tunables = {
-	.kib_dev_failover      = &dev_failover,
-	.kib_service           = &service,
-	.kib_cksum             = &cksum,
-	.kib_timeout           = &timeout,
-	.kib_keepalive         = &keepalive,
-	.kib_ntx               = &ntx,
-	.kib_default_ipif      = &ipif_name,
-	.kib_retry_count       = &retry_count,
-	.kib_rnr_retry_count   = &rnr_retry_count,
-	.kib_ib_mtu            = &ib_mtu,
-	.kib_require_priv_port = &require_privileged_port,
-	.kib_use_priv_port     = &use_privileged_port,
-	.kib_nscheds           = &nscheds
-};
-
-static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
-
-/* # messages/RDMAs in-flight */
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni)
-{
-	if (version == IBLND_MSG_VERSION_1)
-		return IBLND_MSG_QUEUE_SIZE_V1;
-	else if (ni)
-		return ni->ni_peertxcredits;
-	else
-		return peer_credits;
-}
-
-int kiblnd_tunables_setup(struct lnet_ni *ni)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
-	/*
-	 * if no tunables were specified, set up the tunables with
-	 * their defaults
-	 */
-	if (!ni->ni_lnd_tunables) {
-		ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
-					      GFP_NOFS);
-		if (!ni->ni_lnd_tunables)
-			return -ENOMEM;
-
-		memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
-		       &default_tunables, sizeof(*tunables));
-	}
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
-	/* Current API version */
-	tunables->lnd_version = 0;
-
-	if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
-		CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
-		       *kiblnd_tunables.kib_ib_mtu);
-		return -EINVAL;
-	}
-
-	if (!ni->ni_peertimeout)
-		ni->ni_peertimeout = peer_timeout;
-
-	if (!ni->ni_maxtxcredits)
-		ni->ni_maxtxcredits = credits;
-
-	if (!ni->ni_peertxcredits)
-		ni->ni_peertxcredits = peer_credits;
-
-	if (!ni->ni_peerrtrcredits)
-		ni->ni_peerrtrcredits = peer_buffer_credits;
-
-	if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
-		ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
-
-	if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
-		ni->ni_peertxcredits = IBLND_CREDITS_MAX;
-
-	if (ni->ni_peertxcredits > credits)
-		ni->ni_peertxcredits = credits;
-
-	if (!tunables->lnd_peercredits_hiw)
-		tunables->lnd_peercredits_hiw = peer_credits_hiw;
-
-	if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
-		tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
-
-	if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
-		tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
-
-	if (tunables->lnd_map_on_demand <= 0 ||
-	    tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
-		/* Use the default */
-		CWARN("Invalid map_on_demand (%d), expected 1 - %d; using default of %d\n",
-		      tunables->lnd_map_on_demand,
-		      IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND);
-		tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
-	}
-
-	if (tunables->lnd_map_on_demand == 1) {
-		/* it makes no sense to create a map for a single fragment */
-		tunables->lnd_map_on_demand = 2;
-	}
-
-	if (!tunables->lnd_concurrent_sends) {
-		if (tunables->lnd_map_on_demand > 0 &&
-		    tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
-			tunables->lnd_concurrent_sends =
-						ni->ni_peertxcredits * 2;
-		} else {
-			tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
-		}
-	}
-
-	if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
-		tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
-
-	if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
-		tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
-
-	if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
-		CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n",
-		      tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
-	}
-
-	if (!tunables->lnd_fmr_pool_size)
-		tunables->lnd_fmr_pool_size = fmr_pool_size;
-	if (!tunables->lnd_fmr_flush_trigger)
-		tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
-	if (!tunables->lnd_fmr_cache)
-		tunables->lnd_fmr_cache = fmr_cache;
-	if (!tunables->lnd_conns_per_peer) {
-		tunables->lnd_conns_per_peer = (conns_per_peer) ?
-			conns_per_peer : 1;
-	}
-
-	return 0;
-}
-
-void kiblnd_tunables_init(void)
-{
-	default_tunables.lnd_version = 0;
-	default_tunables.lnd_peercredits_hiw = peer_credits_hiw;
-	default_tunables.lnd_map_on_demand = map_on_demand;
-	default_tunables.lnd_concurrent_sends = concurrent_sends;
-	default_tunables.lnd_fmr_pool_size = fmr_pool_size;
-	default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
-	default_tunables.lnd_fmr_cache = fmr_cache;
-	default_tunables.lnd_conns_per_peer = conns_per_peer;
-}
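To make the high-water clamping in kiblnd_tunables_setup() above concrete, here it is run with the module defaults (peer_credits = 8, peer_credits_hiw = 0; this assumes IBLND_CREDITS_DEFAULT does not raise ni_peertxcredits above 8):

#include <stdio.h>

/* Worked instance of the peer_credits_hiw clamping: an unset value is
 * forced into [peertxcredits / 2, peertxcredits - 1].
 */
int main(void)
{
	int peertxcredits = 8;
	int hiw = 0;				/* module default: unset */

	if (hiw < peertxcredits / 2)
		hiw = peertxcredits / 2;	/* floor: 4 */
	if (hiw >= peertxcredits)
		hiw = peertxcredits - 1;	/* ceiling: 7 */

	printf("peer_credits_hiw = %d\n", hiw);	/* -> 4 */
	return 0;
}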

+ 0 - 6
drivers/staging/lustre/lnet/klnds/socklnd/Makefile

@@ -1,6 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += ksocklnd.o
-
-ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib.o

+ 0 - 2921
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c

@@ -1,2921 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/socklnd/socklnd.c
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "socklnd.h"
-
-static struct lnet_lnd the_ksocklnd;
-struct ksock_nal_data ksocknal_data;
-
-static struct ksock_interface *
-ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip)
-{
-	struct ksock_net *net = ni->ni_data;
-	int i;
-	struct ksock_interface *iface;
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		LASSERT(i < LNET_MAX_INTERFACES);
-		iface = &net->ksnn_interfaces[i];
-
-		if (iface->ksni_ipaddr == ip)
-			return iface;
-	}
-
-	return NULL;
-}
-
-static struct ksock_route *
-ksocknal_create_route(__u32 ipaddr, int port)
-{
-	struct ksock_route *route;
-
-	route = kzalloc(sizeof(*route), GFP_NOFS);
-	if (!route)
-		return NULL;
-
-	atomic_set(&route->ksnr_refcount, 1);
-	route->ksnr_peer = NULL;
-	route->ksnr_retry_interval = 0;	 /* OK to connect at any time */
-	route->ksnr_ipaddr = ipaddr;
-	route->ksnr_port = port;
-	route->ksnr_scheduled = 0;
-	route->ksnr_connecting = 0;
-	route->ksnr_connected = 0;
-	route->ksnr_deleted = 0;
-	route->ksnr_conn_count = 0;
-	route->ksnr_share_count = 0;
-
-	return route;
-}
-
-void
-ksocknal_destroy_route(struct ksock_route *route)
-{
-	LASSERT(!atomic_read(&route->ksnr_refcount));
-
-	if (route->ksnr_peer)
-		ksocknal_peer_decref(route->ksnr_peer);
-
-	kfree(route);
-}
-
-static int
-ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni,
-		     struct lnet_process_id id)
-{
-	int cpt = lnet_cpt_of_nid(id.nid);
-	struct ksock_net *net = ni->ni_data;
-	struct ksock_peer *peer;
-
-	LASSERT(id.nid != LNET_NID_ANY);
-	LASSERT(id.pid != LNET_PID_ANY);
-	LASSERT(!in_interrupt());
-
-	peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
-	if (!peer)
-		return -ENOMEM;
-
-	peer->ksnp_ni = ni;
-	peer->ksnp_id = id;
-	atomic_set(&peer->ksnp_refcount, 1);   /* 1 ref for caller */
-	peer->ksnp_closing = 0;
-	peer->ksnp_accepting = 0;
-	peer->ksnp_proto = NULL;
-	peer->ksnp_last_alive = 0;
-	peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
-	INIT_LIST_HEAD(&peer->ksnp_conns);
-	INIT_LIST_HEAD(&peer->ksnp_routes);
-	INIT_LIST_HEAD(&peer->ksnp_tx_queue);
-	INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
-	spin_lock_init(&peer->ksnp_lock);
-
-	spin_lock_bh(&net->ksnn_lock);
-
-	if (net->ksnn_shutdown) {
-		spin_unlock_bh(&net->ksnn_lock);
-
-		kfree(peer);
-		CERROR("Can't create peer: network shutdown\n");
-		return -ESHUTDOWN;
-	}
-
-	net->ksnn_npeers++;
-
-	spin_unlock_bh(&net->ksnn_lock);
-
-	*peerp = peer;
-	return 0;
-}
-
-void
-ksocknal_destroy_peer(struct ksock_peer *peer)
-{
-	struct ksock_net *net = peer->ksnp_ni->ni_data;
-
-	CDEBUG(D_NET, "peer %s %p deleted\n",
-	       libcfs_id2str(peer->ksnp_id), peer);
-
-	LASSERT(!atomic_read(&peer->ksnp_refcount));
-	LASSERT(!peer->ksnp_accepting);
-	LASSERT(list_empty(&peer->ksnp_conns));
-	LASSERT(list_empty(&peer->ksnp_routes));
-	LASSERT(list_empty(&peer->ksnp_tx_queue));
-	LASSERT(list_empty(&peer->ksnp_zc_req_list));
-
-	kfree(peer);
-
-	/*
-	 * NB a peer's connections and routes keep a reference on their peer
-	 * until they are destroyed, so we can be assured that _all_ state to
-	 * do with this peer has been cleaned up when its refcount drops to
-	 * zero.
-	 */
-	spin_lock_bh(&net->ksnn_lock);
-	net->ksnn_npeers--;
-	spin_unlock_bh(&net->ksnn_lock);
-}
-
-struct ksock_peer *
-ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
-{
-	struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
-	struct ksock_peer *peer;
-
-	list_for_each_entry(peer, peer_list, ksnp_list) {
-		LASSERT(!peer->ksnp_closing);
-
-		if (peer->ksnp_ni != ni)
-			continue;
-
-		if (peer->ksnp_id.nid != id.nid ||
-		    peer->ksnp_id.pid != id.pid)
-			continue;
-
-		CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
-		       peer, libcfs_id2str(id),
-		       atomic_read(&peer->ksnp_refcount));
-		return peer;
-	}
-	return NULL;
-}
-
-struct ksock_peer *
-ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
-{
-	struct ksock_peer *peer;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-	peer = ksocknal_find_peer_locked(ni, id);
-	if (peer)			/* +1 ref for caller? */
-		ksocknal_peer_addref(peer);
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	return peer;
-}
-
-static void
-ksocknal_unlink_peer_locked(struct ksock_peer *peer)
-{
-	int i;
-	__u32 ip;
-	struct ksock_interface *iface;
-
-	for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
-		LASSERT(i < LNET_MAX_INTERFACES);
-		ip = peer->ksnp_passive_ips[i];
-
-		iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
-		/*
-		 * All IPs in peer->ksnp_passive_ips[] come from the
-		 * interface list, therefore the call must succeed.
-		 */
-		LASSERT(iface);
-
-		CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
-		       peer, iface, iface->ksni_nroutes);
-		iface->ksni_npeers--;
-	}
-
-	LASSERT(list_empty(&peer->ksnp_conns));
-	LASSERT(list_empty(&peer->ksnp_routes));
-	LASSERT(!peer->ksnp_closing);
-	peer->ksnp_closing = 1;
-	list_del(&peer->ksnp_list);
-	/* lose peerlist's ref */
-	ksocknal_peer_decref(peer);
-}
-
-static int
-ksocknal_get_peer_info(struct lnet_ni *ni, int index,
-		       struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
-		       int *port, int *conn_count, int *share_count)
-{
-	struct ksock_peer *peer;
-	struct list_head *ptmp;
-	struct ksock_route *route;
-	struct list_head *rtmp;
-	int i;
-	int j;
-	int rc = -ENOENT;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-		list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			if (peer->ksnp_ni != ni)
-				continue;
-
-			if (!peer->ksnp_n_passive_ips &&
-			    list_empty(&peer->ksnp_routes)) {
-				if (index-- > 0)
-					continue;
-
-				*id = peer->ksnp_id;
-				*myip = 0;
-				*peer_ip = 0;
-				*port = 0;
-				*conn_count = 0;
-				*share_count = 0;
-				rc = 0;
-				goto out;
-			}
-
-			for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
-				if (index-- > 0)
-					continue;
-
-				*id = peer->ksnp_id;
-				*myip = peer->ksnp_passive_ips[j];
-				*peer_ip = 0;
-				*port = 0;
-				*conn_count = 0;
-				*share_count = 0;
-				rc = 0;
-				goto out;
-			}
-
-			list_for_each(rtmp, &peer->ksnp_routes) {
-				if (index-- > 0)
-					continue;
-
-				route = list_entry(rtmp, struct ksock_route,
-						   ksnr_list);
-
-				*id = peer->ksnp_id;
-				*myip = route->ksnr_myipaddr;
-				*peer_ip = route->ksnr_ipaddr;
-				*port = route->ksnr_port;
-				*conn_count = route->ksnr_conn_count;
-				*share_count = route->ksnr_share_count;
-				rc = 0;
-				goto out;
-			}
-		}
-	}
- out:
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-	return rc;
-}
-
-static void
-ksocknal_associate_route_conn_locked(struct ksock_route *route,
-				     struct ksock_conn *conn)
-{
-	struct ksock_peer *peer = route->ksnr_peer;
-	int type = conn->ksnc_type;
-	struct ksock_interface *iface;
-
-	conn->ksnc_route = route;
-	ksocknal_route_addref(route);
-
-	if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
-		if (!route->ksnr_myipaddr) {
-			/* route wasn't bound locally yet (the initial route) */
-			CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
-			       libcfs_id2str(peer->ksnp_id),
-			       &route->ksnr_ipaddr,
-			       &conn->ksnc_myipaddr);
-		} else {
-			CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
-			       libcfs_id2str(peer->ksnp_id),
-			       &route->ksnr_ipaddr,
-			       &route->ksnr_myipaddr,
-			       &conn->ksnc_myipaddr);
-
-			iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
-						  route->ksnr_myipaddr);
-			if (iface)
-				iface->ksni_nroutes--;
-		}
-		route->ksnr_myipaddr = conn->ksnc_myipaddr;
-		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
-					  route->ksnr_myipaddr);
-		if (iface)
-			iface->ksni_nroutes++;
-	}
-
-	route->ksnr_connected |= (1 << type);
-	route->ksnr_conn_count++;
-
-	/*
-	 * Successful connection => further attempts can
-	 * proceed immediately
-	 */
-	route->ksnr_retry_interval = 0;
-}
-
-static void
-ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route)
-{
-	struct list_head *tmp;
-	struct ksock_conn *conn;
-	struct ksock_route *route2;
-
-	LASSERT(!peer->ksnp_closing);
-	LASSERT(!route->ksnr_peer);
-	LASSERT(!route->ksnr_scheduled);
-	LASSERT(!route->ksnr_connecting);
-	LASSERT(!route->ksnr_connected);
-
-	/* LASSERT(unique) */
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route2 = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
-			CERROR("Duplicate route %s %pI4h\n",
-			       libcfs_id2str(peer->ksnp_id),
-			       &route->ksnr_ipaddr);
-			LBUG();
-		}
-	}
-
-	route->ksnr_peer = peer;
-	ksocknal_peer_addref(peer);
-	/* peer's routelist takes over my ref on 'route' */
-	list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
-	list_for_each(tmp, &peer->ksnp_conns) {
-		conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-		if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
-			continue;
-
-		ksocknal_associate_route_conn_locked(route, conn);
-		/* keep going (typed routes) */
-	}
-}
-
-static void
-ksocknal_del_route_locked(struct ksock_route *route)
-{
-	struct ksock_peer *peer = route->ksnr_peer;
-	struct ksock_interface *iface;
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-
-	LASSERT(!route->ksnr_deleted);
-
-	/* Close associated conns */
-	list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
-		conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
-		if (conn->ksnc_route != route)
-			continue;
-
-		ksocknal_close_conn_locked(conn, 0);
-	}
-
-	if (route->ksnr_myipaddr) {
-		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
-					  route->ksnr_myipaddr);
-		if (iface)
-			iface->ksni_nroutes--;
-	}
-
-	route->ksnr_deleted = 1;
-	list_del(&route->ksnr_list);
-	ksocknal_route_decref(route);	     /* drop peer's ref */
-
-	if (list_empty(&peer->ksnp_routes) &&
-	    list_empty(&peer->ksnp_conns)) {
-		/*
-		 * I've just removed the last route to a peer with no active
-		 * connections
-		 */
-		ksocknal_unlink_peer_locked(peer);
-	}
-}
-
-int
-ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
-		  int port)
-{
-	struct ksock_peer *peer;
-	struct ksock_peer *peer2;
-	struct ksock_route *route;
-	struct ksock_route *route2;
-	int rc;
-
-	if (id.nid == LNET_NID_ANY ||
-	    id.pid == LNET_PID_ANY)
-		return -EINVAL;
-
-	/* Have a brand new peer ready... */
-	rc = ksocknal_create_peer(&peer, ni, id);
-	if (rc)
-		return rc;
-
-	route = ksocknal_create_route(ipaddr, port);
-	if (!route) {
-		ksocknal_peer_decref(peer);
-		return -ENOMEM;
-	}
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	/* always called with a ref on ni, so shutdown can't have started */
-	LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
-	peer2 = ksocknal_find_peer_locked(ni, id);
-	if (peer2) {
-		ksocknal_peer_decref(peer);
-		peer = peer2;
-	} else {
-		/* peer table takes my ref on peer */
-		list_add_tail(&peer->ksnp_list,
-			      ksocknal_nid2peerlist(id.nid));
-	}
-
-	list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
-		if (route2->ksnr_ipaddr == ipaddr) {
-			/* Route already exists, use the old one */
-			ksocknal_route_decref(route);
-			route2->ksnr_share_count++;
-			goto out;
-		}
-	}
-	/* Route doesn't already exist, add the new one */
-	ksocknal_add_route_locked(peer, route);
-	route->ksnr_share_count++;
-out:
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return 0;
-}
-
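ksocknal_add_peer() above is a textbook instance of optimistic allocation: the peer and route are created before taking the write lock (allocation can sleep), and the fresh peer is simply dropped if a racing thread already inserted one. A generic userspace sketch of the same shape (the one-slot table and pthread lock are placeholders):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;
static void *table_slot;	/* stand-in for the peer hash table */

static int add_entry(size_t size)
{
	void *fresh = malloc(size);	/* may block: done unlocked */

	if (!fresh)
		return -1;

	pthread_mutex_lock(&tbl_lock);
	if (table_slot) {
		/* lost the race: keep the existing entry, free ours */
		pthread_mutex_unlock(&tbl_lock);
		free(fresh);
		return 0;
	}
	table_slot = fresh;		/* table takes over our reference */
	pthread_mutex_unlock(&tbl_lock);
	return 0;
}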
-static void
-ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip)
-{
-	struct ksock_conn *conn;
-	struct ksock_route *route;
-	struct list_head *tmp;
-	struct list_head *nxt;
-	int nshared;
-
-	LASSERT(!peer->ksnp_closing);
-
-	/* Extra ref prevents peer disappearing until I'm done with it */
-	ksocknal_peer_addref(peer);
-
-	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		/* no match */
-		if (!(!ip || route->ksnr_ipaddr == ip))
-			continue;
-
-		route->ksnr_share_count = 0;
-		/* This deletes associated conns too */
-		ksocknal_del_route_locked(route);
-	}
-
-	nshared = 0;
-	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-		nshared += route->ksnr_share_count;
-	}
-
-	if (!nshared) {
-		/*
-		 * remove everything else if there are no explicit entries
-		 * left
-		 */
-		list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-			route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-			/* we should only be removing auto-entries */
-			LASSERT(!route->ksnr_share_count);
-			ksocknal_del_route_locked(route);
-		}
-
-		list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
-			conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-			ksocknal_close_conn_locked(conn, 0);
-		}
-	}
-
-	ksocknal_peer_decref(peer);
-	/* NB peer unlinks itself when last conn/route is removed */
-}
-
-static int
-ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
-{
-	LIST_HEAD(zombies);
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	struct ksock_peer *peer;
-	int lo;
-	int hi;
-	int i;
-	int rc = -ENOENT;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	if (id.nid != LNET_NID_ANY) {
-		lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-		hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-	} else {
-		lo = 0;
-		hi = ksocknal_data.ksnd_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			if (peer->ksnp_ni != ni)
-				continue;
-
-			if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
-			      (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
-				continue;
-
-			ksocknal_peer_addref(peer);     /* a ref for me... */
-
-			ksocknal_del_peer_locked(peer, ip);
-
-			if (peer->ksnp_closing &&
-			    !list_empty(&peer->ksnp_tx_queue)) {
-				LASSERT(list_empty(&peer->ksnp_conns));
-				LASSERT(list_empty(&peer->ksnp_routes));
-
-				list_splice_init(&peer->ksnp_tx_queue,
-						 &zombies);
-			}
-
-			ksocknal_peer_decref(peer);     /* ...till here */
-
-			rc = 0;		 /* matched! */
-		}
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_txlist_done(ni, &zombies, 1);
-
-	return rc;
-}
-
-static struct ksock_conn *
-ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
-	struct ksock_peer *peer;
-	struct list_head *ptmp;
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-	int i;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-		list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			LASSERT(!peer->ksnp_closing);
-
-			if (peer->ksnp_ni != ni)
-				continue;
-
-			list_for_each(ctmp, &peer->ksnp_conns) {
-				if (index-- > 0)
-					continue;
-
-				conn = list_entry(ctmp, struct ksock_conn,
-						  ksnc_list);
-				ksocknal_conn_addref(conn);
-				read_unlock(&ksocknal_data.ksnd_global_lock);
-				return conn;
-			}
-		}
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-	return NULL;
-}
-
-static struct ksock_sched *
-ksocknal_choose_scheduler_locked(unsigned int cpt)
-{
-	struct ksock_sched_info	*info = ksocknal_data.ksnd_sched_info[cpt];
-	struct ksock_sched *sched;
-	int i;
-
-	LASSERT(info->ksi_nthreads > 0);
-
-	sched = &info->ksi_scheds[0];
-	/*
-	 * NB: it's safe so far, but info->ksi_nthreads could be changed
-	 * NB: this is safe for now, but info->ksi_nthreads could change
-	 * at runtime once we have dynamic LNet configuration; we will
-	 * need to take care of this then.
-	for (i = 1; i < info->ksi_nthreads; i++) {
-		if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
-			sched = &info->ksi_scheds[i];
-	}
-
-	return sched;
-}
-
-static int
-ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs)
-{
-	struct ksock_net *net = ni->ni_data;
-	int i;
-	int nip;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	nip = net->ksnn_ninterfaces;
-	LASSERT(nip <= LNET_MAX_INTERFACES);
-
-	/*
-	 * Only offer interfaces for additional connections if I have
-	 * more than one.
-	 */
-	if (nip < 2) {
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-		return 0;
-	}
-
-	for (i = 0; i < nip; i++) {
-		ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
-		LASSERT(ipaddrs[i]);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-	return nip;
-}
-
-static int
-ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips)
-{
-	int best_netmatch = 0;
-	int best_xor      = 0;
-	int best	  = -1;
-	int this_xor;
-	int this_netmatch;
-	int i;
-
-	for (i = 0; i < nips; i++) {
-		if (!ips[i])
-			continue;
-
-		this_xor = ips[i] ^ iface->ksni_ipaddr;
-		this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0;
-
-		if (!(best < 0 ||
-		      best_netmatch < this_netmatch ||
-		      (best_netmatch == this_netmatch &&
-		       best_xor > this_xor)))
-			continue;
-
-		best = i;
-		best_netmatch = this_netmatch;
-		best_xor = this_xor;
-	}
-
-	LASSERT(best >= 0);
-	return best;
-}
-
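The ranking rule in ksocknal_match_peerip() above is: prefer peer IPs on the interface's own subnet, and break ties with the smallest XOR distance to the interface address. A standalone run with illustrative addresses:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t iface_ip = 0xc0a8010a;	/* 192.168.1.10 */
	const uint32_t netmask  = 0xffffff00;	/* /24 */
	const uint32_t ips[3] = {
		0xc0a80205,	/* 192.168.2.5: off-subnet */
		0xc0a80177,	/* 192.168.1.119: on-subnet, xor = 125 */
		0xc0a80114,	/* 192.168.1.20: on-subnet, xor = 30 */
	};
	int best = -1, best_net = 0;
	uint32_t best_xor = 0;

	for (int i = 0; i < 3; i++) {
		uint32_t x = ips[i] ^ iface_ip;
		int net = !(x & netmask);

		if (best < 0 || best_net < net ||
		    (best_net == net && best_xor > x)) {
			best = i;
			best_net = net;
			best_xor = x;
		}
	}
	printf("best = index %d\n", best);	/* -> 2 (192.168.1.20) */
	return 0;
}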
-static int
-ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips)
-{
-	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
-	struct ksock_net *net = peer->ksnp_ni->ni_data;
-	struct ksock_interface *iface;
-	struct ksock_interface *best_iface;
-	int n_ips;
-	int i;
-	int j;
-	int k;
-	__u32 ip;
-	__u32 xor;
-	int this_netmatch;
-	int best_netmatch;
-	int best_npeers;
-
-	/*
-	 * CAVEAT EMPTOR: We do all our interface matching with an
-	 * exclusive hold of global lock at IRQ priority.  We're only
-	 * expecting to be dealing with small numbers of interfaces, so the
-	 * O(n**3)-ness shouldn't matter
-	 */
-	/*
-	 * Also note that I'm not going to return more than n_peerips
-	 * interfaces, even if I have more myself
-	 */
-	write_lock_bh(global_lock);
-
-	LASSERT(n_peerips <= LNET_MAX_INTERFACES);
-	LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
-	/*
-	 * Only match interfaces for additional connections
-	 * if I have > 1 interface
-	 */
-	n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
-		min(n_peerips, net->ksnn_ninterfaces);
-
-	for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
-		/*	      ^ yes really... */
-
-		/*
-		 * If we have any new interfaces, first tick off all the
-		 * peer IPs that match old interfaces, then choose new
-		 * interfaces to match the remaining peer IPs.
-		 * We don't forget interfaces we've stopped using; we might
-		 * start using them again...
-		 */
-		if (i < peer->ksnp_n_passive_ips) {
-			/* Old interface. */
-			ip = peer->ksnp_passive_ips[i];
-			best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
-
-			/* peer passive ips are kept up to date */
-			LASSERT(best_iface);
-		} else {
-			/* choose a new interface */
-			LASSERT(i == peer->ksnp_n_passive_ips);
-
-			best_iface = NULL;
-			best_netmatch = 0;
-			best_npeers = 0;
-
-			for (j = 0; j < net->ksnn_ninterfaces; j++) {
-				iface = &net->ksnn_interfaces[j];
-				ip = iface->ksni_ipaddr;
-
-				for (k = 0; k < peer->ksnp_n_passive_ips; k++)
-					if (peer->ksnp_passive_ips[k] == ip)
-						break;
-
-				if (k < peer->ksnp_n_passive_ips) /* using it already */
-					continue;
-
-				k = ksocknal_match_peerip(iface, peerips,
-							  n_peerips);
-				xor = ip ^ peerips[k];
-				this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;
-
-				if (!(!best_iface ||
-				      best_netmatch < this_netmatch ||
-				      (best_netmatch == this_netmatch &&
-				       best_npeers > iface->ksni_npeers)))
-					continue;
-
-				best_iface = iface;
-				best_netmatch = this_netmatch;
-				best_npeers = iface->ksni_npeers;
-			}
-
-			LASSERT(best_iface);
-
-			best_iface->ksni_npeers++;
-			ip = best_iface->ksni_ipaddr;
-			peer->ksnp_passive_ips[i] = ip;
-			peer->ksnp_n_passive_ips = i + 1;
-		}
-
-		/* mark the best matching peer IP used */
-		j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
-		peerips[j] = 0;
-	}
-
-	/* Overwrite input peer IP addresses */
-	memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
-	write_unlock_bh(global_lock);
-
-	return n_ips;
-}
-
-static void
-ksocknal_create_routes(struct ksock_peer *peer, int port,
-		       __u32 *peer_ipaddrs, int npeer_ipaddrs)
-{
-	struct ksock_route *newroute = NULL;
-	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
-	struct lnet_ni *ni = peer->ksnp_ni;
-	struct ksock_net *net = ni->ni_data;
-	struct list_head *rtmp;
-	struct ksock_route *route;
-	struct ksock_interface *iface;
-	struct ksock_interface *best_iface;
-	int best_netmatch;
-	int this_netmatch;
-	int best_nroutes;
-	int i;
-	int j;
-
-	/*
-	 * CAVEAT EMPTOR: We do all our interface matching with an
-	 * exclusive hold of global lock at IRQ priority.  We're only
-	 * expecting to be dealing with small numbers of interfaces, so the
-	 * O(n**3)-ness here shouldn't matter
-	 */
-	write_lock_bh(global_lock);
-
-	if (net->ksnn_ninterfaces < 2) {
-		/*
-		 * Only create additional connections
-		 * if I have > 1 interface
-		 */
-		write_unlock_bh(global_lock);
-		return;
-	}
-
-	LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES);
-
-	for (i = 0; i < npeer_ipaddrs; i++) {
-		if (newroute) {
-			newroute->ksnr_ipaddr = peer_ipaddrs[i];
-		} else {
-			write_unlock_bh(global_lock);
-
-			newroute = ksocknal_create_route(peer_ipaddrs[i], port);
-			if (!newroute)
-				return;
-
-			write_lock_bh(global_lock);
-		}
-
-		if (peer->ksnp_closing) {
-			/* peer got closed under me */
-			break;
-		}
-
-		/* Already got a route? */
-		route = NULL;
-		list_for_each(rtmp, &peer->ksnp_routes) {
-			route = list_entry(rtmp, struct ksock_route, ksnr_list);
-
-			if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
-				break;
-
-			route = NULL;
-		}
-		if (route)
-			continue;
-
-		best_iface = NULL;
-		best_nroutes = 0;
-		best_netmatch = 0;
-
-		LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
-		/* Select interface to connect from */
-		for (j = 0; j < net->ksnn_ninterfaces; j++) {
-			iface = &net->ksnn_interfaces[j];
-
-			/* Using this interface already? */
-			list_for_each(rtmp, &peer->ksnp_routes) {
-				route = list_entry(rtmp, struct ksock_route,
-						   ksnr_list);
-
-				if (route->ksnr_myipaddr == iface->ksni_ipaddr)
-					break;
-
-				route = NULL;
-			}
-			if (route)
-				continue;
-
-			this_netmatch = (!((iface->ksni_ipaddr ^
-					   newroute->ksnr_ipaddr) &
-					   iface->ksni_netmask)) ? 1 : 0;
-
-			if (!(!best_iface ||
-			      best_netmatch < this_netmatch ||
-			      (best_netmatch == this_netmatch &&
-			       best_nroutes > iface->ksni_nroutes)))
-				continue;
-
-			best_iface = iface;
-			best_netmatch = this_netmatch;
-			best_nroutes = iface->ksni_nroutes;
-		}
-
-		if (!best_iface)
-			continue;
-
-		newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
-		best_iface->ksni_nroutes++;
-
-		ksocknal_add_route_locked(peer, newroute);
-		newroute = NULL;
-	}
-
-	write_unlock_bh(global_lock);
-	if (newroute)
-		ksocknal_route_decref(newroute);
-}
-
-int
-ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
-{
-	struct ksock_connreq *cr;
-	int rc;
-	__u32 peer_ip;
-	int peer_port;
-
-	rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
-	LASSERT(!rc);		      /* we succeeded before */
-
-	cr = kzalloc(sizeof(*cr), GFP_NOFS);
-	if (!cr) {
-		LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
-				   &peer_ip);
-		return -ENOMEM;
-	}
-
-	lnet_ni_addref(ni);
-	cr->ksncr_ni   = ni;
-	cr->ksncr_sock = sock;
-
-	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
-	list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
-	wake_up(&ksocknal_data.ksnd_connd_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-	return 0;
-}
-
-static int
-ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr)
-{
-	struct ksock_route *route;
-
-	list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
-		if (route->ksnr_ipaddr == ipaddr)
-			return route->ksnr_connecting;
-	}
-	return 0;
-}
-
-int
-ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
-		     struct socket *sock, int type)
-{
-	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
-	LIST_HEAD(zombies);
-	struct lnet_process_id peerid;
-	struct list_head *tmp;
-	__u64 incarnation;
-	struct ksock_conn *conn;
-	struct ksock_conn *conn2;
-	struct ksock_peer *peer = NULL;
-	struct ksock_peer *peer2;
-	struct ksock_sched *sched;
-	struct ksock_hello_msg *hello;
-	int cpt;
-	struct ksock_tx *tx;
-	struct ksock_tx *txtmp;
-	int rc;
-	int active;
-	char *warn = NULL;
-
-	active = !!route;
-
-	LASSERT(active == (type != SOCKLND_CONN_NONE));
-
-	conn = kzalloc(sizeof(*conn), GFP_NOFS);
-	if (!conn) {
-		rc = -ENOMEM;
-		goto failed_0;
-	}
-
-	conn->ksnc_peer = NULL;
-	conn->ksnc_route = NULL;
-	conn->ksnc_sock = sock;
-	/*
-	 * 2 ref, 1 for conn, another extra ref prevents socket
-	 * being closed before establishment of connection
-	 */
-	atomic_set(&conn->ksnc_sock_refcount, 2);
-	conn->ksnc_type = type;
-	ksocknal_lib_save_callback(sock, conn);
-	atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
-
-	conn->ksnc_rx_ready = 0;
-	conn->ksnc_rx_scheduled = 0;
-
-	INIT_LIST_HEAD(&conn->ksnc_tx_queue);
-	conn->ksnc_tx_ready = 0;
-	conn->ksnc_tx_scheduled = 0;
-	conn->ksnc_tx_carrier = NULL;
-	atomic_set(&conn->ksnc_tx_nob, 0);
-
-	hello = kvzalloc(offsetof(struct ksock_hello_msg,
-				  kshm_ips[LNET_MAX_INTERFACES]),
-			 GFP_KERNEL);
-	if (!hello) {
-		rc = -ENOMEM;
-		goto failed_1;
-	}
-
-	/* stash conn's local and remote addrs */
-	rc = ksocknal_lib_get_conn_addrs(conn);
-	if (rc)
-		goto failed_1;
-
-	/*
-	 * Find out/confirm peer's NID and connection type and get the
-	 * vector of interfaces she's willing to let me connect to.
-	 * Passive connections use the listener timeout since the peer sends
-	 * eagerly
-	 */
-	if (active) {
-		peer = route->ksnr_peer;
-		LASSERT(ni == peer->ksnp_ni);
-
-		/* Active connection sends HELLO eagerly */
-		hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
-		peerid = peer->ksnp_id;
-
-		write_lock_bh(global_lock);
-		conn->ksnc_proto = peer->ksnp_proto;
-		write_unlock_bh(global_lock);
-
-		if (!conn->ksnc_proto) {
-			conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
-			if (*ksocknal_tunables.ksnd_protocol == 2)
-				conn->ksnc_proto = &ksocknal_protocol_v2x;
-			else if (*ksocknal_tunables.ksnd_protocol == 1)
-				conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
-		}
-
-		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
-		if (rc)
-			goto failed_1;
-	} else {
-		peerid.nid = LNET_NID_ANY;
-		peerid.pid = LNET_PID_ANY;
-
-		/* Passive, get protocol from peer */
-		conn->ksnc_proto = NULL;
-	}
-
-	rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
-	if (rc < 0)
-		goto failed_1;
-
-	LASSERT(!rc || active);
-	LASSERT(conn->ksnc_proto);
-	LASSERT(peerid.nid != LNET_NID_ANY);
-
-	cpt = lnet_cpt_of_nid(peerid.nid);
-
-	if (active) {
-		ksocknal_peer_addref(peer);
-		write_lock_bh(global_lock);
-	} else {
-		rc = ksocknal_create_peer(&peer, ni, peerid);
-		if (rc)
-			goto failed_1;
-
-		write_lock_bh(global_lock);
-
-		/* called with a ref on ni, so shutdown can't have started */
-		LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
-		peer2 = ksocknal_find_peer_locked(ni, peerid);
-		if (!peer2) {
-			/*
-			 * NB this puts an "empty" peer in the peer
-			 * table (which takes my ref)
-			 */
-			list_add_tail(&peer->ksnp_list,
-				      ksocknal_nid2peerlist(peerid.nid));
-		} else {
-			ksocknal_peer_decref(peer);
-			peer = peer2;
-		}
-
-		/* +1 ref for me */
-		ksocknal_peer_addref(peer);
-		peer->ksnp_accepting++;
-
-		/*
-		 * Am I already connecting to this guy?  Resolve in
-		 * favour of higher NID...
-		 */
-		if (peerid.nid < ni->ni_nid &&
-		    ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
-			rc = EALREADY;
-			warn = "connection race resolution";
-			goto failed_2;
-		}
-	}
-
-	if (peer->ksnp_closing ||
-	    (active && route->ksnr_deleted)) {
-		/* peer/route got closed under me */
-		rc = -ESTALE;
-		warn = "peer/route removed";
-		goto failed_2;
-	}
-
-	if (!peer->ksnp_proto) {
-		/*
-		 * Never connected before.
-		 * NB recv_hello may have returned EPROTO to signal my peer
-		 * wants a different protocol than the one I asked for.
-		 */
-		LASSERT(list_empty(&peer->ksnp_conns));
-
-		peer->ksnp_proto = conn->ksnc_proto;
-		peer->ksnp_incarnation = incarnation;
-	}
-
-	if (peer->ksnp_proto != conn->ksnc_proto ||
-	    peer->ksnp_incarnation != incarnation) {
-		/* Peer rebooted or I've got the wrong protocol version */
-		ksocknal_close_peer_conns_locked(peer, 0, 0);
-
-		peer->ksnp_proto = NULL;
-		rc = ESTALE;
-		warn = peer->ksnp_incarnation != incarnation ?
-		       "peer rebooted" :
-		       "wrong proto version";
-		goto failed_2;
-	}
-
-	switch (rc) {
-	default:
-		LBUG();
-	case 0:
-		break;
-	case EALREADY:
-		warn = "lost conn race";
-		goto failed_2;
-	case EPROTO:
-		warn = "retry with different protocol version";
-		goto failed_2;
-	}
-
-	/*
-	 * Refuse to duplicate an existing connection, unless this is a
-	 * loopback connection
-	 */
-	if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
-		list_for_each(tmp, &peer->ksnp_conns) {
-			conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-			if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
-			    conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
-			    conn2->ksnc_type != conn->ksnc_type)
-				continue;
-
-			/*
-			 * Reply on a passive connection attempt so the peer
-			 * realises we're connected.
-			 */
-			LASSERT(!rc);
-			if (!active)
-				rc = EALREADY;
-
-			warn = "duplicate";
-			goto failed_2;
-		}
-	}
-
-	/*
-	 * If the connection created by this route didn't bind to the IP
-	 * address the route connected to, the connection/route matching
-	 * code below probably isn't going to work.
-	 */
-	if (active &&
-	    route->ksnr_ipaddr != conn->ksnc_ipaddr) {
-		CERROR("Route %s %pI4h connected to %pI4h\n",
-		       libcfs_id2str(peer->ksnp_id),
-		       &route->ksnr_ipaddr,
-		       &conn->ksnc_ipaddr);
-	}
-
-	/*
-	 * Search for a route corresponding to the new connection and
-	 * create an association.  This allows incoming connections created
-	 * by routes in my peer to match my own route entries so I don't
-	 * continually create duplicate routes.
-	 */
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
-			continue;
-
-		ksocknal_associate_route_conn_locked(route, conn);
-		break;
-	}
-
-	conn->ksnc_peer = peer;		 /* conn takes my ref on peer */
-	peer->ksnp_last_alive = jiffies;
-	peer->ksnp_send_keepalive = 0;
-	peer->ksnp_error = 0;
-
-	sched = ksocknal_choose_scheduler_locked(cpt);
-	sched->kss_nconns++;
-	conn->ksnc_scheduler = sched;
-
-	conn->ksnc_tx_last_post = jiffies;
-	/* Set the deadline for the outgoing HELLO to drain */
-	conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
-	conn->ksnc_tx_deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-	mb();   /* order with adding to peer's conn list */
-
-	list_add(&conn->ksnc_list, &peer->ksnp_conns);
-	ksocknal_conn_addref(conn);
-
-	ksocknal_new_packet(conn, 0);
-
-	conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
-
-	/* Take packets blocking for this connection. */
-	list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
-		int match = conn->ksnc_proto->pro_match_tx(conn, tx,
-							   tx->tx_nonblk);
-
-		if (match == SOCKNAL_MATCH_NO)
-			continue;
-
-		list_del(&tx->tx_list);
-		ksocknal_queue_tx_locked(tx, conn);
-	}
-
-	write_unlock_bh(global_lock);
-
-	/*
-	 * We've now got a new connection.  Any errors from here on are just
-	 * like "normal" comms errors and we close the connection normally.
-	 * NB (a) we still have to send the reply HELLO for passive
-	 *	connections,
-	 *    (b) normal I/O on the conn is blocked until I setup and call the
-	 *	socket callbacks.
-	 */
-	CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
-	       libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
-	       &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
-	       conn->ksnc_port, incarnation, cpt,
-	       (int)(sched - &sched->kss_info->ksi_scheds[0]));
-
-	if (active) {
-		/* additional routes after interface exchange? */
-		ksocknal_create_routes(peer, conn->ksnc_port,
-				       hello->kshm_ips, hello->kshm_nips);
-	} else {
-		hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
-						       hello->kshm_nips);
-		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
-	}
-
-	kvfree(hello);
-
-	/*
-	 * setup the socket AFTER I've received hello (it disables
-	 * SO_LINGER).  I might call back to the acceptor who may want
-	 * to send a protocol version response and then close the
-	 * socket; this ensures the socket only tears down after the
-	 * response has been sent.
-	 */
-	if (!rc)
-		rc = ksocknal_lib_setup_sock(sock);
-
-	write_lock_bh(global_lock);
-
-	/* NB my callbacks block while I hold ksnd_global_lock */
-	ksocknal_lib_set_callback(sock, conn);
-
-	if (!active)
-		peer->ksnp_accepting--;
-
-	write_unlock_bh(global_lock);
-
-	if (rc) {
-		write_lock_bh(global_lock);
-		if (!conn->ksnc_closing) {
-			/* could be closed by another thread */
-			ksocknal_close_conn_locked(conn, rc);
-		}
-		write_unlock_bh(global_lock);
-	} else if (!ksocknal_connsock_addref(conn)) {
-		/* Allow I/O to proceed. */
-		ksocknal_read_callback(conn);
-		ksocknal_write_callback(conn);
-		ksocknal_connsock_decref(conn);
-	}
-
-	ksocknal_connsock_decref(conn);
-	ksocknal_conn_decref(conn);
-	return rc;
-
- failed_2:
-	if (!peer->ksnp_closing &&
-	    list_empty(&peer->ksnp_conns) &&
-	    list_empty(&peer->ksnp_routes)) {
-		list_add(&zombies, &peer->ksnp_tx_queue);
-		list_del_init(&peer->ksnp_tx_queue);
-		ksocknal_unlink_peer_locked(peer);
-	}
-
-	write_unlock_bh(global_lock);
-
-	if (warn) {
-		if (rc < 0)
-			CERROR("Not creating conn %s type %d: %s\n",
-			       libcfs_id2str(peerid), conn->ksnc_type, warn);
-		else
-			CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
-			       libcfs_id2str(peerid), conn->ksnc_type, warn);
-	}
-
-	if (!active) {
-		if (rc > 0) {
-			/*
-			 * Request retry by replying with CONN_NONE;
-			 * ksnc_proto has been set already.
-			 */
-			conn->ksnc_type = SOCKLND_CONN_NONE;
-			hello->kshm_nips = 0;
-			ksocknal_send_hello(ni, conn, peerid.nid, hello);
-		}
-
-		write_lock_bh(global_lock);
-		peer->ksnp_accepting--;
-		write_unlock_bh(global_lock);
-	}
-
-	ksocknal_txlist_done(ni, &zombies, 1);
-	ksocknal_peer_decref(peer);
-
-failed_1:
-	kvfree(hello);
-
-	kfree(conn);
-
-failed_0:
-	sock_release(sock);
-	return rc;
-}
-
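One detail worth calling out from the function above is the connection-race rule: when two nodes dial each other simultaneously, the passive side refuses the incoming attempt only if the dialler has the lower NID and a connect of its own is already in flight, so the higher-NID node's active connection survives. A hedged, standalone restatement of the predicate (NID values illustrative):

#include <stdint.h>
#include <stdio.h>

/* Sketch of the race tie-break in ksocknal_create_conn(): on the passive
 * side, yield (EALREADY) iff the peer's NID is lower than ours and we are
 * still connecting to that peer ourselves, so exactly one of the two
 * simultaneous connections wins.
 */
static int passive_side_yields(uint64_t peer_nid, uint64_t my_nid,
                               int i_am_connecting_to_peer)
{
	return peer_nid < my_nid && i_am_connecting_to_peer;
}

int main(void)
{
	printf("%d\n", passive_side_yields(0x10, 0x20, 1)); /* 1: my connect wins */
	printf("%d\n", passive_side_yields(0x30, 0x20, 1)); /* 0: peer's connect wins */
	return 0;
}
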
-void
-ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
-{
-	/*
-	 * This just does the immediate housekeeping, and queues the
-	 * connection for the reaper to terminate.
-	 * Caller holds ksnd_global_lock exclusively in irq context
-	 */
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_route *route;
-	struct ksock_conn *conn2;
-	struct list_head *tmp;
-
-	LASSERT(!peer->ksnp_error);
-	LASSERT(!conn->ksnc_closing);
-	conn->ksnc_closing = 1;
-
-	/* ksnd_deathrow_conns takes over peer's ref */
-	list_del(&conn->ksnc_list);
-
-	route = conn->ksnc_route;
-	if (route) {
-		/* dissociate conn from route... */
-		LASSERT(!route->ksnr_deleted);
-		LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));
-
-		conn2 = NULL;
-		list_for_each(tmp, &peer->ksnp_conns) {
-			conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-			if (conn2->ksnc_route == route &&
-			    conn2->ksnc_type == conn->ksnc_type)
-				break;
-
-			conn2 = NULL;
-		}
-		if (!conn2)
-			route->ksnr_connected &= ~(1 << conn->ksnc_type);
-
-		conn->ksnc_route = NULL;
-
-		ksocknal_route_decref(route);     /* drop conn's ref on route */
-	}
-
-	if (list_empty(&peer->ksnp_conns)) {
-		/* No more connections to this peer */
-
-		if (!list_empty(&peer->ksnp_tx_queue)) {
-			struct ksock_tx *tx;
-
-			LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
-
-			/*
-			 * throw them to the last connection...,
-			 * these TXs will be sent to /dev/null by the scheduler
-			 */
-			list_for_each_entry(tx, &peer->ksnp_tx_queue,
-					    tx_list)
-				ksocknal_tx_prep(conn, tx);
-
-			spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
-			list_splice_init(&peer->ksnp_tx_queue,
-					 &conn->ksnc_tx_queue);
-			spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
-		}
-
-		peer->ksnp_proto = NULL;  /* renegotiate protocol version */
-		peer->ksnp_error = error; /* stash last conn close reason */
-
-		if (list_empty(&peer->ksnp_routes)) {
-			/*
-			 * I've just closed last conn belonging to a
-			 * peer with no routes to it
-			 */
-			ksocknal_unlink_peer_locked(peer);
-		}
-	}
-
-	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	list_add_tail(&conn->ksnc_list,
-		      &ksocknal_data.ksnd_deathrow_conns);
-	wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
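The `ksnr_connected & (1 << conn->ksnc_type)` bookkeeping above keeps one bit per connection type on each route, set while at least one conn of that type exists. A tiny illustration of the idiom (the type values here are made up, not the SOCKLND_CONN_* constants):

#include <stdio.h>

/* Sketch of per-route connection-type tracking: one bit per type, set on
 * establish, cleared when the last conn of that type closes.
 */
enum { CONN_CONTROL = 1, CONN_BULK_IN = 2, CONN_BULK_OUT = 3 };

int main(void)
{
	unsigned int connected = 0;

	connected |= 1 << CONN_CONTROL;		/* control conn up */
	connected |= 1 << CONN_BULK_IN;		/* bulk-in conn up */
	connected &= ~(1 << CONN_BULK_IN);	/* last bulk-in closed */

	printf("control still connected: %d\n",
	       !!(connected & (1 << CONN_CONTROL)));
	return 0;
}
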
-void
-ksocknal_peer_failed(struct ksock_peer *peer)
-{
-	int notify = 0;
-	unsigned long last_alive = 0;
-
-	/*
-	 * There has been a connection failure or comms error; but I'll only
-	 * tell LNET I think the peer is dead if it's to another kernel and
-	 * there are no connections or connection attempts in existence.
-	 */
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) &&
-	    list_empty(&peer->ksnp_conns) &&
-	    !peer->ksnp_accepting &&
-	    !ksocknal_find_connecting_route_locked(peer)) {
-		notify = 1;
-		last_alive = peer->ksnp_last_alive;
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	if (notify)
-		lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
-			    last_alive);
-}
-
-void
-ksocknal_finalize_zcreq(struct ksock_conn *conn)
-{
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_tx *tx;
-	struct ksock_tx *temp;
-	struct ksock_tx *tmp;
-	LIST_HEAD(zlist);
-
-	/*
-	 * NB safe to finalize TXs because closing of socket will
-	 * abort all buffered data
-	 */
-	LASSERT(!conn->ksnc_sock);
-
-	spin_lock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
-		if (tx->tx_conn != conn)
-			continue;
-
-		LASSERT(tx->tx_msg.ksm_zc_cookies[0]);
-
-		tx->tx_msg.ksm_zc_cookies[0] = 0;
-		tx->tx_zc_aborted = 1; /* mark it as not-acked */
-		list_del(&tx->tx_zc_list);
-		list_add(&tx->tx_zc_list, &zlist);
-	}
-
-	spin_unlock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
-		list_del(&tx->tx_zc_list);
-		ksocknal_tx_decref(tx);
-	}
-}
-
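ksocknal_finalize_zcreq() above is a textbook instance of a useful pattern: unlink the victims onto a private list while holding the spinlock, then drop the lock before doing the per-entry teardown. A generic userspace sketch of the pattern (all names hypothetical):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Move the entries to reap onto a private list under the lock, then free
 * them with the lock dropped, so teardown never runs in the critical
 * section.
 */
struct node { int victim; struct node *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *shared_list;

static void reap_victims(void)
{
	struct node *zlist = NULL, **pp, *n;

	pthread_mutex_lock(&lock);
	pp = &shared_list;
	while ((n = *pp)) {
		if (n->victim) {
			*pp = n->next;		/* unlink under lock */
			n->next = zlist;
			zlist = n;
		} else {
			pp = &n->next;
		}
	}
	pthread_mutex_unlock(&lock);

	while ((n = zlist)) {			/* release outside lock */
		zlist = n->next;
		free(n);
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		struct node *n = malloc(sizeof(*n));

		if (!n)
			return 1;
		n->victim = i & 1;
		n->next = shared_list;
		shared_list = n;
	}
	reap_victims();
	return 0;
}
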
-void
-ksocknal_terminate_conn(struct ksock_conn *conn)
-{
-	/*
-	 * This gets called by the reaper (guaranteed thread context) to
-	 * disengage the socket from its callbacks and close it.
-	 * ksnc_refcount will eventually hit zero, and then the reaper will
-	 * destroy it.
-	 */
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_sched *sched = conn->ksnc_scheduler;
-	int failed = 0;
-
-	LASSERT(conn->ksnc_closing);
-
-	/* wake up the scheduler to "send" all remaining packets to /dev/null */
-	spin_lock_bh(&sched->kss_lock);
-
-	/* a closing conn is always ready to tx */
-	conn->ksnc_tx_ready = 1;
-
-	if (!conn->ksnc_tx_scheduled &&
-	    !list_empty(&conn->ksnc_tx_queue)) {
-		list_add_tail(&conn->ksnc_tx_list,
-			      &sched->kss_tx_conns);
-		conn->ksnc_tx_scheduled = 1;
-		/* extra ref for scheduler */
-		ksocknal_conn_addref(conn);
-
-		wake_up(&sched->kss_waitq);
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-
-	/* serialise with callbacks */
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
-
-	/*
-	 * OK, so this conn may not be completely disengaged from its
-	 * scheduler yet, but it _has_ committed to terminate...
-	 */
-	conn->ksnc_scheduler->kss_nconns--;
-
-	if (peer->ksnp_error) {
-		/* peer's last conn closed in error */
-		LASSERT(list_empty(&peer->ksnp_conns));
-		failed = 1;
-		peer->ksnp_error = 0;     /* avoid multiple notifications */
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	if (failed)
-		ksocknal_peer_failed(peer);
-
-	/*
-	 * The socket is closed on the final put; either here, or in
-	 * ksocknal_{send,recv}msg().  Since we set up the linger2 option
-	 * when the connection was established, this will close the socket
-	 * immediately, aborting anything buffered in it. Any hung
-	 * zero-copy transmits will therefore complete in finite time.
-	 */
-	ksocknal_connsock_decref(conn);
-}
-
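The closing comment above depends on linger behaviour making the final close abortive rather than draining. The kernel path configures this internally; in plain BSD-socket terms the equivalent abortive close looks like this sketch:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

/* Userspace sketch of an abortive close: with SO_LINGER enabled and a zero
 * timeout, close() discards unsent buffered data and resets the connection
 * instead of draining it, so a hung peer cannot stall teardown.
 */
static int abortive_close(int fd)
{
	struct linger lg = { .l_onoff = 1, .l_linger = 0 };

	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg)) < 0) {
		perror("setsockopt(SO_LINGER)");
		return -1;
	}
	return close(fd);	/* sends RST, frees buffered data */
}
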
-void
-ksocknal_queue_zombie_conn(struct ksock_conn *conn)
-{
-	/* Queue the conn for the reaper to destroy */
-
-	LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
-	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
-	wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_destroy_conn(struct ksock_conn *conn)
-{
-	unsigned long last_rcv;
-
-	/* Final coup-de-grace of the reaper */
-	CDEBUG(D_NET, "connection %p\n", conn);
-
-	LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
-	LASSERT(!atomic_read(&conn->ksnc_sock_refcount));
-	LASSERT(!conn->ksnc_sock);
-	LASSERT(!conn->ksnc_route);
-	LASSERT(!conn->ksnc_tx_scheduled);
-	LASSERT(!conn->ksnc_rx_scheduled);
-	LASSERT(list_empty(&conn->ksnc_tx_queue));
-
-	/* complete current receive if any */
-	switch (conn->ksnc_rx_state) {
-	case SOCKNAL_RX_LNET_PAYLOAD:
-		last_rcv = conn->ksnc_rx_deadline -
-			   *ksocknal_tunables.ksnd_timeout * HZ;
-		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
-		       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
-		       &conn->ksnc_ipaddr, conn->ksnc_port,
-		       iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
-		       (jiffies - last_rcv) / HZ);
-		lnet_finalize(conn->ksnc_peer->ksnp_ni,
-			      conn->ksnc_cookie, -EIO);
-		break;
-	case SOCKNAL_RX_LNET_HEADER:
-		if (conn->ksnc_rx_started)
-			CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       &conn->ksnc_ipaddr, conn->ksnc_port,
-			       conn->ksnc_proto->pro_version);
-		break;
-	case SOCKNAL_RX_KSM_HEADER:
-		if (conn->ksnc_rx_started)
-			CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       &conn->ksnc_ipaddr, conn->ksnc_port,
-			       conn->ksnc_proto->pro_version);
-		break;
-	case SOCKNAL_RX_SLOP:
-		if (conn->ksnc_rx_started)
-			CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       &conn->ksnc_ipaddr, conn->ksnc_port);
-		break;
-	default:
-		LBUG();
-		break;
-	}
-
-	ksocknal_peer_decref(conn->ksnc_peer);
-
-	kfree(conn);
-}
-
-int
-ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why)
-{
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	int count = 0;
-
-	list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
-		conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
-		if (!ipaddr || conn->ksnc_ipaddr == ipaddr) {
-			count++;
-			ksocknal_close_conn_locked(conn, why);
-		}
-	}
-
-	return count;
-}
-
-int
-ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
-{
-	struct ksock_peer *peer = conn->ksnc_peer;
-	__u32 ipaddr = conn->ksnc_ipaddr;
-	int count;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	count = ksocknal_close_peer_conns_locked(peer, ipaddr, why);
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return count;
-}
-
-int
-ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
-{
-	struct ksock_peer *peer;
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	int lo;
-	int hi;
-	int i;
-	int count = 0;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	if (id.nid != LNET_NID_ANY) {
-		lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-		hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-	} else {
-		lo = 0;
-		hi = ksocknal_data.ksnd_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt,
-				   &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
-			      (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
-				continue;
-
-			count += ksocknal_close_peer_conns_locked(peer, ipaddr,
-								  0);
-		}
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	/* wildcards always succeed */
-	if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr)
-		return 0;
-
-	if (!count)
-		return -ENOENT;
-	else
-		return 0;
-}
-
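The wildcard semantics above (an ANY nid/pid matches everything, and a wildcarded request reports success even when nothing was closed) reduce to a small predicate. A sketch with illustrative wildcard encodings:

#include <stdint.h>
#include <stdio.h>

#define NID_ANY ((uint64_t)-1)	/* illustrative stand-in for LNET_NID_ANY */
#define PID_ANY ((uint32_t)-1)	/* and for LNET_PID_ANY */

/* An ANY field matches every value, so a fully wildcarded id selects all
 * peers; that is why the caller treats wildcards as always succeeding.
 */
static int id_matches(uint64_t want_nid, uint32_t want_pid,
                      uint64_t nid, uint32_t pid)
{
	return (want_nid == NID_ANY || want_nid == nid) &&
	       (want_pid == PID_ANY || want_pid == pid);
}

int main(void)
{
	printf("%d\n", id_matches(NID_ANY, 42, 7, 42));  /* 1 */
	printf("%d\n", id_matches(9, PID_ANY, 7, 42));   /* 0 */
	return 0;
}
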
-void
-ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive)
-{
-	/*
-	 * The router is telling me she's been notified of a change in
-	 * gateway state....
-	 */
-	struct lnet_process_id id = {0};
-
-	id.nid = gw_nid;
-	id.pid = LNET_PID_ANY;
-
-	CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
-	       alive ? "up" : "down");
-
-	if (!alive) {
-		/* If the gateway crashed, close all open connections... */
-		ksocknal_close_matching_conns(id, 0);
-		return;
-	}
-
-	/*
-	 * ...otherwise do nothing.  We can only establish new connections
-	 * if we have autoroutes, and these connect on demand.
-	 */
-}
-
-void
-ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when)
-{
-	int connect = 1;
-	unsigned long last_alive = 0;
-	unsigned long now = jiffies;
-	struct ksock_peer *peer = NULL;
-	rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
-	struct lnet_process_id id = {
-		.nid = nid,
-		.pid = LNET_PID_LUSTRE,
-	};
-
-	read_lock(glock);
-
-	peer = ksocknal_find_peer_locked(ni, id);
-	if (peer) {
-		struct ksock_conn *conn;
-		int bufnob;
-
-		list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) {
-			bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-
-			if (bufnob < conn->ksnc_tx_bufnob) {
-				/* something got ACKed */
-				conn->ksnc_tx_deadline =
-					jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-				peer->ksnp_last_alive = now;
-				conn->ksnc_tx_bufnob = bufnob;
-			}
-		}
-
-		last_alive = peer->ksnp_last_alive;
-		if (!ksocknal_find_connectable_route_locked(peer))
-			connect = 0;
-	}
-
-	read_unlock(glock);
-
-	if (last_alive)
-		*when = last_alive;
-
-	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
-	       libcfs_nid2str(nid), peer,
-	       last_alive ? (now - last_alive) / HZ : -1,
-	       connect);
-
-	if (!connect)
-		return;
-
-	ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
-
-	write_lock_bh(glock);
-
-	peer = ksocknal_find_peer_locked(ni, id);
-	if (peer)
-		ksocknal_launch_all_connections_locked(peer);
-
-	write_unlock_bh(glock);
-}
-
-static void
-ksocknal_push_peer(struct ksock_peer *peer)
-{
-	int index;
-	int i;
-	struct list_head *tmp;
-	struct ksock_conn *conn;
-
-	for (index = 0; ; index++) {
-		read_lock(&ksocknal_data.ksnd_global_lock);
-
-		i = 0;
-		conn = NULL;
-
-		list_for_each(tmp, &peer->ksnp_conns) {
-			if (i++ == index) {
-				conn = list_entry(tmp, struct ksock_conn,
-						  ksnc_list);
-				ksocknal_conn_addref(conn);
-				break;
-			}
-		}
-
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-
-		if (!conn)
-			break;
-
-		ksocknal_lib_push_conn(conn);
-		ksocknal_conn_decref(conn);
-	}
-}
-
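ksocknal_push_peer() above re-walks the conn list by index on every pass so it can drop the global lock while pushing each conn; the per-pass addref keeps the chosen conn alive across the unlocked call. A generic sketch of the idiom (userspace, names hypothetical):

#include <pthread.h>

/* Index-based iteration under a dropped lock: find the index-th element
 * while locked, pin it with a reference, unlock, operate, repeat.
 * Restarting from the head by index tolerates concurrent list mutation.
 */
struct obj { int refs; struct obj *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *head;

static void slow_op(struct obj *o) { (void)o; /* e.g. push a conn */ }
static void put(struct obj *o) { o->refs--; }

static void walk_all(void)
{
	for (int index = 0; ; index++) {
		struct obj *o;
		int i = 0;

		pthread_mutex_lock(&lock);
		for (o = head; o; o = o->next)
			if (i++ == index) {
				o->refs++;	/* pin across unlock */
				break;
			}
		pthread_mutex_unlock(&lock);

		if (!o)
			break;
		slow_op(o);	/* safe: lock dropped, ref held */
		put(o);
	}
}

int main(void) { walk_all(); return 0; }
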
-static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
-{
-	struct list_head *start;
-	struct list_head *end;
-	struct list_head *tmp;
-	int rc = -ENOENT;
-	unsigned int hsize = ksocknal_data.ksnd_peer_hash_size;
-
-	if (id.nid == LNET_NID_ANY) {
-		start = &ksocknal_data.ksnd_peers[0];
-		end = &ksocknal_data.ksnd_peers[hsize - 1];
-	} else {
-		start = ksocknal_nid2peerlist(id.nid);
-		end = ksocknal_nid2peerlist(id.nid);
-	}
-
-	for (tmp = start; tmp <= end; tmp++) {
-		int peer_off; /* searching offset in peer hash table */
-
-		for (peer_off = 0; ; peer_off++) {
-			struct ksock_peer *peer;
-			int i = 0;
-
-			read_lock(&ksocknal_data.ksnd_global_lock);
-			list_for_each_entry(peer, tmp, ksnp_list) {
-				if (!((id.nid == LNET_NID_ANY ||
-				       id.nid == peer->ksnp_id.nid) &&
-				      (id.pid == LNET_PID_ANY ||
-				       id.pid == peer->ksnp_id.pid)))
-					continue;
-
-				if (i++ == peer_off) {
-					ksocknal_peer_addref(peer);
-					break;
-				}
-			}
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-
-			if (i <= peer_off) /* no match at this offset */
-				break;
-
-			rc = 0;
-			ksocknal_push_peer(peer);
-			ksocknal_peer_decref(peer);
-		}
-	}
-	return rc;
-}
-
-static int
-ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask)
-{
-	struct ksock_net *net = ni->ni_data;
-	struct ksock_interface *iface;
-	int rc;
-	int i;
-	int j;
-	struct list_head *ptmp;
-	struct ksock_peer *peer;
-	struct list_head *rtmp;
-	struct ksock_route *route;
-
-	if (!ipaddress || !netmask)
-		return -EINVAL;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	iface = ksocknal_ip2iface(ni, ipaddress);
-	if (iface) {
-		/* silently ignore dups */
-		rc = 0;
-	} else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
-		rc = -ENOSPC;
-	} else {
-		iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
-
-		iface->ksni_ipaddr = ipaddress;
-		iface->ksni_netmask = netmask;
-		iface->ksni_nroutes = 0;
-		iface->ksni_npeers = 0;
-
-		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-			list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-				peer = list_entry(ptmp, struct ksock_peer,
-						  ksnp_list);
-
-				for (j = 0; j < peer->ksnp_n_passive_ips; j++)
-					if (peer->ksnp_passive_ips[j] == ipaddress)
-						iface->ksni_npeers++;
-
-				list_for_each(rtmp, &peer->ksnp_routes) {
-					route = list_entry(rtmp, struct ksock_route,
-							   ksnr_list);
-
-					if (route->ksnr_myipaddr == ipaddress)
-						iface->ksni_nroutes++;
-				}
-			}
-		}
-
-		rc = 0;
-		/*
-		 * NB only new connections will pay attention to the
-		 * new interface!
-		 */
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return rc;
-}
-
-static void
-ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr)
-{
-	struct list_head *tmp;
-	struct list_head *nxt;
-	struct ksock_route *route;
-	struct ksock_conn *conn;
-	int i;
-	int j;
-
-	for (i = 0; i < peer->ksnp_n_passive_ips; i++)
-		if (peer->ksnp_passive_ips[i] == ipaddr) {
-			for (j = i + 1; j < peer->ksnp_n_passive_ips; j++)
-				peer->ksnp_passive_ips[j - 1] =
-					peer->ksnp_passive_ips[j];
-			peer->ksnp_n_passive_ips--;
-			break;
-		}
-
-	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		if (route->ksnr_myipaddr != ipaddr)
-			continue;
-
-		if (route->ksnr_share_count) {
-			/* Manually created; keep, but unbind */
-			route->ksnr_myipaddr = 0;
-		} else {
-			ksocknal_del_route_locked(route);
-		}
-	}
-
-	list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
-		conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-		if (conn->ksnc_myipaddr == ipaddr)
-			ksocknal_close_conn_locked(conn, 0);
-	}
-}
-
-static int
-ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress)
-{
-	struct ksock_net *net = ni->ni_data;
-	int rc = -ENOENT;
-	struct list_head *tmp;
-	struct list_head *nxt;
-	struct ksock_peer *peer;
-	__u32 this_ip;
-	int i;
-	int j;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
-
-		if (!(!ipaddress || ipaddress == this_ip))
-			continue;
-
-		rc = 0;
-
-		for (j = i + 1; j < net->ksnn_ninterfaces; j++)
-			net->ksnn_interfaces[j - 1] =
-				net->ksnn_interfaces[j];
-
-		net->ksnn_ninterfaces--;
-
-		for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
-			list_for_each_safe(tmp, nxt,
-					   &ksocknal_data.ksnd_peers[j]) {
-				peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
-				if (peer->ksnp_ni != ni)
-					continue;
-
-				ksocknal_peer_del_interface_locked(peer, this_ip);
-			}
-		}
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return rc;
-}
-
-int
-ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
-	struct lnet_process_id id = {0};
-	struct libcfs_ioctl_data *data = arg;
-	int rc;
-
-	switch (cmd) {
-	case IOC_LIBCFS_GET_INTERFACE: {
-		struct ksock_net       *net = ni->ni_data;
-		struct ksock_interface *iface;
-
-		read_lock(&ksocknal_data.ksnd_global_lock);
-
-		if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
-			rc = -ENOENT;
-		} else {
-			rc = 0;
-			iface = &net->ksnn_interfaces[data->ioc_count];
-
-			data->ioc_u32[0] = iface->ksni_ipaddr;
-			data->ioc_u32[1] = iface->ksni_netmask;
-			data->ioc_u32[2] = iface->ksni_npeers;
-			data->ioc_u32[3] = iface->ksni_nroutes;
-		}
-
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-		return rc;
-	}
-
-	case IOC_LIBCFS_ADD_INTERFACE:
-		return ksocknal_add_interface(ni,
-					      data->ioc_u32[0], /* IP address */
-					      data->ioc_u32[1]); /* net mask */
-
-	case IOC_LIBCFS_DEL_INTERFACE:
-		return ksocknal_del_interface(ni,
-					      data->ioc_u32[0]); /* IP address */
-
-	case IOC_LIBCFS_GET_PEER: {
-		__u32 myip = 0;
-		__u32 ip = 0;
-		int port = 0;
-		int conn_count = 0;
-		int share_count = 0;
-
-		rc = ksocknal_get_peer_info(ni, data->ioc_count,
-					    &id, &myip, &ip, &port,
-					    &conn_count,  &share_count);
-		if (rc)
-			return rc;
-
-		data->ioc_nid    = id.nid;
-		data->ioc_count  = share_count;
-		data->ioc_u32[0] = ip;
-		data->ioc_u32[1] = port;
-		data->ioc_u32[2] = myip;
-		data->ioc_u32[3] = conn_count;
-		data->ioc_u32[4] = id.pid;
-		return 0;
-	}
-
-	case IOC_LIBCFS_ADD_PEER:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_LUSTRE;
-		return ksocknal_add_peer(ni, id,
-					  data->ioc_u32[0], /* IP */
-					  data->ioc_u32[1]); /* port */
-
-	case IOC_LIBCFS_DEL_PEER:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_ANY;
-		return ksocknal_del_peer(ni, id,
-					  data->ioc_u32[0]); /* IP */
-
-	case IOC_LIBCFS_GET_CONN: {
-		int txmem;
-		int rxmem;
-		int nagle;
-		struct ksock_conn *conn;
-
-		conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
-		if (!conn)
-			return -ENOENT;
-
-		ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
-
-		data->ioc_count  = txmem;
-		data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
-		data->ioc_flags  = nagle;
-		data->ioc_u32[0] = conn->ksnc_ipaddr;
-		data->ioc_u32[1] = conn->ksnc_port;
-		data->ioc_u32[2] = conn->ksnc_myipaddr;
-		data->ioc_u32[3] = conn->ksnc_type;
-		data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
-		data->ioc_u32[5] = rxmem;
-		data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
-		ksocknal_conn_decref(conn);
-		return 0;
-	}
-
-	case IOC_LIBCFS_CLOSE_CONNECTION:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_ANY;
-		return ksocknal_close_matching_conns(id,
-						      data->ioc_u32[0]);
-
-	case IOC_LIBCFS_REGISTER_MYNID:
-		/* Ignore if this is a noop */
-		if (data->ioc_nid == ni->ni_nid)
-			return 0;
-
-		CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
-		       libcfs_nid2str(data->ioc_nid),
-		       libcfs_nid2str(ni->ni_nid));
-		return -EINVAL;
-
-	case IOC_LIBCFS_PUSH_CONNECTION:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_ANY;
-		return ksocknal_push(ni, id);
-
-	default:
-		return -EINVAL;
-	}
-	/* not reached */
-}
-
-static void
-ksocknal_free_buffers(void)
-{
-	LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs));
-
-	if (ksocknal_data.ksnd_sched_info) {
-		struct ksock_sched_info *info;
-		int i;
-
-		cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info)
-			kfree(info->ksi_scheds);
-		cfs_percpt_free(ksocknal_data.ksnd_sched_info);
-	}
-
-	kvfree(ksocknal_data.ksnd_peers);
-
-	spin_lock(&ksocknal_data.ksnd_tx_lock);
-
-	if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
-		struct list_head zlist;
-		struct ksock_tx *tx;
-		struct ksock_tx *temp;
-
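-		/* transplant the list: link zlist in, then unlink the old head */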
-		list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
-		list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-
-		list_for_each_entry_safe(tx, temp, &zlist, tx_list) {
-			list_del(&tx->tx_list);
-			kfree(tx);
-		}
-	} else {
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-	}
-}
-
-static void
-ksocknal_base_shutdown(void)
-{
-	struct ksock_sched_info *info;
-	struct ksock_sched *sched;
-	int i;
-	int j;
-
-	LASSERT(!ksocknal_data.ksnd_nnets);
-
-	switch (ksocknal_data.ksnd_init) {
-	default:
-		LASSERT(0);
-		/* fall through */
-	case SOCKNAL_INIT_ALL:
-	case SOCKNAL_INIT_DATA:
-		LASSERT(ksocknal_data.ksnd_peers);
-		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
-			LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));
-
-		LASSERT(list_empty(&ksocknal_data.ksnd_nets));
-		LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
-		LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
-		LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
-		LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
-
-		if (ksocknal_data.ksnd_sched_info) {
-			cfs_percpt_for_each(info, i,
-					    ksocknal_data.ksnd_sched_info) {
-				if (!info->ksi_scheds)
-					continue;
-
-				for (j = 0; j < info->ksi_nthreads_max; j++) {
-					sched = &info->ksi_scheds[j];
-					LASSERT(list_empty(
-						&sched->kss_tx_conns));
-					LASSERT(list_empty(
-						&sched->kss_rx_conns));
-					LASSERT(list_empty(
-						&sched->kss_zombie_noop_txs));
-					LASSERT(!sched->kss_nconns);
-				}
-			}
-		}
-
-		/* flag threads to terminate; wake and wait for them to die */
-		ksocknal_data.ksnd_shuttingdown = 1;
-		wake_up_all(&ksocknal_data.ksnd_connd_waitq);
-		wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
-
-		if (ksocknal_data.ksnd_sched_info) {
-			cfs_percpt_for_each(info, i,
-					    ksocknal_data.ksnd_sched_info) {
-				if (!info->ksi_scheds)
-					continue;
-
-				for (j = 0; j < info->ksi_nthreads_max; j++) {
-					sched = &info->ksi_scheds[j];
-					wake_up_all(&sched->kss_waitq);
-				}
-			}
-		}
-
-		i = 4;
-		read_lock(&ksocknal_data.ksnd_global_lock);
-		while (ksocknal_data.ksnd_nthreads) {
-			i++;
-			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-			       "waiting for %d threads to terminate\n",
-				ksocknal_data.ksnd_nthreads);
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-			read_lock(&ksocknal_data.ksnd_global_lock);
-		}
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-
-		ksocknal_free_buffers();
-
-		ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
-		break;
-	}
-
-	module_put(THIS_MODULE);
-}
-
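The `(i & (-i)) == i` test in the wait loop above is a branch-free power-of-two check: `i & -i` isolates the lowest set bit, which equals `i` exactly when a single bit is set. It throttles the warning to exponentially spaced iterations rather than every pass:

#include <stdio.h>

/* Demonstrates the power-of-two log-throttling trick used in the shutdown
 * wait loop: the warning would fire only at counter values 1, 2, 4, 8, ...
 */
int main(void)
{
	for (int i = 1; i <= 32; i++)
		if ((i & (-i)) == i)
			printf("would warn at iteration %d\n", i);
	return 0;
}
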
-static __u64
-ksocknal_new_incarnation(void)
-{
-	/* The incarnation number is the time this module loaded and it
-	 * identifies this particular instance of the socknal.
-	 */
-	return ktime_get_ns();
-}
-
-static int
-ksocknal_base_startup(void)
-{
-	struct ksock_sched_info	*info;
-	int rc;
-	int i;
-
-	LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
-	LASSERT(!ksocknal_data.ksnd_nnets);
-
-	memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
-
-	ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
-	ksocknal_data.ksnd_peers = kvmalloc_array(ksocknal_data.ksnd_peer_hash_size,
-						  sizeof(struct list_head),
-						  GFP_KERNEL);
-	if (!ksocknal_data.ksnd_peers)
-		return -ENOMEM;
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
-		INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
-
-	rwlock_init(&ksocknal_data.ksnd_global_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
-
-	spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
-	init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
-
-	spin_lock_init(&ksocknal_data.ksnd_connd_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
-	init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
-
-	spin_lock_init(&ksocknal_data.ksnd_tx_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
-
-	/* NB memset above zeros whole of ksocknal_data */
-
-	/* flag lists/ptrs/locks initialised */
-	ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
-	try_module_get(THIS_MODULE);
-
-	ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
-							 sizeof(*info));
-	if (!ksocknal_data.ksnd_sched_info)
-		goto failed;
-
-	cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
-		struct ksock_sched *sched;
-		int nthrs;
-
-		nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
-		if (*ksocknal_tunables.ksnd_nscheds > 0) {
-			nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
-		} else {
-			/*
-			 * cap at half of the CPUs; assume the other half is
-			 * reserved for upper-layer modules
-			 */
-			nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
-		}
-
-		info->ksi_nthreads_max = nthrs;
-		info->ksi_cpt = i;
-
-		info->ksi_scheds = kzalloc_cpt(info->ksi_nthreads_max * sizeof(*sched),
-					       GFP_NOFS, i);
-		if (!info->ksi_scheds)
-			goto failed;
-
-		for (; nthrs > 0; nthrs--) {
-			sched = &info->ksi_scheds[nthrs - 1];
-
-			sched->kss_info = info;
-			spin_lock_init(&sched->kss_lock);
-			INIT_LIST_HEAD(&sched->kss_rx_conns);
-			INIT_LIST_HEAD(&sched->kss_tx_conns);
-			INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
-			init_waitqueue_head(&sched->kss_waitq);
-		}
-	}
-
-	ksocknal_data.ksnd_connd_starting       = 0;
-	ksocknal_data.ksnd_connd_failed_stamp   = 0;
-	ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
-	/*
-	 * must have at least 2 connds to remain responsive to accepts while
-	 * connecting
-	 */
-	if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
-		*ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
-
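-	/* NB pointer (not value) assignment: ksnd_nconnds_max now aliases ksnd_nconnds */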
-	if (*ksocknal_tunables.ksnd_nconnds_max <
-	    *ksocknal_tunables.ksnd_nconnds) {
-		ksocknal_tunables.ksnd_nconnds_max =
-			ksocknal_tunables.ksnd_nconnds;
-	}
-
-	for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
-		char name[16];
-
-		spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-		ksocknal_data.ksnd_connd_starting++;
-		spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
-		snprintf(name, sizeof(name), "socknal_cd%02d", i);
-		rc = ksocknal_thread_start(ksocknal_connd,
-					   (void *)((uintptr_t)i), name);
-		if (rc) {
-			spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-			ksocknal_data.ksnd_connd_starting--;
-			spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-			CERROR("Can't spawn socknal connd: %d\n", rc);
-			goto failed;
-		}
-	}
-
-	rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
-	if (rc) {
-		CERROR("Can't spawn socknal reaper: %d\n", rc);
-		goto failed;
-	}
-
-	/* flag everything initialised */
-	ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
-	return 0;
-
- failed:
-	ksocknal_base_shutdown();
-	return -ENETDOWN;
-}
-
-static void
-ksocknal_debug_peerhash(struct lnet_ni *ni)
-{
-	struct ksock_peer *peer = NULL;
-	struct list_head *tmp;
-	int i;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-		list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
-			if (peer->ksnp_ni == ni)
-				break;
-
-			peer = NULL;
-		}
-	}
-
-	if (peer) {
-		struct ksock_route *route;
-		struct ksock_conn  *conn;
-
-		CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
-		      libcfs_id2str(peer->ksnp_id),
-		      atomic_read(&peer->ksnp_refcount),
-		      peer->ksnp_sharecount, peer->ksnp_closing,
-		      peer->ksnp_accepting, peer->ksnp_error,
-		      peer->ksnp_zc_next_cookie,
-		      !list_empty(&peer->ksnp_tx_queue),
-		      !list_empty(&peer->ksnp_zc_req_list));
-
-		list_for_each(tmp, &peer->ksnp_routes) {
-			route = list_entry(tmp, struct ksock_route, ksnr_list);
-			CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
-			      atomic_read(&route->ksnr_refcount),
-			      route->ksnr_scheduled, route->ksnr_connecting,
-			      route->ksnr_connected, route->ksnr_deleted);
-		}
-
-		list_for_each(tmp, &peer->ksnp_conns) {
-			conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-			CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
-			      atomic_read(&conn->ksnc_conn_refcount),
-			      atomic_read(&conn->ksnc_sock_refcount),
-			      conn->ksnc_type, conn->ksnc_closing);
-		}
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_shutdown(struct lnet_ni *ni)
-{
-	struct ksock_net *net = ni->ni_data;
-	int i;
-	struct lnet_process_id anyid = {0};
-
-	anyid.nid = LNET_NID_ANY;
-	anyid.pid = LNET_PID_ANY;
-
-	LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
-	LASSERT(ksocknal_data.ksnd_nnets > 0);
-
-	spin_lock_bh(&net->ksnn_lock);
-	net->ksnn_shutdown = 1;		 /* prevent new peers */
-	spin_unlock_bh(&net->ksnn_lock);
-
-	/* Delete all peers */
-	ksocknal_del_peer(ni, anyid, 0);
-
-	/* Wait for all peer state to clean up */
-	i = 2;
-	spin_lock_bh(&net->ksnn_lock);
-	while (net->ksnn_npeers) {
-		spin_unlock_bh(&net->ksnn_lock);
-
-		i++;
-		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-		       "waiting for %d peers to disconnect\n",
-		       net->ksnn_npeers);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-
-		ksocknal_debug_peerhash(ni);
-
-		spin_lock_bh(&net->ksnn_lock);
-	}
-	spin_unlock_bh(&net->ksnn_lock);
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		LASSERT(!net->ksnn_interfaces[i].ksni_npeers);
-		LASSERT(!net->ksnn_interfaces[i].ksni_nroutes);
-	}
-
-	list_del(&net->ksnn_list);
-	kfree(net);
-
-	ksocknal_data.ksnd_nnets--;
-	if (!ksocknal_data.ksnd_nnets)
-		ksocknal_base_shutdown();
-}
-
-static int
-ksocknal_enumerate_interfaces(struct ksock_net *net)
-{
-	char **names;
-	int i;
-	int j;
-	int rc;
-	int n;
-
-	n = lnet_ipif_enumerate(&names);
-	if (n <= 0) {
-		CERROR("Can't enumerate interfaces: %d\n", n);
-		return n;
-	}
-
-	for (i = j = 0; i < n; i++) {
-		int up;
-		__u32 ip;
-		__u32 mask;
-
-		if (!strcmp(names[i], "lo")) /* skip the loopback IF */
-			continue;
-
-		rc = lnet_ipif_query(names[i], &up, &ip, &mask);
-		if (rc) {
-			CWARN("Can't get interface %s info: %d\n",
-			      names[i], rc);
-			continue;
-		}
-
-		if (!up) {
-			CWARN("Ignoring interface %s (down)\n",
-			      names[i]);
-			continue;
-		}
-
-		if (j == LNET_MAX_INTERFACES) {
-			CWARN("Ignoring interface %s (too many interfaces)\n",
-			      names[i]);
-			continue;
-		}
-
-		net->ksnn_interfaces[j].ksni_ipaddr = ip;
-		net->ksnn_interfaces[j].ksni_netmask = mask;
-		strlcpy(net->ksnn_interfaces[j].ksni_name,
-			names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
-		j++;
-	}
-
-	lnet_ipif_free_enumeration(names, n);
-
-	if (!j)
-		CERROR("Can't find any usable interfaces\n");
-
-	return j;
-}
-
-static int
-ksocknal_search_new_ipif(struct ksock_net *net)
-{
-	int new_ipif = 0;
-	int i;
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
-		char *colon = strchr(ifnam, ':');
-		int found  = 0;
-		struct ksock_net *tmp;
-		int j;
-
-		if (colon) /* ignore alias device */
-			*colon = 0;
-
-		list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
-			for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
-				char *ifnam2 =
-					&tmp->ksnn_interfaces[j].ksni_name[0];
-				char *colon2 = strchr(ifnam2, ':');
-
-				if (colon2)
-					*colon2 = 0;
-
-				found = !strcmp(ifnam, ifnam2);
-				if (colon2)
-					*colon2 = ':';
-			}
-			if (found)
-				break;
-		}
-
-		new_ipif += !found;
-		if (colon)
-			*colon = ':';
-	}
-
-	return new_ipif;
-}
-
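Alias devices ("eth0:1") share their base interface, which is why the code above temporarily NUL-terminates at the colon before comparing names. The same comparison can be written without mutating the buffers; a small sketch (helper name invented here):

#include <stdio.h>
#include <string.h>

/* Compare two interface names ignoring any ":alias" suffix, mirroring the
 * colon-trimming in ksocknal_search_new_ipif() without writing into the
 * buffers.
 */
static int same_base_ifname(const char *a, const char *b)
{
	size_t la = strcspn(a, ":");
	size_t lb = strcspn(b, ":");

	return la == lb && !strncmp(a, b, la);
}

int main(void)
{
	printf("%d\n", same_base_ifname("eth0:1", "eth0"));	/* 1 */
	printf("%d\n", same_base_ifname("eth0", "eth1"));	/* 0 */
	return 0;
}
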
-static int
-ksocknal_start_schedulers(struct ksock_sched_info *info)
-{
-	int nthrs;
-	int rc = 0;
-	int i;
-
-	if (!info->ksi_nthreads) {
-		if (*ksocknal_tunables.ksnd_nscheds > 0) {
-			nthrs = info->ksi_nthreads_max;
-		} else {
-			nthrs = cfs_cpt_weight(lnet_cpt_table(),
-					       info->ksi_cpt);
-			nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
-			nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
-		}
-		nthrs = min(nthrs, info->ksi_nthreads_max);
-	} else {
-		LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
-		/* start up to two more threads if there is a new interface */
-		nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
-	}
-
-	for (i = 0; i < nthrs; i++) {
-		long id;
-		char name[20];
-		struct ksock_sched *sched;
-
-		id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
-		sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
-		snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
-			 info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
-
-		rc = ksocknal_thread_start(ksocknal_scheduler,
-					   (void *)id, name);
-		if (!rc)
-			continue;
-
-		CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
-		       info->ksi_cpt, info->ksi_nthreads + i, rc);
-		break;
-	}
-
-	info->ksi_nthreads += i;
-	return rc;
-}
-
-static int
-ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
-{
-	int newif = ksocknal_search_new_ipif(net);
-	int rc;
-	int i;
-
-	LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
-
-	for (i = 0; i < ncpts; i++) {
-		struct ksock_sched_info *info;
-		int cpt = !cpts ? i : cpts[i];
-
-		LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
-		info = ksocknal_data.ksnd_sched_info[cpt];
-
-		if (!newif && info->ksi_nthreads > 0)
-			continue;
-
-		rc = ksocknal_start_schedulers(info);
-		if (rc)
-			return rc;
-	}
-	return 0;
-}
-
-int
-ksocknal_startup(struct lnet_ni *ni)
-{
-	struct ksock_net *net;
-	int rc;
-	int i;
-
-	LASSERT(ni->ni_lnd == &the_ksocklnd);
-
-	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
-		rc = ksocknal_base_startup();
-		if (rc)
-			return rc;
-	}
-
-	net = kzalloc(sizeof(*net), GFP_NOFS);
-	if (!net)
-		goto fail_0;
-
-	spin_lock_init(&net->ksnn_lock);
-	net->ksnn_incarnation = ksocknal_new_incarnation();
-	ni->ni_data = net;
-	ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
-	ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
-	ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
-	ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
-
-	if (!ni->ni_interfaces[0]) {
-		rc = ksocknal_enumerate_interfaces(net);
-		if (rc <= 0)
-			goto fail_1;
-
-		net->ksnn_ninterfaces = 1;
-	} else {
-		for (i = 0; i < LNET_MAX_INTERFACES; i++) {
-			int up;
-
-			if (!ni->ni_interfaces[i])
-				break;
-
-			rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
-					     &net->ksnn_interfaces[i].ksni_ipaddr,
-					     &net->ksnn_interfaces[i].ksni_netmask);
-
-			if (rc) {
-				CERROR("Can't get interface %s info: %d\n",
-				       ni->ni_interfaces[i], rc);
-				goto fail_1;
-			}
-
-			if (!up) {
-				CERROR("Interface %s is down\n",
-				       ni->ni_interfaces[i]);
-				goto fail_1;
-			}
-
-			strlcpy(net->ksnn_interfaces[i].ksni_name,
-				ni->ni_interfaces[i],
-				sizeof(net->ksnn_interfaces[i].ksni_name));
-		}
-		net->ksnn_ninterfaces = i;
-	}
-
-	/* call this before adding the net to ksocknal_data.ksnd_nets */
-	rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
-	if (rc)
-		goto fail_1;
-
-	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
-				net->ksnn_interfaces[0].ksni_ipaddr);
-	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
-
-	ksocknal_data.ksnd_nnets++;
-
-	return 0;
-
- fail_1:
-	kfree(net);
- fail_0:
-	if (!ksocknal_data.ksnd_nnets)
-		ksocknal_base_shutdown();
-
-	return -ENETDOWN;
-}
-
-static void __exit ksocklnd_exit(void)
-{
-	lnet_unregister_lnd(&the_ksocklnd);
-}
-
-static int __init ksocklnd_init(void)
-{
-	int rc;
-
-	/* check ksnr_connected/connecting field large enough */
-	BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
-	BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
-
-	/* initialize the_ksocklnd */
-	the_ksocklnd.lnd_type     = SOCKLND;
-	the_ksocklnd.lnd_startup  = ksocknal_startup;
-	the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
-	the_ksocklnd.lnd_ctl      = ksocknal_ctl;
-	the_ksocklnd.lnd_send     = ksocknal_send;
-	the_ksocklnd.lnd_recv     = ksocknal_recv;
-	the_ksocklnd.lnd_notify   = ksocknal_notify;
-	the_ksocklnd.lnd_query    = ksocknal_query;
-	the_ksocklnd.lnd_accept   = ksocknal_accept;
-
-	rc = ksocknal_tunables_init();
-	if (rc)
-		return rc;
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	lnet_register_lnd(&the_ksocklnd);
-
-	return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("TCP Socket LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ksocklnd_init);
-module_exit(ksocklnd_exit);

+ 0 - 704
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h

@@ -1,704 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#ifndef _SOCKLND_SOCKLND_H_
-#define _SOCKLND_SOCKLND_H_
-
-#define DEBUG_PORTAL_ALLOC
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/crc32.h>
-#include <linux/errno.h>
-#include <linux/if.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/sysctl.h>
-#include <linux/uio.h>
-#include <linux/unistd.h>
-#include <asm/irq.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/socklnd.h>
-
-/* assume one thread for each connection type */
-#define SOCKNAL_NSCHEDS		3
-#define SOCKNAL_NSCHEDS_HIGH	(SOCKNAL_NSCHEDS << 1)
-
-#define SOCKNAL_PEER_HASH_SIZE  101   /* # peer lists */
-#define SOCKNAL_RESCHED         100   /* # scheduler loops before reschedule */
-#define SOCKNAL_INSANITY_RECONN 5000  /* # reconnections before connd looks stuck */
-#define SOCKNAL_ENOMEM_RETRY    1     /* jiffies between retries */
-
-#define SOCKNAL_SINGLE_FRAG_TX  0     /* disable multi-fragment sends */
-#define SOCKNAL_SINGLE_FRAG_RX  0     /* disable multi-fragment receives */
-
-#define SOCKNAL_VERSION_DEBUG   0     /* enable protocol version debugging */
-
-/*
- * risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
- * no risk if we're not running on a CONFIG_HIGHMEM platform.
- */
-#ifdef CONFIG_HIGHMEM
-# define SOCKNAL_RISK_KMAP_DEADLOCK  0
-#else
-# define SOCKNAL_RISK_KMAP_DEADLOCK  1
-#endif
-
-struct ksock_sched_info;
-
-struct ksock_sched {				/* per scheduler state */
-	spinlock_t              kss_lock;       /* serialise */
-	struct list_head        kss_rx_conns;   /* conn waiting to be read */
-	struct list_head        kss_tx_conns;   /* conn waiting to be written */
-	struct list_head        kss_zombie_noop_txs; /* zombie noop tx list */
-	wait_queue_head_t       kss_waitq;	/* where scheduler sleeps */
-	int                     kss_nconns;     /* # connections assigned to
-						 * this scheduler
-						 */
-	struct ksock_sched_info *kss_info;	/* owner of it */
-};
-
-struct ksock_sched_info {
-	int                     ksi_nthreads_max; /* max allowed threads */
-	int                     ksi_nthreads;     /* number of threads */
-	int                     ksi_cpt;          /* CPT id */
-	struct ksock_sched	*ksi_scheds;	  /* array of schedulers */
-};
-
-#define KSOCK_CPT_SHIFT           16
-#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
-#define KSOCK_THREAD_CPT(id)      ((id) >> KSOCK_CPT_SHIFT)
-#define KSOCK_THREAD_SID(id)      ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
-
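
The thread-ID macros above pack a CPU-partition (CPT) index into the high bits and the per-partition scheduler index into the low 16 bits of a single long. A rough userspace illustration of the packing (macro bodies copied from this header; the values are made up):

#include <stdio.h>

#define KSOCK_CPT_SHIFT           16
#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
#define KSOCK_THREAD_CPT(id)      ((id) >> KSOCK_CPT_SHIFT)
#define KSOCK_THREAD_SID(id)      ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))

int main(void)
{
	long id = KSOCK_THREAD_ID(2, 5);	/* CPT 2, scheduler 5 */

	/* prints "id=0x20005 cpt=2 sid=5" */
	printf("id=0x%lx cpt=%ld sid=%ld\n", id,
	       (long)KSOCK_THREAD_CPT(id), (long)KSOCK_THREAD_SID(id));
	return 0;
}

This is how ksocknal_scheduler() (in socklnd_cb.c below) recovers its ksock_sched_info and ksock_sched from the opaque thread argument.
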
-struct ksock_interface {			/* in-use interface */
-	__u32		ksni_ipaddr;		/* interface's IP address */
-	__u32		ksni_netmask;		/* interface's network mask */
-	int		ksni_nroutes;		/* # routes using (active) */
-	int		ksni_npeers;		/* # peers using (passive) */
-	char		ksni_name[IFNAMSIZ];	/* interface name */
-};
-
-struct ksock_tunables {
-	int          *ksnd_timeout;            /* "stuck" socket timeout
-						* (seconds)
-						*/
-	int          *ksnd_nscheds;            /* # scheduler threads in each
-						* pool while starting
-						*/
-	int          *ksnd_nconnds;            /* # connection daemons */
-	int          *ksnd_nconnds_max;        /* max # connection daemons */
-	int          *ksnd_min_reconnectms;    /* first connection retry after
-						* (ms)...
-						*/
-	int          *ksnd_max_reconnectms;    /* ...exponentially increasing to
-						* this
-						*/
-	int          *ksnd_eager_ack;          /* make TCP ack eagerly? */
-	int          *ksnd_typed_conns;        /* drive sockets by type? */
-	int          *ksnd_min_bulk;           /* smallest "large" message */
-	int          *ksnd_tx_buffer_size;     /* socket tx buffer size */
-	int          *ksnd_rx_buffer_size;     /* socket rx buffer size */
-	int          *ksnd_nagle;              /* enable NAGLE? */
-	int          *ksnd_round_robin;        /* round robin for multiple
-						* interfaces
-						*/
-	int          *ksnd_keepalive;          /* # secs for sending keepalive
-						* NOOP
-						*/
-	int          *ksnd_keepalive_idle;     /* # idle secs before 1st probe
-						*/
-	int          *ksnd_keepalive_count;    /* # probes */
-	int          *ksnd_keepalive_intvl;    /* time between probes */
-	int          *ksnd_credits;            /* # concurrent sends */
-	int          *ksnd_peertxcredits;      /* # concurrent sends to 1 peer
-						*/
-	int          *ksnd_peerrtrcredits;     /* # per-peer router buffer
-						* credits
-						*/
-	int          *ksnd_peertimeout;        /* seconds to consider peer dead
-						*/
-	int          *ksnd_enable_csum;        /* enable checksum */
-	int          *ksnd_inject_csum_error;  /* set non-zero to inject
-						* checksum error
-						*/
-	int          *ksnd_nonblk_zcack;       /* always send zc-ack on
-						* non-blocking connection
-						*/
-	unsigned int *ksnd_zc_min_payload;     /* minimum zero copy payload
-						* size
-						*/
-	int          *ksnd_zc_recv;            /* enable ZC receive (for
-						* Chelsio TOE)
-						*/
-	int          *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to
-						* enable ZC receive
-						*/
-};
-
-struct ksock_net {
-	__u64		  ksnn_incarnation;	/* my epoch */
-	spinlock_t	  ksnn_lock;		/* serialise */
-	struct list_head	  ksnn_list;		/* chain on global list */
-	int		  ksnn_npeers;		/* # peers */
-	int		  ksnn_shutdown;	/* shutting down? */
-	int		  ksnn_ninterfaces;	/* IP interfaces */
-	struct ksock_interface ksnn_interfaces[LNET_MAX_INTERFACES];
-};
-
-/** connd timeout */
-#define SOCKNAL_CONND_TIMEOUT  120
-/** reserved thread for accepting & creating new connd */
-#define SOCKNAL_CONND_RESV     1
-
-struct ksock_nal_data {
-	int                     ksnd_init;              /* initialisation state
-							 */
-	int                     ksnd_nnets;             /* # networks set up */
-	struct list_head        ksnd_nets;              /* list of nets */
-	rwlock_t                ksnd_global_lock;       /* stabilize peer/conn
-							 * ops
-							 */
-	struct list_head        *ksnd_peers;            /* hash table of all my
-							 * known peers
-							 */
-	int                     ksnd_peer_hash_size;    /* size of ksnd_peers */
-
-	int                     ksnd_nthreads;          /* # live threads */
-	int                     ksnd_shuttingdown;      /* tell threads to exit
-							 */
-	struct ksock_sched_info **ksnd_sched_info;      /* schedulers info */
-
-	atomic_t                ksnd_nactive_txs;       /* #active txs */
-
-	struct list_head        ksnd_deathrow_conns;    /* conns to close:
-							 * reaper_lock
-							 */
-	struct list_head        ksnd_zombie_conns;      /* conns to free:
-							 * reaper_lock
-							 */
-	struct list_head        ksnd_enomem_conns;      /* conns to retry:
-							 * reaper_lock
-							 */
-	wait_queue_head_t       ksnd_reaper_waitq;      /* reaper sleeps here */
-	unsigned long	        ksnd_reaper_waketime;   /* when reaper will wake
-							 */
-	spinlock_t              ksnd_reaper_lock;       /* serialise */
-
-	int                     ksnd_enomem_tx;         /* test ENOMEM sender */
-	int                     ksnd_stall_tx;          /* test sluggish sender
-							 */
-	int                     ksnd_stall_rx;          /* test sluggish
-							 * receiver
-							 */
-	struct list_head        ksnd_connd_connreqs;    /* incoming connection
-							 * requests
-							 */
-	struct list_head        ksnd_connd_routes;      /* routes waiting to be
-							 * connected
-							 */
-	wait_queue_head_t       ksnd_connd_waitq;       /* connds sleep here */
-	int                     ksnd_connd_connecting;  /* # connds connecting
-							 */
-	time64_t                ksnd_connd_failed_stamp;/* time stamp of the
-							 * last failed
-							 * connecting attempt
-							 */
-	time64_t                ksnd_connd_starting_stamp;/* time stamp of the
-							   * last starting connd
-							   */
-	unsigned int		ksnd_connd_starting;	/* # starting connd */
-	unsigned int		ksnd_connd_running;	/* # running connd */
-	spinlock_t              ksnd_connd_lock;        /* serialise */
-
-	struct list_head        ksnd_idle_noop_txs;     /* list head for freed
-							 * noop tx
-							 */
-	spinlock_t              ksnd_tx_lock;           /* serialise, g_lock
-							 * unsafe
-							 */
-};
-
-#define SOCKNAL_INIT_NOTHING 0
-#define SOCKNAL_INIT_DATA    1
-#define SOCKNAL_INIT_ALL     2
-
-/*
- * A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments (the first frag contains the portals header),
- * followed by 0 or more struct bio_vec fragments.
- *
- * On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header).  Once the header has been received, the payload is
- * received into either struct iovec or struct bio_vec fragments, depending on
- * what the header matched or whether the message needs forwarding.
- */
-struct ksock_conn;  /* forward ref */
-struct ksock_peer;  /* forward ref */
-struct ksock_route; /* forward ref */
-struct ksock_proto; /* forward ref */
-
-struct ksock_tx {			   /* transmit packet */
-	struct list_head  tx_list;         /* queue on conn for transmission etc
-					    */
-	struct list_head  tx_zc_list;      /* queue on peer for ZC request */
-	atomic_t          tx_refcount;     /* tx reference count */
-	int               tx_nob;          /* # packet bytes */
-	int               tx_resid;        /* residual bytes */
-	int               tx_niov;         /* # packet iovec frags */
-	struct kvec       *tx_iov;         /* packet iovec frags */
-	int               tx_nkiov;        /* # packet page frags */
-	unsigned short    tx_zc_aborted;   /* aborted ZC request */
-	unsigned short    tx_zc_capable:1; /* payload is large enough for ZC */
-	unsigned short    tx_zc_checked:1; /* Have I checked if I should ZC? */
-	unsigned short    tx_nonblk:1;     /* it's a non-blocking ACK */
-	struct bio_vec	  *tx_kiov;	   /* packet page frags */
-	struct ksock_conn *tx_conn;        /* owning conn */
-	struct lnet_msg        *tx_lnetmsg;     /* lnet message for lnet_finalize()
-					    */
-	unsigned long     tx_deadline;     /* when (in jiffies) tx times out */
-	struct ksock_msg       tx_msg;          /* socklnd message buffer */
-	int               tx_desc_size;    /* size of this descriptor */
-	union {
-		struct {
-			struct kvec iov;     /* virt hdr */
-			struct bio_vec kiov[0]; /* paged payload */
-		} paged;
-		struct {
-			struct kvec iov[1];  /* virt hdr + payload */
-		} virt;
-	} tx_frags;
-};
-
-#define KSOCK_NOOP_TX_SIZE (offsetof(struct ksock_tx, tx_frags.paged.kiov[0]))
-
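
KSOCK_NOOP_TX_SIZE uses offsetof() so a NOOP descriptor is allocated only up to the start of the paged-payload tail; LNET sends size the same struct for however many fragment descriptors they carry (see ksocknal_send() in socklnd_cb.c). A userspace sketch of that sizing idiom, with a simplified stand-in struct rather than the driver's types:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct frag { void *base; size_t len; };

struct tx {
	int desc_size;		/* size of this descriptor */
	struct frag frags[];	/* variable-length fragment tail */
};

/* allocate a descriptor with room for 'nfrags' trailing fragments */
static struct tx *tx_alloc(int nfrags)
{
	int size = offsetof(struct tx, frags[nfrags]);
	struct tx *tx = calloc(1, size);

	if (tx)
		tx->desc_size = size;
	return tx;
}

int main(void)
{
	struct tx *noop = tx_alloc(0);	/* like KSOCK_NOOP_TX_SIZE */
	struct tx *big = tx_alloc(16);	/* like a 16-frag LNET send */

	printf("noop=%d bytes, 16-frag=%d bytes\n",
	       noop->desc_size, big->desc_size);
	free(noop);
	free(big);
	return 0;
}
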
-/* network zero copy callback descriptor embedded in struct ksock_tx */
-
-#define SOCKNAL_RX_KSM_HEADER   1 /* reading ksock message header */
-#define SOCKNAL_RX_LNET_HEADER  2 /* reading lnet message header */
-#define SOCKNAL_RX_PARSE        3 /* Calling lnet_parse() */
-#define SOCKNAL_RX_PARSE_WAIT   4 /* waiting to be told to read the body */
-#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */
-#define SOCKNAL_RX_SLOP         6 /* skipping body */
-
-struct ksock_conn {
-	struct ksock_peer  *ksnc_peer;        /* owning peer */
-	struct ksock_route *ksnc_route;       /* owning route */
-	struct list_head   ksnc_list;         /* stash on peer's conn list */
-	struct socket      *ksnc_sock;        /* actual socket */
-	void               *ksnc_saved_data_ready;  /* socket's original
-						     * data_ready() callback
-						     */
-	void               *ksnc_saved_write_space; /* socket's original
-						     * write_space() callback
-						     */
-	atomic_t           ksnc_conn_refcount;/* conn refcount */
-	atomic_t           ksnc_sock_refcount;/* sock refcount */
-	struct ksock_sched *ksnc_scheduler;	/* who schedules this connection
-						 */
-	__u32              ksnc_myipaddr;     /* my IP */
-	__u32              ksnc_ipaddr;       /* peer's IP */
-	int                ksnc_port;         /* peer's port */
-	signed int         ksnc_type:3;       /* type of connection, should be
-					       * signed value
-					       */
-	unsigned int       ksnc_closing:1;    /* being shut down */
-	unsigned int       ksnc_flip:1;       /* byte-swap incoming? (V2.x only) */
-	unsigned int       ksnc_zc_capable:1; /* zero-copy capable */
-	struct ksock_proto *ksnc_proto;       /* protocol for the connection */
-
-	/* reader */
-	struct list_head   ksnc_rx_list;      /* where I enq waiting input or a
-					       * forwarding descriptor
-					       */
-	unsigned long      ksnc_rx_deadline;  /* when (in jiffies) receive times
-					       * out
-					       */
-	__u8               ksnc_rx_started;   /* started receiving a message */
-	__u8               ksnc_rx_ready;     /* data ready to read */
-	__u8               ksnc_rx_scheduled; /* being progressed */
-	__u8               ksnc_rx_state;     /* what is being read */
-	int                ksnc_rx_nob_left;  /* # bytes to next hdr/body */
-	struct iov_iter    ksnc_rx_to;		/* copy destination */
-	struct kvec        ksnc_rx_iov_space[LNET_MAX_IOV]; /* space for frag descriptors */
-	__u32              ksnc_rx_csum;      /* partial checksum for incoming
-					       * data
-					       */
-	void               *ksnc_cookie;      /* rx lnet_finalize passthru arg
-					       */
-	struct ksock_msg        ksnc_msg;          /* incoming message buffer:
-					       * V2.x message takes the
-					       * whole struct
-					       * V1.x message is a bare
-					       * struct lnet_hdr, it's stored in
-					       * ksnc_msg.ksm_u.lnetmsg
-					       */
-	/* WRITER */
-	struct list_head   ksnc_tx_list;      /* where I enq waiting for output
-					       * space
-					       */
-	struct list_head   ksnc_tx_queue;     /* packets waiting to be sent */
-	struct ksock_tx	  *ksnc_tx_carrier;   /* next TX that can carry a LNet
-					       * message or ZC-ACK
-					       */
-	unsigned long      ksnc_tx_deadline;  /* when (in jiffies) tx times out
-					       */
-	int                ksnc_tx_bufnob;    /* send buffer marker */
-	atomic_t           ksnc_tx_nob;       /* # bytes queued */
-	int		   ksnc_tx_ready;     /* write space */
-	int		   ksnc_tx_scheduled; /* being progressed */
-	unsigned long      ksnc_tx_last_post; /* time stamp of the last posted
-					       * TX
-					       */
-};
-
-struct ksock_route {
-	struct list_head  ksnr_list;           /* chain on peer route list */
-	struct list_head  ksnr_connd_list;     /* chain on ksnr_connd_routes */
-	struct ksock_peer *ksnr_peer;          /* owning peer */
-	atomic_t          ksnr_refcount;       /* # users */
-	unsigned long     ksnr_timeout;        /* when (in jiffies) reconnection
-						* can happen next
-						*/
-	long              ksnr_retry_interval; /* how long between retries */
-	__u32             ksnr_myipaddr;       /* my IP */
-	__u32             ksnr_ipaddr;         /* IP address to connect to */
-	int               ksnr_port;           /* port to connect to */
-	unsigned int      ksnr_scheduled:1;    /* scheduled for attention */
-	unsigned int      ksnr_connecting:1;   /* connection establishment in
-						* progress
-						*/
-	unsigned int      ksnr_connected:4;    /* connections established by
-						* type
-						*/
-	unsigned int      ksnr_deleted:1;      /* been removed from peer? */
-	unsigned int      ksnr_share_count;    /* created explicitly? */
-	int               ksnr_conn_count;     /* # conns established by this
-						* route
-						*/
-};
-
-#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
-
-struct ksock_peer {
-	struct list_head   ksnp_list;           /* stash on global peer list */
-	unsigned long      ksnp_last_alive;     /* when (in jiffies) I was last
-						 * alive
-						 */
-	struct lnet_process_id  ksnp_id;	/* who's on the other end(s) */
-	atomic_t           ksnp_refcount;       /* # users */
-	int                ksnp_sharecount;     /* lconf usage counter */
-	int                ksnp_closing;        /* being closed */
-	int                ksnp_accepting;      /* # passive connections pending
-						 */
-	int                ksnp_error;          /* errno on closing last conn */
-	__u64              ksnp_zc_next_cookie; /* ZC completion cookie */
-	__u64              ksnp_incarnation;    /* latest known peer incarnation
-						 */
-	struct ksock_proto *ksnp_proto;         /* latest known peer protocol */
-	struct list_head   ksnp_conns;          /* all active connections */
-	struct list_head   ksnp_routes;         /* routes */
-	struct list_head   ksnp_tx_queue;       /* waiting packets */
-	spinlock_t         ksnp_lock;           /* serialize, g_lock unsafe */
-	struct list_head   ksnp_zc_req_list;    /* zero copy requests wait for
-						 * ACK
-						 */
-	unsigned long      ksnp_send_keepalive; /* time to send keepalive */
-	struct lnet_ni	   *ksnp_ni;		/* which network */
-	int                ksnp_n_passive_ips;  /* # of... */
-
-	/* preferred local interfaces */
-	__u32              ksnp_passive_ips[LNET_MAX_INTERFACES];
-};
-
-struct ksock_connreq {
-	struct list_head ksncr_list;  /* stash on ksnd_connd_connreqs */
-	struct lnet_ni	 *ksncr_ni;	/* chosen NI */
-	struct socket    *ksncr_sock; /* accepted socket */
-};
-
-extern struct ksock_nal_data ksocknal_data;
-extern struct ksock_tunables ksocknal_tunables;
-
-#define SOCKNAL_MATCH_NO  0 /* TX can't match type of connection */
-#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */
-#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not
-			     * preferred
-			     */
-
-struct ksock_proto {
-	/* version number of protocol */
-	int        pro_version;
-
-	/* handshake function */
-	int        (*pro_send_hello)(struct ksock_conn *, struct ksock_hello_msg *);
-
-	/* handshake function */
-	int        (*pro_recv_hello)(struct ksock_conn *, struct ksock_hello_msg *, int);
-
-	/* message pack */
-	void       (*pro_pack)(struct ksock_tx *);
-
-	/* message unpack */
-	void       (*pro_unpack)(struct ksock_msg *);
-
-	/* queue tx on the connection */
-	struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *);
-
-	/* queue ZC ack on the connection */
-	int        (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64);
-
-	/* handle ZC request */
-	int        (*pro_handle_zcreq)(struct ksock_conn *, __u64, int);
-
-	/* handle ZC ACK */
-	int        (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64);
-
-	/*
-	 * msg type matches the connection type:
-	 * return value:
-	 *   return MATCH_NO  : no
-	 *   return MATCH_YES : matching type
-	 *   return MATCH_MAY : can be backup
-	 */
-	int        (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int);
-};
-
-extern struct ksock_proto ksocknal_protocol_v1x;
-extern struct ksock_proto ksocknal_protocol_v2x;
-extern struct ksock_proto ksocknal_protocol_v3x;
-
-#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
-#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
-#define KSOCK_PROTO_V1       KSOCK_PROTO_V1_MAJOR
-
-#ifndef CPU_MASK_NONE
-#define CPU_MASK_NONE   0UL
-#endif
-
-static inline int
-ksocknal_route_mask(void)
-{
-	if (!*ksocknal_tunables.ksnd_typed_conns)
-		return (1 << SOCKLND_CONN_ANY);
-
-	return ((1 << SOCKLND_CONN_CONTROL) |
-		(1 << SOCKLND_CONN_BULK_IN) |
-		(1 << SOCKLND_CONN_BULK_OUT));
-}
-
-static inline struct list_head *
-ksocknal_nid2peerlist(lnet_nid_t nid)
-{
-	unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
-
-	return &ksocknal_data.ksnd_peers[hash];
-}
-
-static inline void
-ksocknal_conn_addref(struct ksock_conn *conn)
-{
-	LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-	atomic_inc(&conn->ksnc_conn_refcount);
-}
-
-void ksocknal_queue_zombie_conn(struct ksock_conn *conn);
-void ksocknal_finalize_zcreq(struct ksock_conn *conn);
-
-static inline void
-ksocknal_conn_decref(struct ksock_conn *conn)
-{
-	LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-	if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
-		ksocknal_queue_zombie_conn(conn);
-}
-
-static inline int
-ksocknal_connsock_addref(struct ksock_conn *conn)
-{
-	int rc = -ESHUTDOWN;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-	if (!conn->ksnc_closing) {
-		LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
-		atomic_inc(&conn->ksnc_sock_refcount);
-		rc = 0;
-	}
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	return rc;
-}
-
-static inline void
-ksocknal_connsock_decref(struct ksock_conn *conn)
-{
-	LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
-	if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
-		LASSERT(conn->ksnc_closing);
-		sock_release(conn->ksnc_sock);
-		conn->ksnc_sock = NULL;
-		ksocknal_finalize_zcreq(conn);
-	}
-}
-
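
ksocknal_connsock_addref() is a conditional get: a socket reference is handed out only while the connection is not yet closing, and the final ksocknal_connsock_decref() both releases the socket and finalizes any outstanding zero-copy requests. The shape of the pattern in plain userspace C (a bare counter stands in for the atomic_t and the global rwlock, so this sketches the logic, not the locking):

#include <stdio.h>

struct conn {
	int closing;
	int sock_refcount;
};

/* grant a socket ref only while the conn is still open */
static int connsock_addref(struct conn *c)
{
	if (c->closing)
		return -1;	/* -ESHUTDOWN in the driver */
	c->sock_refcount++;
	return 0;
}

static void connsock_decref(struct conn *c)
{
	if (--c->sock_refcount == 0)
		printf("last ref: release socket, finalize ZC reqs\n");
}

int main(void)
{
	struct conn c = { .closing = 0, .sock_refcount = 1 };

	if (connsock_addref(&c) == 0)	/* use the socket... */
		connsock_decref(&c);	/* ...then drop our ref */

	c.closing = 1;			/* shutdown path */
	connsock_decref(&c);		/* drops the initial ref */
	return 0;
}
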
-static inline void
-ksocknal_tx_addref(struct ksock_tx *tx)
-{
-	LASSERT(atomic_read(&tx->tx_refcount) > 0);
-	atomic_inc(&tx->tx_refcount);
-}
-
-void ksocknal_tx_prep(struct ksock_conn *, struct ksock_tx *tx);
-void ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx);
-
-static inline void
-ksocknal_tx_decref(struct ksock_tx *tx)
-{
-	LASSERT(atomic_read(&tx->tx_refcount) > 0);
-	if (atomic_dec_and_test(&tx->tx_refcount))
-		ksocknal_tx_done(NULL, tx);
-}
-
-static inline void
-ksocknal_route_addref(struct ksock_route *route)
-{
-	LASSERT(atomic_read(&route->ksnr_refcount) > 0);
-	atomic_inc(&route->ksnr_refcount);
-}
-
-void ksocknal_destroy_route(struct ksock_route *route);
-
-static inline void
-ksocknal_route_decref(struct ksock_route *route)
-{
-	LASSERT(atomic_read(&route->ksnr_refcount) > 0);
-	if (atomic_dec_and_test(&route->ksnr_refcount))
-		ksocknal_destroy_route(route);
-}
-
-static inline void
-ksocknal_peer_addref(struct ksock_peer *peer)
-{
-	LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
-	atomic_inc(&peer->ksnp_refcount);
-}
-
-void ksocknal_destroy_peer(struct ksock_peer *peer);
-
-static inline void
-ksocknal_peer_decref(struct ksock_peer *peer)
-{
-	LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
-	if (atomic_dec_and_test(&peer->ksnp_refcount))
-		ksocknal_destroy_peer(peer);
-}
-
-int ksocknal_startup(struct lnet_ni *ni);
-void ksocknal_shutdown(struct lnet_ni *ni);
-int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg);
-int ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-		  int delayed, struct iov_iter *to, unsigned int rlen);
-int ksocknal_accept(struct lnet_ni *ni, struct socket *sock);
-
-int ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip,
-		      int port);
-struct ksock_peer *ksocknal_find_peer_locked(struct lnet_ni *ni,
-					     struct lnet_process_id id);
-struct ksock_peer *ksocknal_find_peer(struct lnet_ni *ni,
-				      struct lnet_process_id id);
-void ksocknal_peer_failed(struct ksock_peer *peer);
-int ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
-			 struct socket *sock, int type);
-void ksocknal_close_conn_locked(struct ksock_conn *conn, int why);
-void ksocknal_terminate_conn(struct ksock_conn *conn);
-void ksocknal_destroy_conn(struct ksock_conn *conn);
-int  ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
-				      __u32 ipaddr, int why);
-int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
-int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr);
-struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
-					     struct ksock_tx *tx, int nonblk);
-
-int  ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
-			    struct lnet_process_id id);
-struct ksock_tx *ksocknal_alloc_tx(int type, int size);
-void ksocknal_free_tx(struct ksock_tx *tx);
-struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
-void ksocknal_next_tx_carrier(struct ksock_conn *conn);
-void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn);
-void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error);
-void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive);
-void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);
-int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
-void ksocknal_thread_fini(void);
-void ksocknal_launch_all_connections_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connectable_route_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connecting_route_locked(struct ksock_peer *peer);
-int ksocknal_new_packet(struct ksock_conn *conn, int skip);
-int ksocknal_scheduler(void *arg);
-int ksocknal_connd(void *arg);
-int ksocknal_reaper(void *arg);
-int ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-			lnet_nid_t peer_nid, struct ksock_hello_msg *hello);
-int ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-			struct ksock_hello_msg *hello,
-			struct lnet_process_id *id,
-			__u64 *incarnation);
-void ksocknal_read_callback(struct ksock_conn *conn);
-void ksocknal_write_callback(struct ksock_conn *conn);
-
-int ksocknal_lib_zc_capable(struct ksock_conn *conn);
-void ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_set_callback(struct socket *sock,  struct ksock_conn *conn);
-void ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_push_conn(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn);
-int ksocknal_lib_setup_sock(struct socket *so);
-int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
-int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
-void ksocknal_lib_eager_ack(struct ksock_conn *conn);
-int ksocknal_lib_recv(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
-				   int *rxmem, int *nagle);
-
-void ksocknal_read_callback(struct ksock_conn *conn);
-void ksocknal_write_callback(struct ksock_conn *conn);
-
-int ksocknal_tunables_init(void);
-
-void ksocknal_lib_csum_tx(struct ksock_tx *tx);
-
-int ksocknal_lib_memory_pressure(struct ksock_conn *conn);
-int ksocknal_lib_bind_thread_to_cpu(int id);
-
-#endif /* _SOCKLND_SOCKLND_H_ */

+ 0 - 2586
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c

@@ -1,2586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#include <linux/sched/mm.h>
-#include "socklnd.h"
-
-struct ksock_tx *
-ksocknal_alloc_tx(int type, int size)
-{
-	struct ksock_tx *tx = NULL;
-
-	if (type == KSOCK_MSG_NOOP) {
-		LASSERT(size == KSOCK_NOOP_TX_SIZE);
-
-		/* searching for a noop tx in free list */
-		spin_lock(&ksocknal_data.ksnd_tx_lock);
-
-		if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
-			tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
-					struct ksock_tx, tx_list);
-			LASSERT(tx->tx_desc_size == size);
-			list_del(&tx->tx_list);
-		}
-
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-	}
-
-	if (!tx)
-		tx = kzalloc(size, GFP_NOFS);
-
-	if (!tx)
-		return NULL;
-
-	atomic_set(&tx->tx_refcount, 1);
-	tx->tx_zc_aborted = 0;
-	tx->tx_zc_capable = 0;
-	tx->tx_zc_checked = 0;
-	tx->tx_desc_size  = size;
-
-	atomic_inc(&ksocknal_data.ksnd_nactive_txs);
-
-	return tx;
-}
-
-struct ksock_tx *
-ksocknal_alloc_tx_noop(__u64 cookie, int nonblk)
-{
-	struct ksock_tx *tx;
-
-	tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE);
-	if (!tx) {
-		CERROR("Can't allocate noop tx desc\n");
-		return NULL;
-	}
-
-	tx->tx_conn    = NULL;
-	tx->tx_lnetmsg = NULL;
-	tx->tx_kiov    = NULL;
-	tx->tx_nkiov   = 0;
-	tx->tx_iov     = tx->tx_frags.virt.iov;
-	tx->tx_niov    = 1;
-	tx->tx_nonblk  = nonblk;
-
-	tx->tx_msg.ksm_csum = 0;
-	tx->tx_msg.ksm_type = KSOCK_MSG_NOOP;
-	tx->tx_msg.ksm_zc_cookies[0] = 0;
-	tx->tx_msg.ksm_zc_cookies[1] = cookie;
-
-	return tx;
-}
-
-void
-ksocknal_free_tx(struct ksock_tx *tx)
-{
-	atomic_dec(&ksocknal_data.ksnd_nactive_txs);
-
-	if (!tx->tx_lnetmsg && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) {
-		/* it's a noop tx */
-		spin_lock(&ksocknal_data.ksnd_tx_lock);
-
-		list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs);
-
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-	} else {
-		kfree(tx);
-	}
-}
-
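
Since NOOP descriptors are all one size, ksocknal_free_tx() parks them on ksnd_idle_noop_txs and ksocknal_alloc_tx() reuses a parked one before falling back to kzalloc(). The freelist round-trip in miniature (userspace sketch; the kernel list_head is reduced to a singly linked stack and ksnd_tx_lock is omitted):

#include <stdio.h>
#include <stdlib.h>

struct tx {
	struct tx *next;
	int desc_size;
};

static struct tx *idle_noop_txs;	/* freelist head */

static struct tx *tx_alloc(int size)
{
	struct tx *tx = idle_noop_txs;

	if (tx)				/* reuse a parked descriptor */
		idle_noop_txs = tx->next;
	else
		tx = calloc(1, sizeof(*tx));
	if (tx)
		tx->desc_size = size;
	return tx;
}

static void tx_free(struct tx *tx)
{
	tx->next = idle_noop_txs;	/* park instead of freeing */
	idle_noop_txs = tx;
}

int main(void)
{
	struct tx *a = tx_alloc(64);
	struct tx *b;

	tx_free(a);
	b = tx_alloc(64);
	printf("reused: %s\n", b == a ? "yes" : "no");	/* "yes" */
	free(b);
	return 0;
}
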
-static int
-ksocknal_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct kvec *iov = tx->tx_iov;
-	int nob;
-	int rc;
-
-	LASSERT(tx->tx_niov > 0);
-
-	/* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
-	rc = ksocknal_lib_send_iov(conn, tx);
-
-	if (rc <= 0)			    /* sent nothing? */
-		return rc;
-
-	nob = rc;
-	LASSERT(nob <= tx->tx_resid);
-	tx->tx_resid -= nob;
-
-	/* "consume" iov */
-	do {
-		LASSERT(tx->tx_niov > 0);
-
-		if (nob < (int)iov->iov_len) {
-			iov->iov_base = (void *)((char *)iov->iov_base + nob);
-			iov->iov_len -= nob;
-			return rc;
-		}
-
-		nob -= iov->iov_len;
-		tx->tx_iov = ++iov;
-		tx->tx_niov--;
-	} while (nob);
-
-	return rc;
-}
-
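
The "consume iov" loop above is the standard bookkeeping for resuming a partial write: fully sent fragments are popped off the front of the array, and a partially sent fragment has its base and length adjusted in place so the next send resumes mid-fragment. That bookkeeping isolated as a userspace sketch (consume_iov is a hypothetical helper; struct iovec comes from POSIX <sys/uio.h>):

#include <stdio.h>
#include <sys/uio.h>

/* advance (iov, niov) past 'nob' freshly sent bytes */
static void consume_iov(struct iovec **iovp, int *niovp, size_t nob)
{
	struct iovec *iov = *iovp;

	while (nob) {
		if (nob < iov->iov_len) {	/* frag partially sent */
			iov->iov_base = (char *)iov->iov_base + nob;
			iov->iov_len -= nob;
			break;
		}
		nob -= iov->iov_len;		/* frag fully sent */
		iov++;
		(*niovp)--;
	}
	*iovp = iov;
}

int main(void)
{
	char a[10], b[10];
	struct iovec frags[2] = { { a, 10 }, { b, 10 } };
	struct iovec *iov = frags;
	int niov = 2;

	consume_iov(&iov, &niov, 13);	/* 10 + 3 bytes were sent */

	/* prints "niov=1 left=7" */
	printf("niov=%d left=%zu\n", niov, iov->iov_len);
	return 0;
}
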
-static int
-ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct bio_vec *kiov = tx->tx_kiov;
-	int nob;
-	int rc;
-
-	LASSERT(!tx->tx_niov);
-	LASSERT(tx->tx_nkiov > 0);
-
-	/* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
-	rc = ksocknal_lib_send_kiov(conn, tx);
-
-	if (rc <= 0)			    /* sent nothing? */
-		return rc;
-
-	nob = rc;
-	LASSERT(nob <= tx->tx_resid);
-	tx->tx_resid -= nob;
-
-	/* "consume" kiov */
-	do {
-		LASSERT(tx->tx_nkiov > 0);
-
-		if (nob < (int)kiov->bv_len) {
-			kiov->bv_offset += nob;
-			kiov->bv_len -= nob;
-			return rc;
-		}
-
-		nob -= (int)kiov->bv_len;
-		tx->tx_kiov = ++kiov;
-		tx->tx_nkiov--;
-	} while (nob);
-
-	return rc;
-}
-
-static int
-ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	int rc;
-	int bufnob;
-
-	if (ksocknal_data.ksnd_stall_tx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(ksocknal_data.ksnd_stall_tx * HZ);
-	}
-
-	LASSERT(tx->tx_resid);
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc) {
-		LASSERT(conn->ksnc_closing);
-		return -ESHUTDOWN;
-	}
-
-	do {
-		if (ksocknal_data.ksnd_enomem_tx > 0) {
-			/* testing... */
-			ksocknal_data.ksnd_enomem_tx--;
-			rc = -EAGAIN;
-		} else if (tx->tx_niov) {
-			rc = ksocknal_send_iov(conn, tx);
-		} else {
-			rc = ksocknal_send_kiov(conn, tx);
-		}
-
-		bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-		if (rc > 0)		     /* sent something? */
-			conn->ksnc_tx_bufnob += rc; /* account it */
-
-		if (bufnob < conn->ksnc_tx_bufnob) {
-			/*
-			 * allocated send buffer bytes < computed; infer
-			 * something got ACKed
-			 */
-			conn->ksnc_tx_deadline =
-				jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-			conn->ksnc_peer->ksnp_last_alive = jiffies;
-			conn->ksnc_tx_bufnob = bufnob;
-			mb();
-		}
-
-		if (rc <= 0) { /* Didn't write anything? */
-
-			if (!rc) /* some stacks return 0 instead of -EAGAIN */
-				rc = -EAGAIN;
-
-			/* Check if EAGAIN is due to memory pressure */
-			if (rc == -EAGAIN && ksocknal_lib_memory_pressure(conn))
-				rc = -ENOMEM;
-
-			break;
-		}
-
-		/* socket's wmem_queued now includes 'rc' bytes */
-		atomic_sub(rc, &conn->ksnc_tx_nob);
-		rc = 0;
-
-	} while (tx->tx_resid);
-
-	ksocknal_connsock_decref(conn);
-	return rc;
-}
-
-static int
-ksocknal_recv_iter(struct ksock_conn *conn)
-{
-	int nob;
-	int rc;
-
-	/*
-	 * Never touch conn->ksnc_rx_to or change connection
-	 * status inside ksocknal_lib_recv
-	 */
-	rc = ksocknal_lib_recv(conn);
-
-	if (rc <= 0)
-		return rc;
-
-	/* received something... */
-	nob = rc;
-
-	conn->ksnc_peer->ksnp_last_alive = jiffies;
-	conn->ksnc_rx_deadline =
-		jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-	mb();		       /* order with setting rx_started */
-	conn->ksnc_rx_started = 1;
-
-	conn->ksnc_rx_nob_left -= nob;
-
-	iov_iter_advance(&conn->ksnc_rx_to, nob);
-	if (iov_iter_count(&conn->ksnc_rx_to))
-		return -EAGAIN;
-
-	return 1;
-}
-
-static int
-ksocknal_receive(struct ksock_conn *conn)
-{
-	/*
-	 * Return 1 on success, 0 on EOF, < 0 on error.
-	 * Caller checks ksnc_rx_to to determine
-	 * progress/completion.
-	 */
-	int rc;
-
-	if (ksocknal_data.ksnd_stall_rx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(ksocknal_data.ksnd_stall_rx * HZ);
-	}
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc) {
-		LASSERT(conn->ksnc_closing);
-		return -ESHUTDOWN;
-	}
-
-	for (;;) {
-		rc = ksocknal_recv_iter(conn);
-		if (rc <= 0) {
-			/* error/EOF or partial receive */
-			if (rc == -EAGAIN) {
-				rc = 1;
-			} else if (!rc && conn->ksnc_rx_started) {
-				/* EOF in the middle of a message */
-				rc = -EPROTO;
-			}
-			break;
-		}
-
-		/* Completed a fragment */
-
-		if (!iov_iter_count(&conn->ksnc_rx_to)) {
-			rc = 1;
-			break;
-		}
-	}
-
-	ksocknal_connsock_decref(conn);
-	return rc;
-}
-
-void
-ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx)
-{
-	struct lnet_msg *lnetmsg = tx->tx_lnetmsg;
-	int rc = (!tx->tx_resid && !tx->tx_zc_aborted) ? 0 : -EIO;
-
-	LASSERT(ni || tx->tx_conn);
-
-	if (tx->tx_conn)
-		ksocknal_conn_decref(tx->tx_conn);
-
-	if (!ni && tx->tx_conn)
-		ni = tx->tx_conn->ksnc_peer->ksnp_ni;
-
-	ksocknal_free_tx(tx);
-	if (lnetmsg) /* KSOCK_MSG_NOOP goes without lnetmsg */
-		lnet_finalize(ni, lnetmsg, rc);
-}
-
-void
-ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error)
-{
-	struct ksock_tx *tx;
-
-	while (!list_empty(txlist)) {
-		tx = list_entry(txlist->next, struct ksock_tx, tx_list);
-
-		if (error && tx->tx_lnetmsg) {
-			CNETERR("Deleting packet type %d len %d %s->%s\n",
-				le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type),
-				le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length),
-				libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
-				libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid)));
-		} else if (error) {
-			CNETERR("Deleting noop packet\n");
-		}
-
-		list_del(&tx->tx_list);
-
-		LASSERT(atomic_read(&tx->tx_refcount) == 1);
-		ksocknal_tx_done(ni, tx);
-	}
-}
-
-static void
-ksocknal_check_zc_req(struct ksock_tx *tx)
-{
-	struct ksock_conn *conn = tx->tx_conn;
-	struct ksock_peer *peer = conn->ksnc_peer;
-
-	/*
-	 * Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx
-	 * to ksnp_zc_req_list if some fragment of this message should be sent
-	 * zero-copy.  Our peer will send an ACK containing this cookie when
-	 * she has received this message to tell us we can signal completion.
-	 * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on
-	 * ksnp_zc_req_list.
-	 */
-	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-	LASSERT(tx->tx_zc_capable);
-
-	tx->tx_zc_checked = 1;
-
-	if (conn->ksnc_proto == &ksocknal_protocol_v1x ||
-	    !conn->ksnc_zc_capable)
-		return;
-
-	/*
-	 * assign cookie and queue tx to pending list, it will be released when
-	 * a matching ack is received. See ksocknal_handle_zcack()
-	 */
-	ksocknal_tx_addref(tx);
-
-	spin_lock(&peer->ksnp_lock);
-
-	/* ZC_REQ is going to be pinned to the peer */
-	tx->tx_deadline =
-		jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
-	LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
-
-	tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++;
-
-	if (!peer->ksnp_zc_next_cookie)
-		peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
-	list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
-
-	spin_unlock(&peer->ksnp_lock);
-}
-
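
ZC cookies come from a per-peer 64-bit counter: zero means "no ZC pending", and the wrap-around handling above restarts the counter past SOCKNAL_KEEPALIVE_PING so a data cookie can never collide with the reserved keepalive cookie. The assignment rule in isolation (userspace sketch; the starting value is contrived so the wrap is reached immediately):

#include <stdio.h>

#define SOCKNAL_KEEPALIVE_PING 1ULL	/* reserved cookie */

static unsigned long long zc_next_cookie = 0xfffffffffffffffeULL;

static unsigned long long next_cookie(void)
{
	unsigned long long c = zc_next_cookie++;

	if (!zc_next_cookie)	/* wrapped: skip 0 and the ping cookie */
		zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
	return c;
}

int main(void)
{
	int i;

	/* prints fffffffffffffffe, ffffffffffffffff, then 2 */
	for (i = 0; i < 3; i++)
		printf("%llx\n", next_cookie());
	return 0;
}
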
-static void
-ksocknal_uncheck_zc_req(struct ksock_tx *tx)
-{
-	struct ksock_peer *peer = tx->tx_conn->ksnc_peer;
-
-	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-	LASSERT(tx->tx_zc_capable);
-
-	tx->tx_zc_checked = 0;
-
-	spin_lock(&peer->ksnp_lock);
-
-	if (!tx->tx_msg.ksm_zc_cookies[0]) {
-		/* Not waiting for an ACK */
-		spin_unlock(&peer->ksnp_lock);
-		return;
-	}
-
-	tx->tx_msg.ksm_zc_cookies[0] = 0;
-	list_del(&tx->tx_zc_list);
-
-	spin_unlock(&peer->ksnp_lock);
-
-	ksocknal_tx_decref(tx);
-}
-
-static int
-ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	int rc;
-
-	if (tx->tx_zc_capable && !tx->tx_zc_checked)
-		ksocknal_check_zc_req(tx);
-
-	rc = ksocknal_transmit(conn, tx);
-
-	CDEBUG(D_NET, "send(%d) %d\n", tx->tx_resid, rc);
-
-	if (!tx->tx_resid) {
-		/* Sent everything OK */
-		LASSERT(!rc);
-
-		return 0;
-	}
-
-	if (rc == -EAGAIN)
-		return rc;
-
-	if (rc == -ENOMEM) {
-		static int counter;
-
-		counter++;   /* warn at exponentially growing intervals */
-		if ((counter & (-counter)) == counter) /* power of 2? */
-			CWARN("%u ENOMEM tx %p\n", counter, conn);
-
-		/* Queue on ksnd_enomem_conns for retry after a timeout */
-		spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-		/* enomem list takes over scheduler's ref... */
-		LASSERT(conn->ksnc_tx_scheduled);
-		list_add_tail(&conn->ksnc_tx_list,
-			      &ksocknal_data.ksnd_enomem_conns);
-		if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY,
-				   ksocknal_data.ksnd_reaper_waketime))
-			wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
-		spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-		return rc;
-	}
-
-	/* Actual error */
-	LASSERT(rc < 0);
-
-	if (!conn->ksnc_closing) {
-		switch (rc) {
-		case -ECONNRESET:
-			LCONSOLE_WARN("Host %pI4h reset our connection while we were sending data; it may have rebooted.\n",
-				      &conn->ksnc_ipaddr);
-			break;
-		default:
-			LCONSOLE_WARN("There was an unexpected network error while writing to %pI4h: %d.\n",
-				      &conn->ksnc_ipaddr, rc);
-			break;
-		}
-		CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pI4h:%d\n",
-		       conn, rc,
-		       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-		       &conn->ksnc_ipaddr,
-		       conn->ksnc_port);
-	}
-
-	if (tx->tx_zc_checked)
-		ksocknal_uncheck_zc_req(tx);
-
-	/* it's not an error if conn is being closed */
-	ksocknal_close_conn_and_siblings(conn, (conn->ksnc_closing) ? 0 : rc);
-
-	return rc;
-}
-
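
The (counter & (-counter)) == counter test in the ENOMEM path above is true exactly when counter is a power of two, because counter & -counter isolates the lowest set bit. The warning therefore fires on failures 1, 2, 4, 8, ... and the log rate halves as failures accumulate. Demonstrated stand-alone:

#include <stdio.h>

int main(void)
{
	unsigned int counter;

	for (counter = 1; counter <= 20; counter++) {
		/* counter & -counter keeps only the lowest set bit */
		if ((counter & -counter) == counter)
			printf("warn at failure #%u\n", counter);
	}
	/* warns at 1, 2, 4, 8 and 16 only */
	return 0;
}
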
-static void
-ksocknal_launch_connection_locked(struct ksock_route *route)
-{
-	/* called holding write lock on ksnd_global_lock */
-
-	LASSERT(!route->ksnr_scheduled);
-	LASSERT(!route->ksnr_connecting);
-	LASSERT(ksocknal_route_mask() & ~route->ksnr_connected);
-
-	route->ksnr_scheduled = 1;	      /* scheduling conn for connd */
-	ksocknal_route_addref(route);	   /* extra ref for connd */
-
-	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
-	list_add_tail(&route->ksnr_connd_list,
-		      &ksocknal_data.ksnd_connd_routes);
-	wake_up(&ksocknal_data.ksnd_connd_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-}
-
-void
-ksocknal_launch_all_connections_locked(struct ksock_peer *peer)
-{
-	struct ksock_route *route;
-
-	/* called holding write lock on ksnd_global_lock */
-	for (;;) {
-		/* launch any/all connections that need it */
-		route = ksocknal_find_connectable_route_locked(peer);
-		if (!route)
-			return;
-
-		ksocknal_launch_connection_locked(route);
-	}
-}
-
-struct ksock_conn *
-ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx,
-			  int nonblk)
-{
-	struct list_head *tmp;
-	struct ksock_conn *conn;
-	struct ksock_conn *typed = NULL;
-	struct ksock_conn *fallback = NULL;
-	int tnob = 0;
-	int fnob = 0;
-
-	list_for_each(tmp, &peer->ksnp_conns) {
-		struct ksock_conn *c;
-		int nob, rc;
-
-		c = list_entry(tmp, struct ksock_conn, ksnc_list);
-		nob = atomic_read(&c->ksnc_tx_nob) +
-		      c->ksnc_sock->sk->sk_wmem_queued;
-
-		LASSERT(!c->ksnc_closing);
-		LASSERT(c->ksnc_proto &&
-			c->ksnc_proto->pro_match_tx);
-
-		rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk);
-
-		switch (rc) {
-		default:
-			LBUG();
-		case SOCKNAL_MATCH_NO: /* protocol rejected the tx */
-			continue;
-
-		case SOCKNAL_MATCH_YES: /* typed connection */
-			if (!typed || tnob > nob ||
-			    (tnob == nob && *ksocknal_tunables.ksnd_round_robin &&
-			     time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
-				typed = c;
-				tnob  = nob;
-			}
-			break;
-
-		case SOCKNAL_MATCH_MAY: /* fallback connection */
-			if (!fallback || fnob > nob ||
-			    (fnob == nob && *ksocknal_tunables.ksnd_round_robin &&
-			     time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
-				fallback = c;
-				fnob = nob;
-			}
-			break;
-		}
-	}
-
-	/* prefer the typed selection */
-	conn = (typed) ? typed : fallback;
-
-	if (conn)
-		conn->ksnc_tx_last_post = jiffies;
-
-	return conn;
-}
-
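
Connection selection above is "fewest queued bytes wins": typed matches are preferred over MATCH_MAY fallbacks, and with the round_robin tunable set, ties go to the connection that posted least recently. The core comparison reduced to its essentials (userspace sketch; plain longs stand in for jiffies and time_after()):

#include <stdio.h>

struct conn {
	const char *name;
	int queued_nob;		/* bytes already queued on the socket */
	long last_post;		/* when this conn last carried a tx */
};

/* does candidate 'c' beat the current 'best'? (round-robin tiebreak on) */
static int beats(const struct conn *best, const struct conn *c)
{
	if (!best || best->queued_nob > c->queued_nob)
		return 1;
	return best->queued_nob == c->queued_nob &&
	       best->last_post > c->last_post;	/* least recently used */
}

int main(void)
{
	struct conn a = { "a", 100, 50 };
	struct conn b = { "b", 100, 10 };
	const struct conn *best = NULL;

	if (beats(best, &a))
		best = &a;
	if (beats(best, &b))
		best = &b;

	/* same queue depth, so the older post wins: prints "picked b" */
	printf("picked %s\n", best->name);
	return 0;
}
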
-void
-ksocknal_tx_prep(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	conn->ksnc_proto->pro_pack(tx);
-
-	atomic_add(tx->tx_nob, &conn->ksnc_tx_nob);
-	ksocknal_conn_addref(conn); /* +1 ref for tx */
-	tx->tx_conn = conn;
-}
-
-void
-ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn)
-{
-	struct ksock_sched *sched = conn->ksnc_scheduler;
-	struct ksock_msg *msg = &tx->tx_msg;
-	struct ksock_tx *ztx = NULL;
-	int bufnob = 0;
-
-	/*
-	 * called holding global lock (read or irq-write) and caller may
-	 * not have dropped this lock between finding conn and calling me,
-	 * so we don't need the {get,put}connsock dance to deref
-	 * ksnc_sock...
-	 */
-	LASSERT(!conn->ksnc_closing);
-
-	CDEBUG(D_NET, "Sending to %s ip %pI4h:%d\n",
-	       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-	       &conn->ksnc_ipaddr, conn->ksnc_port);
-
-	ksocknal_tx_prep(conn, tx);
-
-	/*
-	 * Ensure the frags we've been given EXACTLY match the number of
-	 * bytes we want to send.  Many TCP/IP stacks disregard any total
-	 * size parameters passed to them and just look at the frags.
-	 *
-	 * We always expect at least 1 mapped fragment containing the
-	 * complete ksocknal message header.
-	 */
-	LASSERT(lnet_iov_nob(tx->tx_niov, tx->tx_iov) +
-		lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) ==
-		(unsigned int)tx->tx_nob);
-	LASSERT(tx->tx_niov >= 1);
-	LASSERT(tx->tx_resid == tx->tx_nob);
-
-	CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
-	       tx, (tx->tx_lnetmsg) ? tx->tx_lnetmsg->msg_hdr.type :
-					      KSOCK_MSG_NOOP,
-	       tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
-
-	/*
-	 * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
-	 * but they're used inside spinlocks a lot.
-	 */
-	bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-	spin_lock_bh(&sched->kss_lock);
-
-	if (list_empty(&conn->ksnc_tx_queue) && !bufnob) {
-		/* First packet starts the timeout */
-		conn->ksnc_tx_deadline =
-			jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-		if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */
-			conn->ksnc_peer->ksnp_last_alive = jiffies;
-		conn->ksnc_tx_bufnob = 0;
-		mb(); /* order with adding to tx_queue */
-	}
-
-	if (msg->ksm_type == KSOCK_MSG_NOOP) {
-		/*
-		 * The packet is a noop ZC-ACK; try to piggyback the ack_cookie
-		 * on a normal packet so I don't need to send it
-		 */
-		LASSERT(msg->ksm_zc_cookies[1]);
-		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
-		/* ZC ACK piggybacked on ztx release tx later */
-		if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0))
-			ztx = tx;
-	} else {
-		/*
-		 * It's a normal packet - can it piggyback a noop zc-ack that
-		 * has been queued already?
-		 */
-		LASSERT(!msg->ksm_zc_cookies[1]);
-		LASSERT(conn->ksnc_proto->pro_queue_tx_msg);
-
-		ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx);
-		/* ztx will be released later */
-	}
-
-	if (ztx) {
-		atomic_sub(ztx->tx_nob, &conn->ksnc_tx_nob);
-		list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs);
-	}
-
-	if (conn->ksnc_tx_ready &&      /* able to send */
-	    !conn->ksnc_tx_scheduled) { /* not scheduled to send */
-		/* +1 ref for scheduler */
-		ksocknal_conn_addref(conn);
-		list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
-		conn->ksnc_tx_scheduled = 1;
-		wake_up(&sched->kss_waitq);
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-}
-
-struct ksock_route *
-ksocknal_find_connectable_route_locked(struct ksock_peer *peer)
-{
-	unsigned long now = jiffies;
-	struct list_head *tmp;
-	struct ksock_route *route;
-
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
-		/* connections being established */
-		if (route->ksnr_scheduled)
-			continue;
-
-		/* all route types connected ? */
-		if (!(ksocknal_route_mask() & ~route->ksnr_connected))
-			continue;
-
-		if (!(!route->ksnr_retry_interval || /* first attempt */
-		      time_after_eq(now, route->ksnr_timeout))) {
-			CDEBUG(D_NET,
-			       "Too soon to retry route %pI4h (cnted %d, interval %ld, %ld secs later)\n",
-			       &route->ksnr_ipaddr,
-			       route->ksnr_connected,
-			       route->ksnr_retry_interval,
-			       (route->ksnr_timeout - now) / HZ);
-			continue;
-		}
-
-		return route;
-	}
-
-	return NULL;
-}
-
-struct ksock_route *
-ksocknal_find_connecting_route_locked(struct ksock_peer *peer)
-{
-	struct list_head *tmp;
-	struct ksock_route *route;
-
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
-		if (route->ksnr_scheduled)
-			return route;
-	}
-
-	return NULL;
-}
-
-int
-ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
-		       struct lnet_process_id id)
-{
-	struct ksock_peer *peer;
-	struct ksock_conn *conn;
-	rwlock_t *g_lock;
-	int retry;
-	int rc;
-
-	LASSERT(!tx->tx_conn);
-
-	g_lock = &ksocknal_data.ksnd_global_lock;
-
-	for (retry = 0;; retry = 1) {
-		read_lock(g_lock);
-		peer = ksocknal_find_peer_locked(ni, id);
-		if (peer) {
-			if (!ksocknal_find_connectable_route_locked(peer)) {
-				conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
-				if (conn) {
-					/*
-					 * I've got no routes that need to be
-					 * connecting and I do have an actual
-					 * connection...
-					 */
-					ksocknal_queue_tx_locked(tx, conn);
-					read_unlock(g_lock);
-					return 0;
-				}
-			}
-		}
-
-		/* I'll need a write lock... */
-		read_unlock(g_lock);
-
-		write_lock_bh(g_lock);
-
-		peer = ksocknal_find_peer_locked(ni, id);
-		if (peer)
-			break;
-
-		write_unlock_bh(g_lock);
-
-		if (id.pid & LNET_PID_USERFLAG) {
-			CERROR("Refusing to create a connection to userspace process %s\n",
-			       libcfs_id2str(id));
-			return -EHOSTUNREACH;
-		}
-
-		if (retry) {
-			CERROR("Can't find peer %s\n", libcfs_id2str(id));
-			return -EHOSTUNREACH;
-		}
-
-		rc = ksocknal_add_peer(ni, id,
-				       LNET_NIDADDR(id.nid),
-				       lnet_acceptor_port());
-		if (rc) {
-			CERROR("Can't add peer %s: %d\n",
-			       libcfs_id2str(id), rc);
-			return rc;
-		}
-	}
-
-	ksocknal_launch_all_connections_locked(peer);
-
-	conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
-	if (conn) {
-		/* Connection exists; queue message on it */
-		ksocknal_queue_tx_locked(tx, conn);
-		write_unlock_bh(g_lock);
-		return 0;
-	}
-
-	if (peer->ksnp_accepting > 0 ||
-	    ksocknal_find_connecting_route_locked(peer)) {
-		/* the message is going to be pinned to the peer */
-		tx->tx_deadline =
-			jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
-		/* Queue the message until a connection is established */
-		list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue);
-		write_unlock_bh(g_lock);
-		return 0;
-	}
-
-	write_unlock_bh(g_lock);
-
-	/* NB Routes may be ignored if connections to them failed recently */
-	CNETERR("No usable routes to %s\n", libcfs_id2str(id));
-	return -EHOSTUNREACH;
-}
-
-int
-ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
-	unsigned int mpflag = 0;
-	int type = lntmsg->msg_type;
-	struct lnet_process_id target = lntmsg->msg_target;
-	unsigned int payload_niov = lntmsg->msg_niov;
-	struct kvec *payload_iov = lntmsg->msg_iov;
-	struct bio_vec *payload_kiov = lntmsg->msg_kiov;
-	unsigned int payload_offset = lntmsg->msg_offset;
-	unsigned int payload_nob = lntmsg->msg_len;
-	struct ksock_tx *tx;
-	int desc_size;
-	int rc;
-
-	/*
-	 * NB 'private' is different depending on what we're sending.
-	 * Just ignore it...
-	 */
-	CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
-	       payload_nob, payload_niov, libcfs_id2str(target));
-
-	LASSERT(!payload_nob || payload_niov > 0);
-	LASSERT(payload_niov <= LNET_MAX_IOV);
-	/* payload is either all vaddrs or all pages */
-	LASSERT(!(payload_kiov && payload_iov));
-	LASSERT(!in_interrupt());
-
-	if (payload_iov)
-		desc_size = offsetof(struct ksock_tx,
-				     tx_frags.virt.iov[1 + payload_niov]);
-	else
-		desc_size = offsetof(struct ksock_tx,
-				     tx_frags.paged.kiov[payload_niov]);
-
-	if (lntmsg->msg_vmflush)
-		mpflag = memalloc_noreclaim_save();
-	tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size);
-	if (!tx) {
-		CERROR("Can't allocate tx desc type %d size %d\n",
-		       type, desc_size);
-		if (lntmsg->msg_vmflush)
-			memalloc_noreclaim_restore(mpflag);
-		return -ENOMEM;
-	}
-
-	tx->tx_conn = NULL;		     /* set when assigned a conn */
-	tx->tx_lnetmsg = lntmsg;
-
-	if (payload_iov) {
-		tx->tx_kiov = NULL;
-		tx->tx_nkiov = 0;
-		tx->tx_iov = tx->tx_frags.virt.iov;
-		tx->tx_niov = 1 +
-			      lnet_extract_iov(payload_niov, &tx->tx_iov[1],
-					       payload_niov, payload_iov,
-					       payload_offset, payload_nob);
-	} else {
-		tx->tx_niov = 1;
-		tx->tx_iov = &tx->tx_frags.paged.iov;
-		tx->tx_kiov = tx->tx_frags.paged.kiov;
-		tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
-						 payload_niov, payload_kiov,
-						 payload_offset, payload_nob);
-
-		if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload)
-			tx->tx_zc_capable = 1;
-	}
-
-	tx->tx_msg.ksm_csum = 0;
-	tx->tx_msg.ksm_type = KSOCK_MSG_LNET;
-	tx->tx_msg.ksm_zc_cookies[0] = 0;
-	tx->tx_msg.ksm_zc_cookies[1] = 0;
-
-	/* The first fragment will be set later in pro_pack */
-	rc = ksocknal_launch_packet(ni, tx, target);
-	if (mpflag)
-		memalloc_noreclaim_restore(mpflag);
-
-	if (!rc)
-		return 0;
-
-	ksocknal_free_tx(tx);
-	return -EIO;
-}
-
-int
-ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-	struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-	ksocknal_data.ksnd_nthreads++;
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-	return 0;
-}
-
-void
-ksocknal_thread_fini(void)
-{
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-	ksocknal_data.ksnd_nthreads--;
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip)
-{
-	static char ksocknal_slop_buffer[4096];
-	struct kvec *kvec = conn->ksnc_rx_iov_space;
-
-	int nob;
-	unsigned int niov;
-	int skipped;
-
-	LASSERT(conn->ksnc_proto);
-
-	if (*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) {
-		/* Remind the socket to ack eagerly... */
-		ksocknal_lib_eager_ack(conn);
-	}
-
-	if (!nob_to_skip) {	 /* right at next packet boundary now */
-		conn->ksnc_rx_started = 0;
-		mb();		       /* racing with timeout thread */
-
-		switch (conn->ksnc_proto->pro_version) {
-		case  KSOCK_PROTO_V2:
-		case  KSOCK_PROTO_V3:
-			conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
-			kvec->iov_base = &conn->ksnc_msg;
-			kvec->iov_len = offsetof(struct ksock_msg, ksm_u);
-			conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u);
-			iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
-					1, offsetof(struct ksock_msg, ksm_u));
-			break;
-
-		case KSOCK_PROTO_V1:
-			/* Receiving bare struct lnet_hdr */
-			conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-			kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-			kvec->iov_len = sizeof(struct lnet_hdr);
-			conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr);
-			iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
-					1, sizeof(struct lnet_hdr));
-			break;
-
-		default:
-			LBUG();
-		}
-		conn->ksnc_rx_csum = ~0;
-		return 1;
-	}
-
-	/*
-	 * Set up to skip as much as possible now.  If there's more left
-	 * (ran out of iov entries) we'll get called again
-	 */
-	conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
-	conn->ksnc_rx_nob_left = nob_to_skip;
-	skipped = 0;
-	niov = 0;
-
-	do {
-		nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer));
-
-		kvec[niov].iov_base = ksocknal_slop_buffer;
-		kvec[niov].iov_len  = nob;
-		niov++;
-		skipped += nob;
-		nob_to_skip -= nob;
-
-	} while (nob_to_skip &&    /* mustn't overflow conn's rx iov */
-		 niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec));
-
-	iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped);
-	return 0;
-}
-
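
To discard unwanted bytes, the skip path above points every rx iov entry at one shared 4 KiB slop buffer, so a single receive can swallow several pages of slop without allocating anything; if the slop doesn't all fit, the function returns 0 and is called again. The trick in miniature (userspace sketch):

#include <stdio.h>
#include <sys/uio.h>

#define SLOP_SIZE 4096
#define MAX_IOV   8

static char slop[SLOP_SIZE];	/* shared scratch target */

/* build an iovec that discards up to 'nob_to_skip' bytes */
static unsigned int setup_skip(struct iovec *kvec, size_t nob_to_skip)
{
	unsigned int niov = 0;

	while (nob_to_skip && niov < MAX_IOV) {
		size_t nob = nob_to_skip < SLOP_SIZE ? nob_to_skip : SLOP_SIZE;

		kvec[niov].iov_base = slop;	/* every entry aliases slop */
		kvec[niov].iov_len = nob;
		niov++;
		nob_to_skip -= nob;
	}
	return niov;
}

int main(void)
{
	struct iovec kvec[MAX_IOV];

	/* 10000 bytes of slop: 4096 + 4096 + 1808, prints "niov=3" */
	printf("niov=%u\n", setup_skip(kvec, 10000));
	return 0;
}
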
-static int
-ksocknal_process_receive(struct ksock_conn *conn)
-{
-	struct kvec *kvec = conn->ksnc_rx_iov_space;
-	struct lnet_hdr *lhdr;
-	struct lnet_process_id *id;
-	int rc;
-
-	LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-
-	/* NB: sched lock NOT held */
-	/* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */
-	LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER ||
-		conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD ||
-		conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
-		conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
- again:
-	if (iov_iter_count(&conn->ksnc_rx_to)) {
-		rc = ksocknal_receive(conn);
-
-		if (rc <= 0) {
-			LASSERT(rc != -EAGAIN);
-
-			if (!rc)
-				CDEBUG(D_NET, "[%p] EOF from %s ip %pI4h:%d\n",
-				       conn,
-				       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-				       &conn->ksnc_ipaddr,
-				       conn->ksnc_port);
-			else if (!conn->ksnc_closing)
-				CERROR("[%p] Error %d on read from %s ip %pI4h:%d\n",
-				       conn, rc,
-				       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-				       &conn->ksnc_ipaddr,
-				       conn->ksnc_port);
-
-			/* it's not an error if conn is being closed */
-			ksocknal_close_conn_and_siblings(conn,
-							 (conn->ksnc_closing) ? 0 : rc);
-			return (!rc ? -ESHUTDOWN : rc);
-		}
-
-		if (iov_iter_count(&conn->ksnc_rx_to)) {
-			/* short read */
-			return -EAGAIN;
-		}
-	}
-	switch (conn->ksnc_rx_state) {
-	case SOCKNAL_RX_KSM_HEADER:
-		if (conn->ksnc_flip) {
-			__swab32s(&conn->ksnc_msg.ksm_type);
-			__swab32s(&conn->ksnc_msg.ksm_csum);
-			__swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
-			__swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
-		}
-
-		if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
-		    conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
-			CERROR("%s: Unknown message type: %x\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       conn->ksnc_msg.ksm_type);
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, -EPROTO);
-			return -EPROTO;
-		}
-
-		if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
-		    conn->ksnc_msg.ksm_csum &&     /* has checksum */
-		    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
-			/* NOOP Checksum error */
-			CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, -EPROTO);
-			return -EIO;
-		}
-
-		if (conn->ksnc_msg.ksm_zc_cookies[1]) {
-			__u64 cookie = 0;
-
-			LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
-			if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
-				cookie = conn->ksnc_msg.ksm_zc_cookies[0];
-
-			rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie,
-					       conn->ksnc_msg.ksm_zc_cookies[1]);
-
-			if (rc) {
-				CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n",
-				       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-				       cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
-				ksocknal_new_packet(conn, 0);
-				ksocknal_close_conn_and_siblings(conn, -EPROTO);
-				return rc;
-			}
-		}
-
-		if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
-			ksocknal_new_packet(conn, 0);
-			return 0;       /* NOOP is done and just return */
-		}
-
-		conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-		conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg);
-
-		kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-		kvec->iov_len = sizeof(struct ksock_lnet_msg);
-
-		iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
-				1, sizeof(struct ksock_lnet_msg));
-
-		goto again;     /* read lnet header now */
-
-	case SOCKNAL_RX_LNET_HEADER:
-		/* unpack message header */
-		conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
-
-		if (conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) {
-			/* Userspace peer */
-			lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
-			id = &conn->ksnc_peer->ksnp_id;
-
-			/* Substitute process ID assigned at connection time */
-			lhdr->src_pid = cpu_to_le32(id->pid);
-			lhdr->src_nid = cpu_to_le64(id->nid);
-		}
-
-		conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
-		ksocknal_conn_addref(conn);     /* ++ref while parsing */
-
-		rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
-				&conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
-				conn->ksnc_peer->ksnp_id.nid, conn, 0);
-		if (rc < 0) {
-			/* I just received garbage: give up on this conn */
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, rc);
-			ksocknal_conn_decref(conn);
-			return -EPROTO;
-		}
-
-		/* I'm racing with ksocknal_recv() */
-		LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
-			conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
-
-		if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
-			return 0;
-
-		/* ksocknal_recv() got called */
-		goto again;
-
-	case SOCKNAL_RX_LNET_PAYLOAD:
-		/* payload all received */
-		rc = 0;
-
-		if (!conn->ksnc_rx_nob_left &&   /* not truncating */
-		    conn->ksnc_msg.ksm_csum &&  /* has checksum */
-		    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
-			CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
-			rc = -EIO;
-		}
-
-		if (!rc && conn->ksnc_msg.ksm_zc_cookies[0]) {
-			LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
-			lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
-			id = &conn->ksnc_peer->ksnp_id;
-
-			rc = conn->ksnc_proto->pro_handle_zcreq(conn,
-					conn->ksnc_msg.ksm_zc_cookies[0],
-					*ksocknal_tunables.ksnd_nonblk_zcack ||
-					le64_to_cpu(lhdr->src_nid) != id->nid);
-		}
-
-		lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc);
-
-		if (rc) {
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, rc);
-			return -EPROTO;
-		}
-		/* Fall through */
-
-	case SOCKNAL_RX_SLOP:
-		/* starting new packet? */
-		if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left))
-			return 0;       /* come back later */
-		goto again;	     /* try to finish reading slop now */
-
-	default:
-		break;
-	}
-
-	/* Not Reached */
-	LBUG();
-	return -EINVAL;		       /* keep gcc happy */
-}
-
-int
-ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-	      int delayed, struct iov_iter *to, unsigned int rlen)
-{
-	struct ksock_conn *conn = private;
-	struct ksock_sched *sched = conn->ksnc_scheduler;
-
-	LASSERT(iov_iter_count(to) <= rlen);
-	LASSERT(to->nr_segs <= LNET_MAX_IOV);
-
-	conn->ksnc_cookie = msg;
-	conn->ksnc_rx_nob_left = rlen;
-
-	conn->ksnc_rx_to = *to;
-
-	LASSERT(conn->ksnc_rx_scheduled);
-
-	spin_lock_bh(&sched->kss_lock);
-
-	switch (conn->ksnc_rx_state) {
-	case SOCKNAL_RX_PARSE_WAIT:
-		list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
-		wake_up(&sched->kss_waitq);
-		LASSERT(conn->ksnc_rx_ready);
-		break;
-
-	case SOCKNAL_RX_PARSE:
-		/* scheduler hasn't noticed I'm parsing yet */
-		break;
-	}
-
-	conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD;
-
-	spin_unlock_bh(&sched->kss_lock);
-	ksocknal_conn_decref(conn);
-	return 0;
-}
-
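-/*
- * A scheduler may sleep only while shutdown isn't pending and it has no
- * rx or tx connections queued for it.
- */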
-static inline int
-ksocknal_sched_cansleep(struct ksock_sched *sched)
-{
-	int rc;
-
-	spin_lock_bh(&sched->kss_lock);
-
-	rc = !ksocknal_data.ksnd_shuttingdown &&
-	      list_empty(&sched->kss_rx_conns) &&
-	      list_empty(&sched->kss_tx_conns);
-
-	spin_unlock_bh(&sched->kss_lock);
-	return rc;
-}
-
-int ksocknal_scheduler(void *arg)
-{
-	struct ksock_sched_info *info;
-	struct ksock_sched *sched;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-	int rc;
-	int nloops = 0;
-	long id = (long)arg;
-
-	info = ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)];
-	sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
-
-	rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt);
-	if (rc) {
-		CWARN("Can't set CPU partition affinity to %d: %d\n",
-		      info->ksi_cpt, rc);
-	}
-
-	spin_lock_bh(&sched->kss_lock);
-
-	while (!ksocknal_data.ksnd_shuttingdown) {
-		int did_something = 0;
-
-		/* Ensure I progress everything semi-fairly */
-
-		if (!list_empty(&sched->kss_rx_conns)) {
-			conn = list_entry(sched->kss_rx_conns.next,
-					  struct ksock_conn, ksnc_rx_list);
-			list_del(&conn->ksnc_rx_list);
-
-			LASSERT(conn->ksnc_rx_scheduled);
-			LASSERT(conn->ksnc_rx_ready);
-
-			/*
-			 * clear rx_ready in case receive isn't complete.
-			 * Do it BEFORE we call process_recv, since
-			 * data_ready can set it any time after we release
-			 * kss_lock.
-			 */
-			conn->ksnc_rx_ready = 0;
-			spin_unlock_bh(&sched->kss_lock);
-
-			rc = ksocknal_process_receive(conn);
-
-			spin_lock_bh(&sched->kss_lock);
-
-			/* I'm the only one that can clear this flag */
-			LASSERT(conn->ksnc_rx_scheduled);
-
-			/* Did process_receive get everything it wanted? */
-			if (!rc)
-				conn->ksnc_rx_ready = 1;
-
-			if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
-				/*
-				 * Conn blocked waiting for ksocknal_recv()
-				 * I change its state (under lock) to signal
-				 * it can be rescheduled
-				 */
-				conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
-			} else if (conn->ksnc_rx_ready) {
-				/* reschedule for rx */
-				list_add_tail(&conn->ksnc_rx_list,
-					      &sched->kss_rx_conns);
-			} else {
-				conn->ksnc_rx_scheduled = 0;
-				/* drop my ref */
-				ksocknal_conn_decref(conn);
-			}
-
-			did_something = 1;
-		}
-
-		if (!list_empty(&sched->kss_tx_conns)) {
-			LIST_HEAD(zlist);
-
-			if (!list_empty(&sched->kss_zombie_noop_txs)) {
-				list_add(&zlist, &sched->kss_zombie_noop_txs);
-				list_del_init(&sched->kss_zombie_noop_txs);
-			}
-
-			conn = list_entry(sched->kss_tx_conns.next,
-					  struct ksock_conn, ksnc_tx_list);
-			list_del(&conn->ksnc_tx_list);
-
-			LASSERT(conn->ksnc_tx_scheduled);
-			LASSERT(conn->ksnc_tx_ready);
-			LASSERT(!list_empty(&conn->ksnc_tx_queue));
-
-			tx = list_entry(conn->ksnc_tx_queue.next,
-					struct ksock_tx, tx_list);
-
-			if (conn->ksnc_tx_carrier == tx)
-				ksocknal_next_tx_carrier(conn);
-
-			/* dequeue now so empty list => more to send */
-			list_del(&tx->tx_list);
-
-			/*
-			 * Clear tx_ready in case send isn't complete.  Do
-			 * it BEFORE we call process_transmit, since
-			 * write_space can set it any time after we release
-			 * kss_lock.
-			 */
-			conn->ksnc_tx_ready = 0;
-			spin_unlock_bh(&sched->kss_lock);
-
-			if (!list_empty(&zlist)) {
-				/*
-				 * free zombie noop txs, it's fast because
-				 * noop txs are just put in freelist
-				 */
-				ksocknal_txlist_done(NULL, &zlist, 0);
-			}
-
-			rc = ksocknal_process_transmit(conn, tx);
-
-			if (rc == -ENOMEM || rc == -EAGAIN) {
-				/*
-				 * Incomplete send: replace tx on HEAD of
-				 * tx_queue
-				 */
-				spin_lock_bh(&sched->kss_lock);
-				list_add(&tx->tx_list, &conn->ksnc_tx_queue);
-			} else {
-				/* Complete send; tx -ref */
-				ksocknal_tx_decref(tx);
-
-				spin_lock_bh(&sched->kss_lock);
-				/* assume space for more */
-				conn->ksnc_tx_ready = 1;
-			}
-
-			if (rc == -ENOMEM) {
-				/*
-				 * Do nothing; after a short timeout, this
-				 * conn will be reposted on kss_tx_conns.
-				 */
-			} else if (conn->ksnc_tx_ready &&
-				   !list_empty(&conn->ksnc_tx_queue)) {
-				/* reschedule for tx */
-				list_add_tail(&conn->ksnc_tx_list,
-					      &sched->kss_tx_conns);
-			} else {
-				conn->ksnc_tx_scheduled = 0;
-				/* drop my ref */
-				ksocknal_conn_decref(conn);
-			}
-
-			did_something = 1;
-		}
-		if (!did_something ||	   /* nothing to do */
-		    ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
-			spin_unlock_bh(&sched->kss_lock);
-
-			nloops = 0;
-
-			if (!did_something) {   /* wait for something to do */
-				rc = wait_event_interruptible_exclusive(
-					sched->kss_waitq,
-					!ksocknal_sched_cansleep(sched));
-				LASSERT(!rc);
-			} else {
-				cond_resched();
-			}
-
-			spin_lock_bh(&sched->kss_lock);
-		}
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-	ksocknal_thread_fini();
-	return 0;
-}
-
-/*
- * Add connection to kss_rx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_read_callback(struct ksock_conn *conn)
-{
-	struct ksock_sched *sched;
-
-	sched = conn->ksnc_scheduler;
-
-	spin_lock_bh(&sched->kss_lock);
-
-	conn->ksnc_rx_ready = 1;
-
-	if (!conn->ksnc_rx_scheduled) {  /* not being progressed */
-		list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
-		conn->ksnc_rx_scheduled = 1;
-		/* extra ref for scheduler */
-		ksocknal_conn_addref(conn);
-
-		wake_up(&sched->kss_waitq);
-	}
-	spin_unlock_bh(&sched->kss_lock);
-}
-
-/*
- * Add connection to kss_tx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_write_callback(struct ksock_conn *conn)
-{
-	struct ksock_sched *sched;
-
-	sched = conn->ksnc_scheduler;
-
-	spin_lock_bh(&sched->kss_lock);
-
-	conn->ksnc_tx_ready = 1;
-
-	if (!conn->ksnc_tx_scheduled && /* not being progressed */
-	    !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */
-		list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
-		conn->ksnc_tx_scheduled = 1;
-		/* extra ref for scheduler */
-		ksocknal_conn_addref(conn);
-
-		wake_up(&sched->kss_waitq);
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-}
-
-static struct ksock_proto *
-ksocknal_parse_proto_version(struct ksock_hello_msg *hello)
-{
-	__u32 version = 0;
-
-	if (hello->kshm_magic == LNET_PROTO_MAGIC)
-		version = hello->kshm_version;
-	else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC))
-		version = __swab32(hello->kshm_version);
-
-	if (version) {
-#if SOCKNAL_VERSION_DEBUG
-		if (*ksocknal_tunables.ksnd_protocol == 1)
-			return NULL;
-
-		if (*ksocknal_tunables.ksnd_protocol == 2 &&
-		    version == KSOCK_PROTO_V3)
-			return NULL;
-#endif
-		if (version == KSOCK_PROTO_V2)
-			return &ksocknal_protocol_v2x;
-
-		if (version == KSOCK_PROTO_V3)
-			return &ksocknal_protocol_v3x;
-
-		return NULL;
-	}
-
-	if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
-		struct lnet_magicversion *hmv = (struct lnet_magicversion *)hello;
-
-		BUILD_BUG_ON(sizeof(struct lnet_magicversion) !=
-			     offsetof(struct ksock_hello_msg, kshm_src_nid));
-
-		if (hmv->version_major == cpu_to_le16(KSOCK_PROTO_V1_MAJOR) &&
-		    hmv->version_minor == cpu_to_le16(KSOCK_PROTO_V1_MINOR))
-			return &ksocknal_protocol_v1x;
-	}
-
-	return NULL;
-}
-
-int
-ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-		    lnet_nid_t peer_nid, struct ksock_hello_msg *hello)
-{
-	/* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
-	struct ksock_net *net = (struct ksock_net *)ni->ni_data;
-
-	LASSERT(hello->kshm_nips <= LNET_MAX_INTERFACES);
-
-	/* rely on caller to hold a ref on socket so it wouldn't disappear */
-	LASSERT(conn->ksnc_proto);
-
-	hello->kshm_src_nid = ni->ni_nid;
-	hello->kshm_dst_nid = peer_nid;
-	hello->kshm_src_pid = the_lnet.ln_pid;
-
-	hello->kshm_src_incarnation = net->ksnn_incarnation;
-	hello->kshm_ctype = conn->ksnc_type;
-
-	return conn->ksnc_proto->pro_send_hello(conn, hello);
-}
-
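-/*
- * Map a connection type to the type the peer should use for the same
- * connection: bulk-in pairs with bulk-out, control/any map to
- * themselves, and anything else is invalid.
- */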
-static int
-ksocknal_invert_type(int type)
-{
-	switch (type) {
-	case SOCKLND_CONN_ANY:
-	case SOCKLND_CONN_CONTROL:
-		return type;
-	case SOCKLND_CONN_BULK_IN:
-		return SOCKLND_CONN_BULK_OUT;
-	case SOCKLND_CONN_BULK_OUT:
-		return SOCKLND_CONN_BULK_IN;
-	default:
-		return SOCKLND_CONN_NONE;
-	}
-}
-
-int
-ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-		    struct ksock_hello_msg *hello,
-		    struct lnet_process_id *peerid,
-		    __u64 *incarnation)
-{
-	/* Return  < 0       fatal error
-	 *         0         success
-	 *         EALREADY  lost connection race
-	 *         EPROTO    protocol version mismatch
-	 */
-	struct socket *sock = conn->ksnc_sock;
-	int active = !!conn->ksnc_proto;
-	int timeout;
-	int proto_match;
-	int rc;
-	struct ksock_proto *proto;
-	struct lnet_process_id recv_id;
-
-	/* socket type set on active connections - not set on passive */
-	LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
-
-	timeout = active ? *ksocknal_tunables.ksnd_timeout :
-			    lnet_acceptor_timeout();
-
-	rc = lnet_sock_read(sock, &hello->kshm_magic,
-			    sizeof(hello->kshm_magic), timeout);
-	if (rc) {
-		CERROR("Error %d reading HELLO from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0);
-		return rc;
-	}
-
-	if (hello->kshm_magic != LNET_PROTO_MAGIC &&
-	    hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
-	    hello->kshm_magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
-		/* Unexpected magic! */
-		CERROR("Bad magic(1) %#08x (%#08x expected) from %pI4h\n",
-		       __cpu_to_le32(hello->kshm_magic),
-		       LNET_PROTO_TCP_MAGIC,
-		       &conn->ksnc_ipaddr);
-		return -EPROTO;
-	}
-
-	rc = lnet_sock_read(sock, &hello->kshm_version,
-			    sizeof(hello->kshm_version), timeout);
-	if (rc) {
-		CERROR("Error %d reading HELLO from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0);
-		return rc;
-	}
-
-	proto = ksocknal_parse_proto_version(hello);
-	if (!proto) {
-		if (!active) {
-			/* unknown protocol from peer, tell peer my protocol */
-			conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
-			if (*ksocknal_tunables.ksnd_protocol == 2)
-				conn->ksnc_proto = &ksocknal_protocol_v2x;
-			else if (*ksocknal_tunables.ksnd_protocol == 1)
-				conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
-			hello->kshm_nips = 0;
-			ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
-		}
-
-		CERROR("Unknown protocol version (%d.x expected) from %pI4h\n",
-		       conn->ksnc_proto->pro_version,
-		       &conn->ksnc_ipaddr);
-
-		return -EPROTO;
-	}
-
-	proto_match = (conn->ksnc_proto == proto);
-	conn->ksnc_proto = proto;
-
-	/* receive the rest of hello message anyway */
-	rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
-	if (rc) {
-		CERROR("Error %d reading or checking hello from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0);
-		return rc;
-	}
-
-	*incarnation = hello->kshm_src_incarnation;
-
-	if (hello->kshm_src_nid == LNET_NID_ANY) {
-		CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pI4h\n",
-		       &conn->ksnc_ipaddr);
-		return -EPROTO;
-	}
-
-	if (!active &&
-	    conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
-		/* Userspace NAL assigns peer process ID from socket */
-		recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
-		recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
-					 conn->ksnc_ipaddr);
-	} else {
-		recv_id.nid = hello->kshm_src_nid;
-		recv_id.pid = hello->kshm_src_pid;
-	}
-
-	if (!active) {
-		*peerid = recv_id;
-
-		/* peer determines type */
-		conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
-		if (conn->ksnc_type == SOCKLND_CONN_NONE) {
-			CERROR("Unexpected type %d from %s ip %pI4h\n",
-			       hello->kshm_ctype, libcfs_id2str(*peerid),
-			       &conn->ksnc_ipaddr);
-			return -EPROTO;
-		}
-
-		return 0;
-	}
-
-	if (peerid->pid != recv_id.pid ||
-	    peerid->nid != recv_id.nid) {
-		LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host %pI4h, but they claimed they were %s; please check your Lustre configuration.\n",
-				   libcfs_id2str(*peerid),
-				   &conn->ksnc_ipaddr,
-				   libcfs_id2str(recv_id));
-		return -EPROTO;
-	}
-
-	if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
-		/* Possible protocol mismatch or I lost the connection race */
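-		/* NB: a positive return distinguishes these non-fatal cases
-		 * from fatal (< 0) errors; see the return-value comment at
-		 * the top of this function
-		 */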
-		return proto_match ? EALREADY : EPROTO;
-	}
-
-	if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
-		CERROR("Mismatched types: me %d, %s ip %pI4h %d\n",
-		       conn->ksnc_type, libcfs_id2str(*peerid),
-		       &conn->ksnc_ipaddr, hello->kshm_ctype);
-		return -EPROTO;
-	}
-
-	return 0;
-}
-
-static int
-ksocknal_connect(struct ksock_route *route)
-{
-	LIST_HEAD(zombies);
-	struct ksock_peer *peer = route->ksnr_peer;
-	int type;
-	int wanted;
-	struct socket *sock;
-	unsigned long deadline;
-	int retry_later = 0;
-	int rc = 0;
-
-	deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	LASSERT(route->ksnr_scheduled);
-	LASSERT(!route->ksnr_connecting);
-
-	route->ksnr_connecting = 1;
-
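-	/* loop until every wanted connection type is established, or we
-	 * fail / need to reschedule
-	 */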
-	for (;;) {
-		wanted = ksocknal_route_mask() & ~route->ksnr_connected;
-
-		/*
-		 * stop connecting if peer/route got closed under me, or
-		 * route got connected while queued
-		 */
-		if (peer->ksnp_closing || route->ksnr_deleted ||
-		    !wanted) {
-			retry_later = 0;
-			break;
-		}
-
-		/* reschedule if peer is connecting to me */
-		if (peer->ksnp_accepting > 0) {
-			CDEBUG(D_NET,
-			       "peer %s(%d) already connecting to me, retry later.\n",
-			       libcfs_nid2str(peer->ksnp_id.nid),
-			       peer->ksnp_accepting);
-			retry_later = 1;
-		}
-
-		if (retry_later) /* needs reschedule */
-			break;
-
-		if (wanted & BIT(SOCKLND_CONN_ANY)) {
-			type = SOCKLND_CONN_ANY;
-		} else if (wanted & BIT(SOCKLND_CONN_CONTROL)) {
-			type = SOCKLND_CONN_CONTROL;
-		} else if (wanted & BIT(SOCKLND_CONN_BULK_IN)) {
-			type = SOCKLND_CONN_BULK_IN;
-		} else {
-			LASSERT(wanted & BIT(SOCKLND_CONN_BULK_OUT));
-			type = SOCKLND_CONN_BULK_OUT;
-		}
-
-		write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-		if (time_after_eq(jiffies, deadline)) {
-			rc = -ETIMEDOUT;
-			lnet_connect_console_error(rc, peer->ksnp_id.nid,
-						   route->ksnr_ipaddr,
-						   route->ksnr_port);
-			goto failed;
-		}
-
-		rc = lnet_connect(&sock, peer->ksnp_id.nid,
-				  route->ksnr_myipaddr,
-				  route->ksnr_ipaddr, route->ksnr_port);
-		if (rc)
-			goto failed;
-
-		rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
-		if (rc < 0) {
-			lnet_connect_console_error(rc, peer->ksnp_id.nid,
-						   route->ksnr_ipaddr,
-						   route->ksnr_port);
-			goto failed;
-		}
-
-		/*
-		 * A +ve RC means I have to retry because I lost the connection
-		 * race or I have to renegotiate protocol version
-		 */
-		retry_later = rc;
-		if (retry_later)
-			CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
-			       libcfs_nid2str(peer->ksnp_id.nid));
-
-		write_lock_bh(&ksocknal_data.ksnd_global_lock);
-	}
-
-	route->ksnr_scheduled = 0;
-	route->ksnr_connecting = 0;
-
-	if (retry_later) {
-		/*
-		 * re-queue for attention; this frees me up to handle
-		 * the peer's incoming connection request
-		 */
-		if (rc == EALREADY ||
-		    (!rc && peer->ksnp_accepting > 0)) {
-			/*
-			 * We want to introduce a delay before the next
-			 * connection attempt if we lost the connection race,
-			 * but the race is usually resolved quickly, so
-			 * min_reconnectms should be a good heuristic
-			 */
-			route->ksnr_retry_interval =
-				*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000;
-			route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
-		}
-
-		ksocknal_launch_connection_locked(route);
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-	return retry_later;
-
- failed:
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	route->ksnr_scheduled = 0;
-	route->ksnr_connecting = 0;
-
-	/* This is a retry rather than a new connection */
-	route->ksnr_retry_interval *= 2;
-	route->ksnr_retry_interval =
-		max(route->ksnr_retry_interval,
-		    (long)*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000);
-	route->ksnr_retry_interval =
-		min(route->ksnr_retry_interval,
-		    (long)*ksocknal_tunables.ksnd_max_reconnectms * HZ / 1000);
-
-	LASSERT(route->ksnr_retry_interval);
-	route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
-
-	if (!list_empty(&peer->ksnp_tx_queue) &&
-	    !peer->ksnp_accepting &&
-	    !ksocknal_find_connecting_route_locked(peer)) {
-		struct ksock_conn *conn;
-
-		/*
-		 * ksnp_tx_queue is queued on a conn on successful
-		 * connection for V1.x and V2.x
-		 */
-		if (!list_empty(&peer->ksnp_conns)) {
-			conn = list_entry(peer->ksnp_conns.next,
-					  struct ksock_conn, ksnc_list);
-			LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
-		}
-
-		/*
-		 * take all the blocked packets while I've got the lock and
-		 * complete below...
-		 */
-		list_splice_init(&peer->ksnp_tx_queue, &zombies);
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_peer_failed(peer);
-	ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
-	return 0;
-}
-
-/*
- * Check whether we need to create more connds.
- * Try to create a new thread if necessary; @timeout may be updated if
- * thread creation fails, so the caller won't keep retrying while we're
- * short of resources.
- */
-static int
-ksocknal_connd_check_start(time64_t sec, long *timeout)
-{
-	char name[16];
-	int rc;
-	int total = ksocknal_data.ksnd_connd_starting +
-		    ksocknal_data.ksnd_connd_running;
-
-	if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
-		/* still initializing */
-		return 0;
-	}
-
-	if (total >= *ksocknal_tunables.ksnd_nconnds_max ||
-	    total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) {
-		/*
-		 * can't create more connd, or still have enough
-		 * threads to handle more connecting
-		 */
-		return 0;
-	}
-
-	if (list_empty(&ksocknal_data.ksnd_connd_routes)) {
-		/* no pending connecting request */
-		return 0;
-	}
-
-	if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) {
-		/* may run out of resource, retry later */
-		*timeout = HZ;
-		return 0;
-	}
-
-	if (ksocknal_data.ksnd_connd_starting > 0) {
-		/* serialize starting to avoid flood */
-		return 0;
-	}
-
-	ksocknal_data.ksnd_connd_starting_stamp = sec;
-	ksocknal_data.ksnd_connd_starting++;
-	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
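-	/* connd_lock is dropped here because thread creation may sleep */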
-	/* NB: total is the next id */
-	snprintf(name, sizeof(name), "socknal_cd%02d", total);
-	rc = ksocknal_thread_start(ksocknal_connd, NULL, name);
-
-	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-	if (!rc)
-		return 1;
-
-	/* we tried ... */
-	LASSERT(ksocknal_data.ksnd_connd_starting > 0);
-	ksocknal_data.ksnd_connd_starting--;
-	ksocknal_data.ksnd_connd_failed_stamp = ktime_get_real_seconds();
-
-	return 1;
-}
-
-/*
- * Check whether the current thread can exit: return 1 if there are too
- * many threads and none has been created in the past 120 seconds.
- * This function may also update @timeout so the caller comes back
- * later to recheck these conditions.
- */
-static int
-ksocknal_connd_check_stop(time64_t sec, long *timeout)
-{
-	int val;
-
-	if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
-		/* still initializing */
-		return 0;
-	}
-
-	if (ksocknal_data.ksnd_connd_starting > 0) {
-		/* in progress of starting new thread */
-		return 0;
-	}
-
-	if (ksocknal_data.ksnd_connd_running <=
-	    *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */
-		return 0;
-	}
-
-	/* created thread in past 120 seconds? */
-	val = (int)(ksocknal_data.ksnd_connd_starting_stamp +
-		    SOCKNAL_CONND_TIMEOUT - sec);
-
-	*timeout = (val > 0) ? val * HZ :
-			       SOCKNAL_CONND_TIMEOUT * HZ;
-	if (val > 0)
-		return 0;
-
-	/* no thread created in the past 120 seconds */
-
-	return ksocknal_data.ksnd_connd_running >
-	       ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV;
-}
-
-/*
- * Go through connd_routes queue looking for a route that we can process
- * right now, @timeout_p can be updated if we need to come back later
- */
-static struct ksock_route *
-ksocknal_connd_get_route_locked(signed long *timeout_p)
-{
-	struct ksock_route *route;
-	unsigned long now;
-
-	now = jiffies;
-
-	/* connd_routes can contain both pending and ordinary routes */
-	list_for_each_entry(route, &ksocknal_data.ksnd_connd_routes,
-			    ksnr_connd_list) {
-		if (!route->ksnr_retry_interval ||
-		    time_after_eq(now, route->ksnr_timeout))
-			return route;
-
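-		/* not ready to retry yet: shrink *timeout_p towards the
-		 * earliest pending retry deadline so the caller wakes in time
-		 */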
-		if (*timeout_p == MAX_SCHEDULE_TIMEOUT ||
-		    (int)*timeout_p > (int)(route->ksnr_timeout - now))
-			*timeout_p = (int)(route->ksnr_timeout - now);
-	}
-
-	return NULL;
-}
-
-int
-ksocknal_connd(void *arg)
-{
-	spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
-	struct ksock_connreq *cr;
-	wait_queue_entry_t wait;
-	int nloops = 0;
-	int cons_retry = 0;
-
-	init_waitqueue_entry(&wait, current);
-
-	spin_lock_bh(connd_lock);
-
-	LASSERT(ksocknal_data.ksnd_connd_starting > 0);
-	ksocknal_data.ksnd_connd_starting--;
-	ksocknal_data.ksnd_connd_running++;
-
-	while (!ksocknal_data.ksnd_shuttingdown) {
-		struct ksock_route *route = NULL;
-		time64_t sec = ktime_get_real_seconds();
-		long timeout = MAX_SCHEDULE_TIMEOUT;
-		int dropped_lock = 0;
-
-		if (ksocknal_connd_check_stop(sec, &timeout)) {
-			/* wakeup another one to check stop */
-			wake_up(&ksocknal_data.ksnd_connd_waitq);
-			break;
-		}
-
-		if (ksocknal_connd_check_start(sec, &timeout)) {
-			/* created new thread */
-			dropped_lock = 1;
-		}
-
-		if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
-			/* Connection accepted by the listener */
-			cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next,
-					struct ksock_connreq, ksncr_list);
-
-			list_del(&cr->ksncr_list);
-			spin_unlock_bh(connd_lock);
-			dropped_lock = 1;
-
-			ksocknal_create_conn(cr->ksncr_ni, NULL,
-					     cr->ksncr_sock, SOCKLND_CONN_NONE);
-			lnet_ni_decref(cr->ksncr_ni);
-			kfree(cr);
-
-			spin_lock_bh(connd_lock);
-		}
-
-		/*
-		 * Only handle an outgoing connection request if there
-		 * is a thread left to handle incoming connections and
-		 * create new connd
-		 */
-		if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV <
-		    ksocknal_data.ksnd_connd_running) {
-			route = ksocknal_connd_get_route_locked(&timeout);
-		}
-		if (route) {
-			list_del(&route->ksnr_connd_list);
-			ksocknal_data.ksnd_connd_connecting++;
-			spin_unlock_bh(connd_lock);
-			dropped_lock = 1;
-
-			if (ksocknal_connect(route)) {
-				/* consecutive retry */
-				if (cons_retry++ > SOCKNAL_INSANITY_RECONN) {
-					CWARN("massive consecutive re-connecting to %pI4h\n",
-					      &route->ksnr_ipaddr);
-					cons_retry = 0;
-				}
-			} else {
-				cons_retry = 0;
-			}
-
-			ksocknal_route_decref(route);
-
-			spin_lock_bh(connd_lock);
-			ksocknal_data.ksnd_connd_connecting--;
-		}
-
-		if (dropped_lock) {
-			if (++nloops < SOCKNAL_RESCHED)
-				continue;
-			spin_unlock_bh(connd_lock);
-			nloops = 0;
-			cond_resched();
-			spin_lock_bh(connd_lock);
-			continue;
-		}
-
-		/* Nothing to do for 'timeout' */
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq,
-					 &wait);
-		spin_unlock_bh(connd_lock);
-
-		nloops = 0;
-		schedule_timeout(timeout);
-
-		remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
-		spin_lock_bh(connd_lock);
-	}
-	ksocknal_data.ksnd_connd_running--;
-	spin_unlock_bh(connd_lock);
-
-	ksocknal_thread_fini();
-	return 0;
-}
-
-static struct ksock_conn *
-ksocknal_find_timed_out_conn(struct ksock_peer *peer)
-{
-	/* We're called with a shared lock on ksnd_global_lock */
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-
-	list_for_each(ctmp, &peer->ksnp_conns) {
-		int error;
-
-		conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
-		/* Don't need the {get,put}connsock dance to deref ksnc_sock */
-		LASSERT(!conn->ksnc_closing);
-
-		/*
-		 * SOCK_ERROR will reset error code of socket in
-		 * SOCK_ERROR will reset the socket's error code on
-		 * some platforms (like Darwin 8.x)
-		error = conn->ksnc_sock->sk->sk_err;
-		if (error) {
-			ksocknal_conn_addref(conn);
-
-			switch (error) {
-			case ECONNRESET:
-				CNETERR("A connection with %s (%pI4h:%d) was reset; it may have rebooted.\n",
-					libcfs_id2str(peer->ksnp_id),
-					&conn->ksnc_ipaddr,
-					conn->ksnc_port);
-				break;
-			case ETIMEDOUT:
-				CNETERR("A connection with %s (%pI4h:%d) timed out; the network or node may be down.\n",
-					libcfs_id2str(peer->ksnp_id),
-					&conn->ksnc_ipaddr,
-					conn->ksnc_port);
-				break;
-			default:
-				CNETERR("An unexpected network error %d occurred with %s (%pI4h:%d)\n",
-					error,
-					libcfs_id2str(peer->ksnp_id),
-					&conn->ksnc_ipaddr,
-					conn->ksnc_port);
-				break;
-			}
-
-			return conn;
-		}
-
-		if (conn->ksnc_rx_started &&
-		    time_after_eq(jiffies,
-				  conn->ksnc_rx_deadline)) {
-			/* Timed out incomplete incoming message */
-			ksocknal_conn_addref(conn);
-			CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n",
-				libcfs_id2str(peer->ksnp_id),
-				&conn->ksnc_ipaddr,
-				conn->ksnc_port,
-				conn->ksnc_rx_state,
-				iov_iter_count(&conn->ksnc_rx_to),
-				conn->ksnc_rx_nob_left);
-			return conn;
-		}
-
-		if ((!list_empty(&conn->ksnc_tx_queue) ||
-		     conn->ksnc_sock->sk->sk_wmem_queued) &&
-		    time_after_eq(jiffies,
-				  conn->ksnc_tx_deadline)) {
-			/*
-			 * Timed out messages queued for sending or
-			 * buffered in the socket's send buffer
-			 */
-			ksocknal_conn_addref(conn);
-			CNETERR("Timeout sending data to %s (%pI4h:%d); the network or that node may be down.\n",
-				libcfs_id2str(peer->ksnp_id),
-				&conn->ksnc_ipaddr,
-				conn->ksnc_port);
-			return conn;
-		}
-	}
-
-	return NULL;
-}
-
-static inline void
-ksocknal_flush_stale_txs(struct ksock_peer *peer)
-{
-	struct ksock_tx *tx;
-	struct ksock_tx *tmp;
-	LIST_HEAD(stale_txs);
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	list_for_each_entry_safe(tx, tmp, &peer->ksnp_tx_queue, tx_list) {
-		if (!time_after_eq(jiffies,
-				   tx->tx_deadline))
-			break;
-
-		list_del(&tx->tx_list);
-		list_add_tail(&tx->tx_list, &stale_txs);
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
-}
-
-static int
-ksocknal_send_keepalive_locked(struct ksock_peer *peer)
-	__must_hold(&ksocknal_data.ksnd_global_lock)
-{
-	struct ksock_sched *sched;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-
-	/* last_alive will be updated by create_conn */
-	if (list_empty(&peer->ksnp_conns))
-		return 0;
-
-	if (peer->ksnp_proto != &ksocknal_protocol_v3x)
-		return 0;
-
-	if (*ksocknal_tunables.ksnd_keepalive <= 0 ||
-	    time_before(jiffies,
-			peer->ksnp_last_alive + *ksocknal_tunables.ksnd_keepalive * HZ))
-		return 0;
-
-	if (time_before(jiffies, peer->ksnp_send_keepalive))
-		return 0;
-
-	/*
-	 * retry 10 secs later, so we don't put pressure on this peer
-	 * if we failed to send a keepalive this time
-	 */
-	peer->ksnp_send_keepalive = jiffies + 10 * HZ;
-
-	conn = ksocknal_find_conn_locked(peer, NULL, 1);
-	if (conn) {
-		sched = conn->ksnc_scheduler;
-
-		spin_lock_bh(&sched->kss_lock);
-		if (!list_empty(&conn->ksnc_tx_queue)) {
-			spin_unlock_bh(&sched->kss_lock);
-			/* there is a queued ACK, don't need keepalive */
-			return 0;
-		}
-
-		spin_unlock_bh(&sched->kss_lock);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	/* cookie = 1 is reserved for keepalive PING */
-	tx = ksocknal_alloc_tx_noop(1, 1);
-	if (!tx) {
-		read_lock(&ksocknal_data.ksnd_global_lock);
-		return -ENOMEM;
-	}
-
-	if (!ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) {
-		read_lock(&ksocknal_data.ksnd_global_lock);
-		return 1;
-	}
-
-	ksocknal_free_tx(tx);
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	return -EIO;
-}
-
-static void
-ksocknal_check_peer_timeouts(int idx)
-{
-	struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
-	struct ksock_peer *peer;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-
- again:
-	/*
-	 * NB. We expect to have a look at all the peers and not find any
-	 * connections to time out, so we just use a shared lock while we
-	 * take a look...
-	 */
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	list_for_each_entry(peer, peers, ksnp_list) {
-		unsigned long deadline = 0;
-		struct ksock_tx *tx_stale;
-		int resid = 0;
-		int n = 0;
-
-		if (ksocknal_send_keepalive_locked(peer)) {
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-			goto again;
-		}
-
-		conn = ksocknal_find_timed_out_conn(peer);
-
-		if (conn) {
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-
-			ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
-
-			/*
-			 * NB we won't find this one again, but we can't
-			 * just proceed with the next peer, since we dropped
-			 * ksnd_global_lock and it might be dead already!
-			 */
-			ksocknal_conn_decref(conn);
-			goto again;
-		}
-
-		/*
-		 * we can't process stale txs right here because we're
-		 * holding only shared lock
-		 */
-		if (!list_empty(&peer->ksnp_tx_queue)) {
-			tx = list_entry(peer->ksnp_tx_queue.next,
-					struct ksock_tx, tx_list);
-
-			if (time_after_eq(jiffies,
-					  tx->tx_deadline)) {
-				ksocknal_peer_addref(peer);
-				read_unlock(&ksocknal_data.ksnd_global_lock);
-
-				ksocknal_flush_stale_txs(peer);
-
-				ksocknal_peer_decref(peer);
-				goto again;
-			}
-		}
-
-		if (list_empty(&peer->ksnp_zc_req_list))
-			continue;
-
-		tx_stale = NULL;
-		spin_lock(&peer->ksnp_lock);
-		list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
-			if (!time_after_eq(jiffies,
-					   tx->tx_deadline))
-				break;
-			/* ignore the TX if connection is being closed */
-			if (tx->tx_conn->ksnc_closing)
-				continue;
-			if (!tx_stale)
-				tx_stale = tx;
-			n++;
-		}
-
-		if (!tx_stale) {
-			spin_unlock(&peer->ksnp_lock);
-			continue;
-		}
-
-		deadline = tx_stale->tx_deadline;
-		resid = tx_stale->tx_resid;
-		conn = tx_stale->tx_conn;
-		ksocknal_conn_addref(conn);
-
-		spin_unlock(&peer->ksnp_lock);
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-
-		CERROR("Total %d stale ZC_REQs for peer %s detected; the oldest (%p) timed out %ld secs ago, resid: %d, wmem: %d\n",
-		       n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale,
-		       (jiffies - deadline) / HZ,
-		       resid, conn->ksnc_sock->sk->sk_wmem_queued);
-
-		ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
-		ksocknal_conn_decref(conn);
-		goto again;
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_reaper(void *arg)
-{
-	wait_queue_entry_t wait;
-	struct ksock_conn *conn;
-	struct ksock_sched *sched;
-	struct list_head enomem_conns;
-	int nenomem_conns;
-	long timeout;
-	int i;
-	int peer_index = 0;
-	unsigned long deadline = jiffies;
-
-	INIT_LIST_HEAD(&enomem_conns);
-	init_waitqueue_entry(&wait, current);
-
-	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	while (!ksocknal_data.ksnd_shuttingdown) {
-		if (!list_empty(&ksocknal_data.ksnd_deathrow_conns)) {
-			conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next,
-					  struct ksock_conn, ksnc_list);
-			list_del(&conn->ksnc_list);
-
-			spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-			ksocknal_terminate_conn(conn);
-			ksocknal_conn_decref(conn);
-
-			spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-			continue;
-		}
-
-		if (!list_empty(&ksocknal_data.ksnd_zombie_conns)) {
-			conn = list_entry(ksocknal_data.ksnd_zombie_conns.next,
-					  struct ksock_conn, ksnc_list);
-			list_del(&conn->ksnc_list);
-
-			spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-			ksocknal_destroy_conn(conn);
-
-			spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-			continue;
-		}
-
-		if (!list_empty(&ksocknal_data.ksnd_enomem_conns)) {
-			list_add(&enomem_conns,
-				 &ksocknal_data.ksnd_enomem_conns);
-			list_del_init(&ksocknal_data.ksnd_enomem_conns);
-		}
-
-		spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-		/* reschedule all the connections that stalled with ENOMEM... */
-		nenomem_conns = 0;
-		while (!list_empty(&enomem_conns)) {
-			conn = list_entry(enomem_conns.next, struct ksock_conn,
-					  ksnc_tx_list);
-			list_del(&conn->ksnc_tx_list);
-
-			sched = conn->ksnc_scheduler;
-
-			spin_lock_bh(&sched->kss_lock);
-
-			LASSERT(conn->ksnc_tx_scheduled);
-			conn->ksnc_tx_ready = 1;
-			list_add_tail(&conn->ksnc_tx_list,
-				      &sched->kss_tx_conns);
-			wake_up(&sched->kss_waitq);
-
-			spin_unlock_bh(&sched->kss_lock);
-			nenomem_conns++;
-		}
-
-		/* careful with the jiffy wrap... */
-		while ((timeout = deadline - jiffies) <= 0) {
-			const int n = 4;
-			const int p = 1;
-			int chunk = ksocknal_data.ksnd_peer_hash_size;
-
-			/*
-			 * Time to check for timeouts on a few more peers: I do
-			 * checks every 'p' seconds on a proportion of the peer
-			 * table and I need to check every connection 'n' times
-			 * within a timeout interval, to ensure I detect a
-			 * timeout on any connection within (n+1)/n times the
-			 * timeout interval.
-			 */
-			if (*ksocknal_tunables.ksnd_timeout > n * p)
-				chunk = (chunk * n * p) /
-					*ksocknal_tunables.ksnd_timeout;
-			if (!chunk)
-				chunk = 1;
-
-			for (i = 0; i < chunk; i++) {
-				ksocknal_check_peer_timeouts(peer_index);
-				peer_index = (peer_index + 1) %
-					     ksocknal_data.ksnd_peer_hash_size;
-			}
-
-			deadline = deadline + p * HZ;
-		}
-
-		if (nenomem_conns) {
-			/*
-			 * Reduce my timeout if I rescheduled ENOMEM conns.
-			 * This also prevents me getting woken immediately
-			 * if any go back on my enomem list.
-			 */
-			timeout = SOCKNAL_ENOMEM_RETRY;
-		}
-		ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
-		if (!ksocknal_data.ksnd_shuttingdown &&
-		    list_empty(&ksocknal_data.ksnd_deathrow_conns) &&
-		    list_empty(&ksocknal_data.ksnd_zombie_conns))
-			schedule_timeout(timeout);
-
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
-		spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-	}
-
-	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	ksocknal_thread_fini();
-	return 0;
-}

+ 0 - 534
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c

@@ -1,534 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/highmem.h>
-#include "socklnd.h"
-
-int
-ksocknal_lib_get_conn_addrs(struct ksock_conn *conn)
-{
-	int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr,
-				   &conn->ksnc_port);
-
-	/* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
-	LASSERT(!conn->ksnc_closing);
-
-	if (rc) {
-		CERROR("Error %d getting sock peer IP\n", rc);
-		return rc;
-	}
-
-	rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL);
-	if (rc) {
-		CERROR("Error %d getting sock local IP\n", rc);
-		return rc;
-	}
-
-	return 0;
-}
-
-int
-ksocknal_lib_zc_capable(struct ksock_conn *conn)
-{
-	int caps = conn->ksnc_sock->sk->sk_route_caps;
-
-	if (conn->ksnc_proto == &ksocknal_protocol_v1x)
-		return 0;
-
-	/*
-	 * ZC if the socket supports scatter/gather and doesn't need software
-	 * checksums
-	 */
-	return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK));
-}
-
-int
-ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
-	struct socket *sock = conn->ksnc_sock;
-	int nob, i;
-
-	if (*ksocknal_tunables.ksnd_enable_csum	&& /* checksum enabled */
-	    conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection  */
-	    tx->tx_nob == tx->tx_resid		 && /* first sending    */
-	    !tx->tx_msg.ksm_csum)		     /* not checksummed  */
-		ksocknal_lib_csum_tx(tx);
-
-	for (nob = i = 0; i < tx->tx_niov; i++)
-		nob += tx->tx_iov[i].iov_len;
-
-	if (!list_empty(&conn->ksnc_tx_queue) ||
-	    nob < tx->tx_resid)
-		msg.msg_flags |= MSG_MORE;
-
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
-		      tx->tx_iov, tx->tx_niov, nob);
-	return sock_sendmsg(sock, &msg);
-}
-
-int
-ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct socket *sock = conn->ksnc_sock;
-	struct bio_vec *kiov = tx->tx_kiov;
-	int rc;
-	int nob;
-
-	/* Not NOOP message */
-	LASSERT(tx->tx_lnetmsg);
-
-	if (tx->tx_msg.ksm_zc_cookies[0]) {
-		/* Zero copy is enabled */
-		struct sock *sk = sock->sk;
-		struct page *page = kiov->bv_page;
-		int offset = kiov->bv_offset;
-		int fragsize = kiov->bv_len;
-		int msgflg = MSG_DONTWAIT;
-
-		CDEBUG(D_NET, "page %p + offset %x for %d\n",
-		       page, offset, kiov->bv_len);
-
-		if (!list_empty(&conn->ksnc_tx_queue) ||
-		    fragsize < tx->tx_resid)
-			msgflg |= MSG_MORE;
-
-		if (sk->sk_prot->sendpage) {
-			rc = sk->sk_prot->sendpage(sk, page,
-						   offset, fragsize, msgflg);
-		} else {
-			rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
-		}
-	} else {
-		struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
-		int i;
-
-		for (nob = i = 0; i < tx->tx_nkiov; i++)
-			nob += kiov[i].bv_len;
-
-		if (!list_empty(&conn->ksnc_tx_queue) ||
-		    nob < tx->tx_resid)
-			msg.msg_flags |= MSG_MORE;
-
-		iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
-			      kiov, tx->tx_nkiov, nob);
-		rc = sock_sendmsg(sock, &msg);
-	}
-	return rc;
-}
-
-void
-ksocknal_lib_eager_ack(struct ksock_conn *conn)
-{
-	int opt = 1;
-	struct socket *sock = conn->ksnc_sock;
-
-	/*
-	 * Remind the socket to ACK eagerly.  If I don't, the socket might
-	 * think I'm about to send something it could piggy-back the ACK
-	 * on, introducing delay in completing zero-copy sends in my
-	 * peer.
-	 */
-	kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt,
-			  sizeof(opt));
-}
-
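-/*
- * iov_iter_for_each_range() callback: fold one contiguous range into the
- * connection's running CRC32.
- */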
-static int lustre_csum(struct kvec *v, void *context)
-	struct ksock_conn *conn = context;
-
-	struct ksock_conn *conn = context;
-	conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
-				      v->iov_base, v->iov_len);
-	return 0;
-}
-
-int
-ksocknal_lib_recv(struct ksock_conn *conn)
-{
-	struct msghdr msg = { .msg_iter = conn->ksnc_rx_to };
-	__u32 saved_csum;
-	int rc;
-
-	rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
-	if (rc <= 0)
-		return rc;
-
-	saved_csum = conn->ksnc_msg.ksm_csum;
-	if (!saved_csum)
-		return rc;
-
-	/* header is included only in V2; V3 checksums only the bulk data */
-	if (!(conn->ksnc_rx_to.type & ITER_BVEC) &&
-	     conn->ksnc_proto != &ksocknal_protocol_v2x)
-		return rc;
-
-	/* accumulate checksum */
-	conn->ksnc_msg.ksm_csum = 0;
-	iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn);
-	conn->ksnc_msg.ksm_csum = saved_csum;
-
-	return rc;
-}
-
-void
-ksocknal_lib_csum_tx(struct ksock_tx *tx)
-{
-	int i;
-	__u32 csum;
-	void *base;
-
-	LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg);
-	LASSERT(tx->tx_conn);
-	LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
-
-	tx->tx_msg.ksm_csum = 0;
-
-	csum = crc32_le(~0, tx->tx_iov[0].iov_base,
-			tx->tx_iov[0].iov_len);
-
-	if (tx->tx_kiov) {
-		for (i = 0; i < tx->tx_nkiov; i++) {
-			base = kmap(tx->tx_kiov[i].bv_page) +
-			       tx->tx_kiov[i].bv_offset;
-
-			csum = crc32_le(csum, base, tx->tx_kiov[i].bv_len);
-
-			kunmap(tx->tx_kiov[i].bv_page);
-		}
-	} else {
-		for (i = 1; i < tx->tx_niov; i++)
-			csum = crc32_le(csum, tx->tx_iov[i].iov_base,
-					tx->tx_iov[i].iov_len);
-	}
-
-	if (*ksocknal_tunables.ksnd_inject_csum_error) {
-		csum++;
-		*ksocknal_tunables.ksnd_inject_csum_error = 0;
-	}
-
-	tx->tx_msg.ksm_csum = csum;
-}
-
-int
-ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
-			       int *rxmem, int *nagle)
-{
-	struct socket *sock = conn->ksnc_sock;
-	int len;
-	int rc;
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc) {
-		LASSERT(conn->ksnc_closing);
-		*txmem = *rxmem = *nagle = 0;
-		return -ESHUTDOWN;
-	}
-
-	rc = lnet_sock_getbuf(sock, txmem, rxmem);
-	if (!rc) {
-		len = sizeof(*nagle);
-		rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY,
-				       (char *)nagle, &len);
-	}
-
-	ksocknal_connsock_decref(conn);
-
-	if (!rc)
-		*nagle = !*nagle;
-	else
-		*txmem = *rxmem = *nagle = 0;
-
-	return rc;
-}
-
-int
-ksocknal_lib_setup_sock(struct socket *sock)
-{
-	int rc;
-	int option;
-	int keep_idle;
-	int keep_intvl;
-	int keep_count;
-	int do_keepalive;
-	struct linger linger;
-
-	sock->sk->sk_allocation = GFP_NOFS;
-
-	/*
-	 * Ensure this socket aborts active sends immediately when we close
-	 * it.
-	 */
-	linger.l_onoff = 0;
-	linger.l_linger = 0;
-
-	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger,
-			       sizeof(linger));
-	if (rc) {
-		CERROR("Can't set SO_LINGER: %d\n", rc);
-		return rc;
-	}
-
-	option = -1;
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option,
-			       sizeof(option));
-	if (rc) {
-		CERROR("Can't set TCP_LINGER2: %d\n", rc);
-		return rc;
-	}
-
-	if (!*ksocknal_tunables.ksnd_nagle) {
-		option = 1;
-
-		rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-				       (char *)&option, sizeof(option));
-		if (rc) {
-			CERROR("Can't disable nagle: %d\n", rc);
-			return rc;
-		}
-	}
-
-	rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size,
-			      *ksocknal_tunables.ksnd_rx_buffer_size);
-	if (rc) {
-		CERROR("Can't set buffer tx %d, rx %d buffers: %d\n",
-		       *ksocknal_tunables.ksnd_tx_buffer_size,
-		       *ksocknal_tunables.ksnd_rx_buffer_size, rc);
-		return rc;
-	}
-
-	/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */
-
-	/* snapshot tunables */
-	keep_idle  = *ksocknal_tunables.ksnd_keepalive_idle;
-	keep_count = *ksocknal_tunables.ksnd_keepalive_count;
-	keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
-	do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
-	option = (do_keepalive ? 1 : 0);
-	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option,
-			       sizeof(option));
-	if (rc) {
-		CERROR("Can't set SO_KEEPALIVE: %d\n", rc);
-		return rc;
-	}
-
-	if (!do_keepalive)
-		return 0;
-
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle,
-			       sizeof(keep_idle));
-	if (rc) {
-		CERROR("Can't set TCP_KEEPIDLE: %d\n", rc);
-		return rc;
-	}
-
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-			       (char *)&keep_intvl, sizeof(keep_intvl));
-	if (rc) {
-		CERROR("Can't set TCP_KEEPINTVL: %d\n", rc);
-		return rc;
-	}
-
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count,
-			       sizeof(keep_count));
-	if (rc) {
-		CERROR("Can't set TCP_KEEPCNT: %d\n", rc);
-		return rc;
-	}
-
-	return 0;
-}
-
-void
-ksocknal_lib_push_conn(struct ksock_conn *conn)
-{
-	struct sock *sk;
-	struct tcp_sock *tp;
-	int nonagle;
-	int val = 1;
-	int rc;
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc)			    /* being shut down */
-		return;
-
-	sk = conn->ksnc_sock->sk;
-	tp = tcp_sk(sk);
-
-	lock_sock(sk);
-	nonagle = tp->nonagle;
-	tp->nonagle = 1;
-	release_sock(sk);
-
-	rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY,
-			       (char *)&val, sizeof(val));
-	LASSERT(!rc);
-
-	lock_sock(sk);
-	tp->nonagle = nonagle;
-	release_sock(sk);
-
-	ksocknal_connsock_decref(conn);
-}
-
-/*
- * socket call back in Linux
- */
-static void
-ksocknal_data_ready(struct sock *sk)
-{
-	struct ksock_conn *conn;
-
-	/* interleave correctly with closing sockets... */
-	LASSERT(!in_irq());
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	conn = sk->sk_user_data;
-	if (!conn) {	     /* raced with ksocknal_terminate_conn */
-		LASSERT(sk->sk_data_ready != &ksocknal_data_ready);
-		sk->sk_data_ready(sk);
-	} else {
-		ksocknal_read_callback(conn);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-static void
-ksocknal_write_space(struct sock *sk)
-{
-	struct ksock_conn *conn;
-	int wspace;
-	int min_wspace;
-
-	/* interleave correctly with closing sockets... */
-	LASSERT(!in_irq());
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	conn = sk->sk_user_data;
-	wspace = sk_stream_wspace(sk);
-	min_wspace = sk_stream_min_wspace(sk);
-
-	CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
-	       sk, wspace, min_wspace, conn,
-	       !conn ? "" : (conn->ksnc_tx_ready ?
-				      " ready" : " blocked"),
-	       !conn ? "" : (conn->ksnc_tx_scheduled ?
-				      " scheduled" : " idle"),
-	       !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ?
-				      " empty" : " queued"));
-
-	if (!conn) {	     /* raced with ksocknal_terminate_conn */
-		LASSERT(sk->sk_write_space != &ksocknal_write_space);
-		sk->sk_write_space(sk);
-
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-		return;
-	}
-
-	if (wspace >= min_wspace) {	      /* got enough space */
-		ksocknal_write_callback(conn);
-
-		/*
-		 * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
-		 * ENOMEM check in ksocknal_transmit is race-free (think about
-		 * it).
-		 */
-		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn)
-{
-	conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
-	conn->ksnc_saved_write_space = sock->sk->sk_write_space;
-}
-
-void
-ksocknal_lib_set_callback(struct socket *sock,  struct ksock_conn *conn)
-{
-	sock->sk->sk_user_data = conn;
-	sock->sk->sk_data_ready = ksocknal_data_ready;
-	sock->sk->sk_write_space = ksocknal_write_space;
-}
-
-void
-ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn)
-{
-	/*
-	 * Remove conn's network callbacks.
-	 * NB I _have_ to restore the callback, rather than storing a noop,
-	 * since the socket could survive past this module being unloaded!!
-	 */
-	sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
-	sock->sk->sk_write_space = conn->ksnc_saved_write_space;
-
-	/*
-	 * A callback could be in progress already; they hold a read lock
-	 * on ksnd_global_lock (to serialise with me) and NOOP if
-	 * sk_user_data is NULL.
-	 */
-	sock->sk->sk_user_data = NULL;
-}
-
-int
-ksocknal_lib_memory_pressure(struct ksock_conn *conn)
-{
-	int rc = 0;
-	struct ksock_sched *sched;
-
-	sched = conn->ksnc_scheduler;
-	spin_lock_bh(&sched->kss_lock);
-
-	if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) &&
-	    !conn->ksnc_tx_ready) {
-		/*
-		 * SOCK_NOSPACE is set when the socket fills
-		 * and cleared in the write_space callback
-		 * (which also sets ksnc_tx_ready).  If
-		 * SOCK_NOSPACE and ksnc_tx_ready are BOTH
-		 * zero, I didn't fill the socket and
-		 * write_space won't reschedule me, so I
-		 * return -ENOMEM to get my caller to retry
-		 * after a timeout
-		 */
-		rc = -ENOMEM;
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-
-	return rc;
-}

+ 0 - 184
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c

@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-static int sock_timeout = 50;
-module_param(sock_timeout, int, 0644);
-MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)");
-
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
-
-/*
- * Number of daemons in each thread pool, which is per CPU partition;
- * we will estimate a reasonable value based on CPUs if it's not set.
- */
-static unsigned int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "# scheduler daemons in each pool while starting");
-
-static int nconnds = 4;
-module_param(nconnds, int, 0444);
-MODULE_PARM_DESC(nconnds, "# connection daemons while starting");
-
-static int nconnds_max = 64;
-module_param(nconnds_max, int, 0444);
-MODULE_PARM_DESC(nconnds_max, "max # connection daemons");
-
-static int min_reconnectms = 1000;
-module_param(min_reconnectms, int, 0644);
-MODULE_PARM_DESC(min_reconnectms, "min connection retry interval (mS)");
-
-static int max_reconnectms = 60000;
-module_param(max_reconnectms, int, 0644);
-MODULE_PARM_DESC(max_reconnectms, "max connection retry interval (mS)");
-
-# define DEFAULT_EAGER_ACK 0
-static int eager_ack = DEFAULT_EAGER_ACK;
-module_param(eager_ack, int, 0644);
-MODULE_PARM_DESC(eager_ack, "send tcp ack packets eagerly");
-
-static int typed_conns = 1;
-module_param(typed_conns, int, 0444);
-MODULE_PARM_DESC(typed_conns, "use different sockets for bulk");
-
-static int min_bulk = 1 << 10;
-module_param(min_bulk, int, 0644);
-MODULE_PARM_DESC(min_bulk, "smallest 'large' message");
-
-# define DEFAULT_BUFFER_SIZE 0
-static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(tx_buffer_size, int, 0644);
-MODULE_PARM_DESC(tx_buffer_size, "socket tx buffer size (0 for system default)");
-
-static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(rx_buffer_size, int, 0644);
-MODULE_PARM_DESC(rx_buffer_size, "socket rx buffer size (0 for system default)");
-
-static int nagle;
-module_param(nagle, int, 0644);
-MODULE_PARM_DESC(nagle, "enable NAGLE?");
-
-static int round_robin = 1;
-module_param(round_robin, int, 0644);
-MODULE_PARM_DESC(round_robin, "Round robin for multiple interfaces");
-
-static int keepalive = 30;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "# seconds before send keepalive");
-
-static int keepalive_idle = 30;
-module_param(keepalive_idle, int, 0644);
-MODULE_PARM_DESC(keepalive_idle, "# idle seconds before probe");
-
-#define DEFAULT_KEEPALIVE_COUNT  5
-static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
-module_param(keepalive_count, int, 0644);
-MODULE_PARM_DESC(keepalive_count, "# missed probes == dead");
-
-static int keepalive_intvl = 5;
-module_param(keepalive_intvl, int, 0644);
-MODULE_PARM_DESC(keepalive_intvl, "seconds between probes");
-
-static int enable_csum;
-module_param(enable_csum, int, 0644);
-MODULE_PARM_DESC(enable_csum, "enable checksum");
-
-static int inject_csum_error;
-module_param(inject_csum_error, int, 0644);
-MODULE_PARM_DESC(inject_csum_error, "set non-zero to inject a checksum error");
-
-static int nonblk_zcack = 1;
-module_param(nonblk_zcack, int, 0644);
-MODULE_PARM_DESC(nonblk_zcack, "always send ZC-ACK on non-blocking connection");
-
-static unsigned int zc_min_payload = 16 << 10;
-module_param(zc_min_payload, int, 0644);
-MODULE_PARM_DESC(zc_min_payload, "minimum payload size to zero copy");
-
-static unsigned int zc_recv;
-module_param(zc_recv, int, 0644);
-MODULE_PARM_DESC(zc_recv, "enable ZC recv for Chelsio driver");
-
-static unsigned int zc_recv_min_nfrags = 16;
-module_param(zc_recv_min_nfrags, int, 0644);
-MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv");
-
-#if SOCKNAL_VERSION_DEBUG
-static int protocol = 3;
-module_param(protocol, int, 0644);
-MODULE_PARM_DESC(protocol, "protocol version");
-#endif
-
-struct ksock_tunables ksocknal_tunables;
-
-int ksocknal_tunables_init(void)
-{
-	/* initialize ksocknal_tunables structure */
-	ksocknal_tunables.ksnd_timeout            = &sock_timeout;
-	ksocknal_tunables.ksnd_nscheds            = &nscheds;
-	ksocknal_tunables.ksnd_nconnds            = &nconnds;
-	ksocknal_tunables.ksnd_nconnds_max        = &nconnds_max;
-	ksocknal_tunables.ksnd_min_reconnectms    = &min_reconnectms;
-	ksocknal_tunables.ksnd_max_reconnectms    = &max_reconnectms;
-	ksocknal_tunables.ksnd_eager_ack          = &eager_ack;
-	ksocknal_tunables.ksnd_typed_conns        = &typed_conns;
-	ksocknal_tunables.ksnd_min_bulk           = &min_bulk;
-	ksocknal_tunables.ksnd_tx_buffer_size     = &tx_buffer_size;
-	ksocknal_tunables.ksnd_rx_buffer_size     = &rx_buffer_size;
-	ksocknal_tunables.ksnd_nagle              = &nagle;
-	ksocknal_tunables.ksnd_round_robin        = &round_robin;
-	ksocknal_tunables.ksnd_keepalive          = &keepalive;
-	ksocknal_tunables.ksnd_keepalive_idle     = &keepalive_idle;
-	ksocknal_tunables.ksnd_keepalive_count    = &keepalive_count;
-	ksocknal_tunables.ksnd_keepalive_intvl    = &keepalive_intvl;
-	ksocknal_tunables.ksnd_credits            = &credits;
-	ksocknal_tunables.ksnd_peertxcredits      = &peer_credits;
-	ksocknal_tunables.ksnd_peerrtrcredits     = &peer_buffer_credits;
-	ksocknal_tunables.ksnd_peertimeout        = &peer_timeout;
-	ksocknal_tunables.ksnd_enable_csum        = &enable_csum;
-	ksocknal_tunables.ksnd_inject_csum_error  = &inject_csum_error;
-	ksocknal_tunables.ksnd_nonblk_zcack       = &nonblk_zcack;
-	ksocknal_tunables.ksnd_zc_min_payload     = &zc_min_payload;
-	ksocknal_tunables.ksnd_zc_recv            = &zc_recv;
-	ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
-
-#if SOCKNAL_VERSION_DEBUG
-	ksocknal_tunables.ksnd_protocol           = &protocol;
-#endif
-
-	if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10))
-		*ksocknal_tunables.ksnd_zc_min_payload = 2 << 10;
-
-	return 0;
-}

+ 0 - 810
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c

@@ -1,810 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, Intel Corporation.
- *
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-/*
- * Protocol entries:
- *   pro_send_hello()     : send hello message
- *   pro_recv_hello()     : receive hello message
- *   pro_pack()           : pack message header
- *   pro_unpack()         : unpack message header
- *   pro_queue_tx_zcack() : called holding BH lock: kss_lock;
- *                          return 1 if the ACK is piggybacked, otherwise 0
- *   pro_queue_tx_msg()   : called holding BH lock: kss_lock;
- *                          return the ACK piggybacked by my message, or NULL
- *   pro_handle_zcreq()   : handler of incoming ZC-REQ
- *   pro_handle_zcack()   : handler of incoming ZC-ACK
- *   pro_match_tx()       : called holding glock
- */
-
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
-	/* V1.x, just enqueue it */
-	list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
-	return NULL;
-}
-
-void
-ksocknal_next_tx_carrier(struct ksock_conn *conn)
-{
-	struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
-	/* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
-	LASSERT(!list_empty(&conn->ksnc_tx_queue));
-	LASSERT(tx);
-
-	/* Next TX that can carry ZC-ACK or LNet message */
-	if (tx->tx_list.next == &conn->ksnc_tx_queue) {
-		/* no more packets queued */
-		conn->ksnc_tx_carrier = NULL;
-	} else {
-		conn->ksnc_tx_carrier = list_next_entry(tx, tx_list);
-		LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
-	}
-}
-
-static int
-ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn,
-			   struct ksock_tx *tx_ack, __u64 cookie)
-{
-	struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
-	LASSERT(!tx_ack ||
-		tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-	/*
-	 * Enqueue or piggyback tx_ack / cookie:
-	 * . if no tx can piggyback the cookie of tx_ack (or the bare
-	 *   cookie), just enqueue tx_ack (if tx_ack != NULL) and return 0.
-	 * . if some tx can piggyback the cookie of tx_ack (or the bare
-	 *   cookie), piggyback it there and return 1.
-	 */
-	if (!tx) {
-		if (tx_ack) {
-			list_add_tail(&tx_ack->tx_list,
-				      &conn->ksnc_tx_queue);
-			conn->ksnc_tx_carrier = tx_ack;
-		}
-		return 0;
-	}
-
-	if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
-		/* tx is noop zc-ack, can't piggyback zc-ack cookie */
-		if (tx_ack)
-			list_add_tail(&tx_ack->tx_list,
-				      &conn->ksnc_tx_queue);
-		return 0;
-	}
-
-	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
-	LASSERT(!tx->tx_msg.ksm_zc_cookies[1]);
-
-	if (tx_ack)
-		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
-	/* piggyback the zc-ack cookie */
-	tx->tx_msg.ksm_zc_cookies[1] = cookie;
-	/* move on to the next TX which can carry cookie */
-	ksocknal_next_tx_carrier(conn);
-
-	return 1;
-}
-
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
-	struct ksock_tx *tx  = conn->ksnc_tx_carrier;
-
-	/*
-	 * Enqueue tx_msg:
-	 * . If there is no NOOP on the connection, just enqueue
-	 *   tx_msg and return NULL.
-	 * . If there is a NOOP on the connection, piggyback its cookie
-	 *   onto tx_msg, replace the NOOP tx with tx_msg, and return
-	 *   the NOOP tx.
-	 */
-	if (!tx) { /* nothing on queue */
-		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
-		conn->ksnc_tx_carrier = tx_msg;
-		return NULL;
-	}
-
-	if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
-		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
-		return NULL;
-	}
-
-	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-	/* There is a NOOP zc-ack whose cookie can be piggybacked */
-	tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
-	ksocknal_next_tx_carrier(conn);
-
-	/* use tx_msg to replace the noop zc-ack packet */
-	list_add(&tx_msg->tx_list, &tx->tx_list);
-	list_del(&tx->tx_list);
-
-	return tx;
-}
-
-static int
-ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
-			   struct ksock_tx *tx_ack, __u64 cookie)
-{
-	struct ksock_tx *tx;
-
-	if (conn->ksnc_type != SOCKLND_CONN_ACK)
-		return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
-
-	/* non-blocking ZC-ACK (to router) */
-	LASSERT(!tx_ack ||
-		tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-	tx = conn->ksnc_tx_carrier;
-	if (!tx) {
-		if (tx_ack) {
-			list_add_tail(&tx_ack->tx_list,
-				      &conn->ksnc_tx_queue);
-			conn->ksnc_tx_carrier = tx_ack;
-		}
-		return 0;
-	}
-
-	/* tx == conn->ksnc_tx_carrier: try to piggyback onto it */
-
-	if (tx_ack)
-		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
-	if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
-		return 1;
-
-	if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
-		/* replace the keepalive PING with a real ACK */
-		LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
-		tx->tx_msg.ksm_zc_cookies[1] = cookie;
-		return 1;
-	}
-
-	if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
-	    cookie == tx->tx_msg.ksm_zc_cookies[1]) {
-		CWARN("%s: duplicated ZC cookie: %llu\n",
-		      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
-		return 1; /* XXX return error in the future */
-	}
-
-	if (!tx->tx_msg.ksm_zc_cookies[0]) {
-		/*
-		 * NOOP tx has only one ZC-ACK cookie,
-		 * can carry at least one more
-		 */
-		if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
-			tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
-			tx->tx_msg.ksm_zc_cookies[1] = cookie;
-		} else {
-			tx->tx_msg.ksm_zc_cookies[0] = cookie;
-		}
-
-		if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
-			/*
-			 * not likely to carry more ACKs, skip it
-			 * to simplify logic
-			 */
-			ksocknal_next_tx_carrier(conn);
-		}
-
-		return 1;
-	}
-
-	/* the carrier already holds two or more cookies */
-
-	if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
-		__u64   tmp = 0;
-
-		/* two separate cookies: (a+2, a) or (a+1, a) */
-		LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
-			 tx->tx_msg.ksm_zc_cookies[1] <= 2);
-
-		if (tx->tx_msg.ksm_zc_cookies[0] -
-		    tx->tx_msg.ksm_zc_cookies[1] == 2) {
-			if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
-				tmp = cookie;
-		} else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
-			tmp = tx->tx_msg.ksm_zc_cookies[1];
-		} else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
-			tmp = tx->tx_msg.ksm_zc_cookies[0];
-		}
-
-		if (tmp) {
-			/* range of cookies */
-			tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
-			tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
-			return 1;
-		}
-
-	} else {
-		/*
-		 * ksm_zc_cookies[0] < ksm_zc_cookies[1],
-		 * it is range of cookies
-		 */
-		if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
-		    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
-			CWARN("%s: duplicated ZC cookie: %llu\n",
-			      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
-			return 1; /* XXX: return error in the future */
-		}
-
-		if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
-			tx->tx_msg.ksm_zc_cookies[1] = cookie;
-			return 1;
-		}
-
-		if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
-			tx->tx_msg.ksm_zc_cookies[0] = cookie;
-			return 1;
-		}
-	}
-
-	/* failed to piggyback ZC-ACK */
-	if (tx_ack) {
-		list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
-		/* the next tx can piggyback at least 1 ACK */
-		ksocknal_next_tx_carrier(conn);
-	}
-
-	return 0;
-}
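
The v3 zc-ack path above packs outstanding ACK cookies into the two __u64 slots of the NOOP carrier: a single cookie (slot 0 zero), two separate cookies (slot 0 > slot 1), or an inclusive range (slot 0 < slot 1). A small userspace sketch of just the range-extension case, hypothetical code for illustration rather than the deleted API:

#include <stdint.h>
#include <stdio.h>

/* c[0] < c[1] means c encodes the inclusive cookie range [c[0], c[1]] */
static int try_extend_range(uint64_t c[2], uint64_t cookie)
{
	if (c[0] < c[1] && cookie == c[1] + 1) {
		c[1] = cookie;		/* extend the range upwards */
		return 1;
	}
	if (c[0] < c[1] && cookie == c[0] - 1) {
		c[0] = cookie;		/* extend the range downwards */
		return 1;
	}
	return 0;	/* not adjacent: caller must fall back to a new NOOP */
}

int main(void)
{
	uint64_t c[2] = { 5, 8 };		/* range [5, 8] */

	printf("%d\n", try_extend_range(c, 9));	  /* 1: now [5, 9] */
	printf("%d\n", try_extend_range(c, 4));	  /* 1: now [4, 9] */
	printf("%d\n", try_extend_range(c, 100)); /* 0: not adjacent */
	return 0;
}

Adjacent cookies are absorbed in place; anything else eventually reaches the "failed to piggyback ZC-ACK" branch above, which queues a fresh NOOP.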
-
-static int
-ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
-	int nob;
-
-#if SOCKNAL_VERSION_DEBUG
-	if (!*ksocknal_tunables.ksnd_typed_conns)
-		return SOCKNAL_MATCH_YES;
-#endif
-
-	if (!tx || !tx->tx_lnetmsg) {
-		/* noop packet */
-		nob = offsetof(struct ksock_msg, ksm_u);
-	} else {
-		nob = tx->tx_lnetmsg->msg_len +
-		      ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
-		       sizeof(struct lnet_hdr) : sizeof(struct ksock_msg));
-	}
-
-	/* default checking for typed connection */
-	switch (conn->ksnc_type) {
-	default:
-		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
-		LBUG();
-	case SOCKLND_CONN_ANY:
-		return SOCKNAL_MATCH_YES;
-
-	case SOCKLND_CONN_BULK_IN:
-		return SOCKNAL_MATCH_MAY;
-
-	case SOCKLND_CONN_BULK_OUT:
-		if (nob < *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-
-	case SOCKLND_CONN_CONTROL:
-		if (nob >= *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-	}
-}
-
-static int
-ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
-	int nob;
-
-	if (!tx || !tx->tx_lnetmsg)
-		nob = offsetof(struct ksock_msg, ksm_u);
-	else
-		nob = tx->tx_lnetmsg->msg_len + sizeof(struct ksock_msg);
-
-	switch (conn->ksnc_type) {
-	default:
-		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
-		LBUG();
-	case SOCKLND_CONN_ANY:
-		return SOCKNAL_MATCH_NO;
-
-	case SOCKLND_CONN_ACK:
-		if (nonblk)
-			return SOCKNAL_MATCH_YES;
-		else if (!tx || !tx->tx_lnetmsg)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_NO;
-
-	case SOCKLND_CONN_BULK_OUT:
-		if (nonblk)
-			return SOCKNAL_MATCH_NO;
-		else if (nob < *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-
-	case SOCKLND_CONN_CONTROL:
-		if (nonblk)
-			return SOCKNAL_MATCH_NO;
-		else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-	}
-}
-
-/* (Sink) handle incoming ZC request from sender */
-static int
-ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote)
-{
-	struct ksock_peer *peer = c->ksnc_peer;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-	int rc;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
-	if (conn) {
-		struct ksock_sched *sched = conn->ksnc_scheduler;
-
-		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
-		spin_lock_bh(&sched->kss_lock);
-
-		rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
-
-		spin_unlock_bh(&sched->kss_lock);
-
-		if (rc) { /* piggybacked */
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-			return 0;
-		}
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	/* ACK connection is not ready, or can't piggyback the ACK */
-	tx = ksocknal_alloc_tx_noop(cookie, !!remote);
-	if (!tx)
-		return -ENOMEM;
-
-	rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
-	if (!rc)
-		return 0;
-
-	ksocknal_free_tx(tx);
-	return rc;
-}
-
-/* (Sender) handle ZC_ACK from sink */
-static int
-ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2)
-{
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_tx *tx;
-	struct ksock_tx *temp;
-	struct ksock_tx *tmp;
-	LIST_HEAD(zlist);
-	int count;
-
-	if (!cookie1)
-		cookie1 = cookie2;
-
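-	/* cookie1 > cookie2 encodes two separate cookies; otherwise the
-	 * pair is the inclusive range [cookie1, cookie2]
-	 */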
-	count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
-
-	if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
-	    conn->ksnc_proto == &ksocknal_protocol_v3x) {
-		/* keepalive PING for V3.x, just ignore it */
-		return count == 1 ? 0 : -EPROTO;
-	}
-
-	spin_lock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list,
-				 tx_zc_list) {
-		__u64 c = tx->tx_msg.ksm_zc_cookies[0];
-
-		if (c == cookie1 || c == cookie2 ||
-		    (cookie1 < c && c < cookie2)) {
-			tx->tx_msg.ksm_zc_cookies[0] = 0;
-			list_del(&tx->tx_zc_list);
-			list_add(&tx->tx_zc_list, &zlist);
-
-			if (!--count)
-				break;
-		}
-	}
-
-	spin_unlock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
-		list_del(&tx->tx_zc_list);
-		ksocknal_tx_decref(tx);
-	}
-
-	return !count ? 0 : -EPROTO;
-}
-
-static int
-ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
-	struct socket *sock = conn->ksnc_sock;
-	struct lnet_hdr *hdr;
-	struct lnet_magicversion *hmv;
-	int rc;
-	int i;
-
-	BUILD_BUG_ON(sizeof(struct lnet_magicversion) != offsetof(struct lnet_hdr, src_nid));
-
-	hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
-	if (!hdr) {
-		CERROR("Can't allocate struct lnet_hdr\n");
-		return -ENOMEM;
-	}
-
-	hmv = (struct lnet_magicversion *)&hdr->dest_nid;
-
-	/*
-	 * Re-organize the V2.x message header into a V1.x header
-	 * (struct lnet_hdr) and send it out
-	 */
-	hmv->magic         = cpu_to_le32(LNET_PROTO_TCP_MAGIC);
-	hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR);
-	hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR);
-
-	if (the_lnet.ln_testprotocompat) {
-		/* single-shot proto check */
-		LNET_LOCK();
-		if (the_lnet.ln_testprotocompat & 1) {
-			hmv->version_major++;   /* just different! */
-			the_lnet.ln_testprotocompat &= ~1;
-		}
-		if (the_lnet.ln_testprotocompat & 2) {
-			hmv->magic = LNET_PROTO_MAGIC;
-			the_lnet.ln_testprotocompat &= ~2;
-		}
-		LNET_UNLOCK();
-	}
-
-	hdr->src_nid = cpu_to_le64(hello->kshm_src_nid);
-	hdr->src_pid = cpu_to_le32(hello->kshm_src_pid);
-	hdr->type = cpu_to_le32(LNET_MSG_HELLO);
-	hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32));
-	hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype);
-	hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation);
-
-	rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
-			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
-		goto out;
-	}
-
-	if (!hello->kshm_nips)
-		goto out;
-
-	for (i = 0; i < (int)hello->kshm_nips; i++)
-		hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]);
-
-	rc = lnet_sock_write(sock, hello->kshm_ips,
-			     hello->kshm_nips * sizeof(__u32),
-			     lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
-			rc, hello->kshm_nips,
-			&conn->ksnc_ipaddr, conn->ksnc_port);
-	}
-out:
-	kfree(hdr);
-
-	return rc;
-}
-
-static int
-ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
-	struct socket *sock = conn->ksnc_sock;
-	int rc;
-
-	hello->kshm_magic   = LNET_PROTO_MAGIC;
-	hello->kshm_version = conn->ksnc_proto->pro_version;
-
-	if (the_lnet.ln_testprotocompat) {
-		/* single-shot proto check */
-		LNET_LOCK();
-		if (the_lnet.ln_testprotocompat & 1) {
-			hello->kshm_version++;   /* just different! */
-			the_lnet.ln_testprotocompat &= ~1;
-		}
-		LNET_UNLOCK();
-	}
-
-	rc = lnet_sock_write(sock, hello, offsetof(struct ksock_hello_msg, kshm_ips),
-			     lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
-			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
-		return rc;
-	}
-
-	if (!hello->kshm_nips)
-		return 0;
-
-	rc = lnet_sock_write(sock, hello->kshm_ips,
-			     hello->kshm_nips * sizeof(__u32),
-			     lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
-			rc, hello->kshm_nips,
-			&conn->ksnc_ipaddr, conn->ksnc_port);
-	}
-
-	return rc;
-}
-
-static int
-ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello,
-		       int timeout)
-{
-	struct socket *sock = conn->ksnc_sock;
-	struct lnet_hdr *hdr;
-	int rc;
-	int i;
-
-	hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
-	if (!hdr) {
-		CERROR("Can't allocate struct lnet_hdr\n");
-		return -ENOMEM;
-	}
-
-	rc = lnet_sock_read(sock, &hdr->src_nid,
-			    sizeof(*hdr) - offsetof(struct lnet_hdr, src_nid),
-			    timeout);
-	if (rc) {
-		CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		goto out;
-	}
-
-	/* ...and check we got what we expected */
-	if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) {
-		CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
-		       le32_to_cpu(hdr->type),
-		       &conn->ksnc_ipaddr);
-		rc = -EPROTO;
-		goto out;
-	}
-
-	hello->kshm_src_nid         = le64_to_cpu(hdr->src_nid);
-	hello->kshm_src_pid         = le32_to_cpu(hdr->src_pid);
-	hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
-	hello->kshm_ctype           = le32_to_cpu(hdr->msg.hello.type);
-	hello->kshm_nips            = le32_to_cpu(hdr->payload_length) /
-						  sizeof(__u32);
-
-	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
-		CERROR("Bad nips %d from ip %pI4h\n",
-		       hello->kshm_nips, &conn->ksnc_ipaddr);
-		rc = -EPROTO;
-		goto out;
-	}
-
-	if (!hello->kshm_nips)
-		goto out;
-
-	rc = lnet_sock_read(sock, hello->kshm_ips,
-			    hello->kshm_nips * sizeof(__u32), timeout);
-	if (rc) {
-		CERROR("Error %d reading IPs from ip %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		goto out;
-	}
-
-	for (i = 0; i < (int)hello->kshm_nips; i++) {
-		hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
-
-		if (!hello->kshm_ips[i]) {
-			CERROR("Zero IP[%d] from ip %pI4h\n",
-			       i, &conn->ksnc_ipaddr);
-			rc = -EPROTO;
-			break;
-		}
-	}
-out:
-	kfree(hdr);
-
-	return rc;
-}
-
-static int
-ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello,
-		       int timeout)
-{
-	struct socket *sock = conn->ksnc_sock;
-	int rc;
-	int i;
-
-	if (hello->kshm_magic == LNET_PROTO_MAGIC)
-		conn->ksnc_flip = 0;
-	else
-		conn->ksnc_flip = 1;
-
-	rc = lnet_sock_read(sock, &hello->kshm_src_nid,
-			    offsetof(struct ksock_hello_msg, kshm_ips) -
-				     offsetof(struct ksock_hello_msg, kshm_src_nid),
-			    timeout);
-	if (rc) {
-		CERROR("Error %d reading HELLO from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		return rc;
-	}
-
-	if (conn->ksnc_flip) {
-		__swab32s(&hello->kshm_src_pid);
-		__swab64s(&hello->kshm_src_nid);
-		__swab32s(&hello->kshm_dst_pid);
-		__swab64s(&hello->kshm_dst_nid);
-		__swab64s(&hello->kshm_src_incarnation);
-		__swab64s(&hello->kshm_dst_incarnation);
-		__swab32s(&hello->kshm_ctype);
-		__swab32s(&hello->kshm_nips);
-	}
-
-	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
-		CERROR("Bad nips %d from ip %pI4h\n",
-		       hello->kshm_nips, &conn->ksnc_ipaddr);
-		return -EPROTO;
-	}
-
-	if (!hello->kshm_nips)
-		return 0;
-
-	rc = lnet_sock_read(sock, hello->kshm_ips,
-			    hello->kshm_nips * sizeof(__u32), timeout);
-	if (rc) {
-		CERROR("Error %d reading IPs from ip %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		return rc;
-	}
-
-	for (i = 0; i < (int)hello->kshm_nips; i++) {
-		if (conn->ksnc_flip)
-			__swab32s(&hello->kshm_ips[i]);
-
-		if (!hello->kshm_ips[i]) {
-			CERROR("Zero IP[%d] from ip %pI4h\n",
-			       i, &conn->ksnc_ipaddr);
-			return -EPROTO;
-		}
-	}
-
-	return 0;
-}
-
-static void
-ksocknal_pack_msg_v1(struct ksock_tx *tx)
-{
-	/* V1.x has no KSOCK_MSG_NOOP */
-	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-	LASSERT(tx->tx_lnetmsg);
-
-	tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
-	tx->tx_iov[0].iov_len  = sizeof(struct lnet_hdr);
-
-	tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
-	tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
-}
-
-static void
-ksocknal_pack_msg_v2(struct ksock_tx *tx)
-{
-	tx->tx_iov[0].iov_base = &tx->tx_msg;
-
-	if (tx->tx_lnetmsg) {
-		LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-
-		tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
-		tx->tx_iov[0].iov_len = sizeof(struct ksock_msg);
-		tx->tx_nob = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
-		tx->tx_resid = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
-	} else {
-		LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-		tx->tx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
-		tx->tx_nob = offsetof(struct ksock_msg,  ksm_u.lnetmsg.ksnm_hdr);
-		tx->tx_resid = offsetof(struct ksock_msg,  ksm_u.lnetmsg.ksnm_hdr);
-	}
-	/*
-	 * Don't checksum before we start sending, because the packet can
-	 * still be piggybacked with an ACK
-	 */
-}
-
-static void
-ksocknal_unpack_msg_v1(struct ksock_msg *msg)
-{
-	msg->ksm_csum = 0;
-	msg->ksm_type = KSOCK_MSG_LNET;
-	msg->ksm_zc_cookies[0] = 0;
-	msg->ksm_zc_cookies[1] = 0;
-}
-
-static void
-ksocknal_unpack_msg_v2(struct ksock_msg *msg)
-{
-	/* Nothing to do: V2.x message headers need no unpacking */
-}
-
-struct ksock_proto ksocknal_protocol_v1x = {
-	.pro_version        = KSOCK_PROTO_V1,
-	.pro_send_hello     = ksocknal_send_hello_v1,
-	.pro_recv_hello     = ksocknal_recv_hello_v1,
-	.pro_pack           = ksocknal_pack_msg_v1,
-	.pro_unpack         = ksocknal_unpack_msg_v1,
-	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v1,
-	.pro_handle_zcreq   = NULL,
-	.pro_handle_zcack   = NULL,
-	.pro_queue_tx_zcack = NULL,
-	.pro_match_tx       = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v2x = {
-	.pro_version        = KSOCK_PROTO_V2,
-	.pro_send_hello     = ksocknal_send_hello_v2,
-	.pro_recv_hello     = ksocknal_recv_hello_v2,
-	.pro_pack           = ksocknal_pack_msg_v2,
-	.pro_unpack         = ksocknal_unpack_msg_v2,
-	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
-	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
-	.pro_handle_zcreq   = ksocknal_handle_zcreq,
-	.pro_handle_zcack   = ksocknal_handle_zcack,
-	.pro_match_tx       = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v3x = {
-	.pro_version        = KSOCK_PROTO_V3,
-	.pro_send_hello     = ksocknal_send_hello_v2,
-	.pro_recv_hello     = ksocknal_recv_hello_v2,
-	.pro_pack           = ksocknal_pack_msg_v2,
-	.pro_unpack         = ksocknal_unpack_msg_v2,
-	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
-	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
-	.pro_handle_zcreq   = ksocknal_handle_zcreq,
-	.pro_handle_zcack   = ksocknal_handle_zcack,
-	.pro_match_tx       = ksocknal_match_tx_v3
-};
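
These three tables carry the whole version story: the hello handshake selects one per connection, and every later version-specific decision goes through a function pointer, which is why v3x can reuse the v2x hello/pack/unpack routines and swap in only the zc-ack queueing and tx matching. A toy sketch of that dispatch shape, with hypothetical names rather than the deleted API:

#include <stdio.h>

struct msg { int len; };

struct proto_ops {
	int version;
	void (*pack)(struct msg *m);
};

static void pack_v1(struct msg *m) { printf("v1 pack, len=%d\n", m->len); }
static void pack_v2(struct msg *m) { printf("v2 pack, len=%d\n", m->len); }

static const struct proto_ops proto_v1 = { 1, pack_v1 };
static const struct proto_ops proto_v2 = { 2, pack_v2 };

/* chosen once per connection, at handshake time */
static const struct proto_ops *negotiate(int peer_version)
{
	return peer_version >= 2 ? &proto_v2 : &proto_v1;
}

int main(void)
{
	struct msg m = { .len = 42 };
	const struct proto_ops *ops = negotiate(2);

	ops->pack(&m);	/* everything past the handshake is version-agnostic */
	return 0;
}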

+ 0 - 16
drivers/staging/lustre/lnet/libcfs/Makefile

@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += libcfs.o
-
-libcfs-obj-y += linux-tracefile.o linux-debug.o
-libcfs-obj-y += linux-crypto.o
-libcfs-obj-y += linux-crypto-adler.o
-
-libcfs-obj-y += debug.o fail.o module.o tracefile.o
-libcfs-obj-y += libcfs_string.o hash.o
-libcfs-obj-$(CONFIG_SMP) += libcfs_cpu.o
-libcfs-obj-y += libcfs_mem.o libcfs_lock.o
-
-libcfs-objs := $(libcfs-obj-y)

+ 0 - 461
drivers/staging/lustre/lnet/libcfs/debug.c

@@ -1,461 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <linux/kthread.h>
-#include "tracefile.h"
-
-static char debug_file_name[1024];
-
-unsigned int libcfs_subsystem_debug = ~0;
-EXPORT_SYMBOL(libcfs_subsystem_debug);
-module_param(libcfs_subsystem_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
-
-unsigned int libcfs_debug = (D_CANTMASK |
-			     D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
-EXPORT_SYMBOL(libcfs_debug);
-module_param(libcfs_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
-
-static int libcfs_param_debug_mb_set(const char *val,
-				     const struct kernel_param *kp)
-{
-	int rc;
-	unsigned int num;
-
-	rc = kstrtouint(val, 0, &num);
-	if (rc < 0)
-		return rc;
-
-	if (!*((unsigned int *)kp->arg)) {
-		*((unsigned int *)kp->arg) = num;
-		return 0;
-	}
-
-	rc = cfs_trace_set_debug_mb(num);
-
-	if (!rc)
-		*((unsigned int *)kp->arg) = cfs_trace_get_debug_mb();
-
-	return rc;
-}
-
-/* While the debug_mb setting looks like a plain unsigned int, it
- * actually needs quite a bit of extra processing, so we define a
- * special "debugmb" parameter type with corresponding methods to
- * handle this case
- */
-static const struct kernel_param_ops param_ops_debugmb = {
-	.set = libcfs_param_debug_mb_set,
-	.get = param_get_uint,
-};
-
-#define param_check_debugmb(name, p) \
-		__param_check(name, p, unsigned int)
-
-static unsigned int libcfs_debug_mb;
-module_param(libcfs_debug_mb, debugmb, 0644);
-MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size.");
-
-unsigned int libcfs_printk = D_CANTMASK;
-module_param(libcfs_printk, uint, 0644);
-MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask");
-
-unsigned int libcfs_console_ratelimit = 1;
-module_param(libcfs_console_ratelimit, uint, 0644);
-MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)");
-
-static int param_set_delay_minmax(const char *val,
-				  const struct kernel_param *kp,
-				  long min, long max)
-{
-	long d;
-	int sec;
-	int rc;
-
-	rc = kstrtoint(val, 0, &sec);
-	if (rc)
-		return -EINVAL;
-
-	d = sec * HZ / 100;
-	if (d < min || d > max)
-		return -EINVAL;
-
-	*((unsigned int *)kp->arg) = d;
-
-	return 0;
-}
-
-static int param_get_delay(char *buffer, const struct kernel_param *kp)
-{
-	unsigned int d = *(unsigned int *)kp->arg;
-
-	return sprintf(buffer, "%u", (unsigned int)(d * 100) / HZ);
-}
-
-unsigned int libcfs_console_max_delay;
-unsigned int libcfs_console_min_delay;
-
-static int param_set_console_max_delay(const char *val,
-				       const struct kernel_param *kp)
-{
-	return param_set_delay_minmax(val, kp,
-				      libcfs_console_min_delay, INT_MAX);
-}
-
-static const struct kernel_param_ops param_ops_console_max_delay = {
-	.set = param_set_console_max_delay,
-	.get = param_get_delay,
-};
-
-#define param_check_console_max_delay(name, p) \
-		__param_check(name, p, unsigned int)
-
-module_param(libcfs_console_max_delay, console_max_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
-
-static int param_set_console_min_delay(const char *val,
-				       const struct kernel_param *kp)
-{
-	return param_set_delay_minmax(val, kp,
-				      1, libcfs_console_max_delay);
-}
-
-static const struct kernel_param_ops param_ops_console_min_delay = {
-	.set = param_set_console_min_delay,
-	.get = param_get_delay,
-};
-
-#define param_check_console_min_delay(name, p) \
-		__param_check(name, p, unsigned int)
-
-module_param(libcfs_console_min_delay, console_min_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
-
-static int param_set_uint_minmax(const char *val,
-				 const struct kernel_param *kp,
-				 unsigned int min, unsigned int max)
-{
-	unsigned int num;
-	int ret;
-
-	if (!val)
-		return -EINVAL;
-	ret = kstrtouint(val, 0, &num);
-	if (ret < 0 || num < min || num > max)
-		return -EINVAL;
-	*((unsigned int *)kp->arg) = num;
-	return 0;
-}
-
-static int param_set_uintpos(const char *val, const struct kernel_param *kp)
-{
-	return param_set_uint_minmax(val, kp, 1, -1);
-}
-
-static const struct kernel_param_ops param_ops_uintpos = {
-	.set = param_set_uintpos,
-	.get = param_get_uint,
-};
-
-#define param_check_uintpos(name, p) \
-		__param_check(name, p, unsigned int)
-
-unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
-module_param(libcfs_console_backoff, uintpos, 0644);
-MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor");
-
-unsigned int libcfs_debug_binary = 1;
-
-unsigned int libcfs_stack = 3 * THREAD_SIZE / 4;
-EXPORT_SYMBOL(libcfs_stack);
-
-unsigned int libcfs_catastrophe;
-EXPORT_SYMBOL(libcfs_catastrophe);
-
-unsigned int libcfs_panic_on_lbug = 1;
-module_param(libcfs_panic_on_lbug, uint, 0644);
-MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
-
-static wait_queue_head_t debug_ctlwq;
-
-char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
-
-/* We need to pass a pointer here, but elsewhere this must be a const */
-static char *libcfs_debug_file_path;
-module_param(libcfs_debug_file_path, charp, 0644);
-MODULE_PARM_DESC(libcfs_debug_file_path,
-		 "Path for dumping debug logs, set 'NONE' to prevent log dumping");
-
-int libcfs_panic_in_progress;
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_subsys2str(int subsys)
-{
-	static const char * const libcfs_debug_subsystems[] =
-		LIBCFS_DEBUG_SUBSYS_NAMES;
-
-	if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
-		return NULL;
-
-	return libcfs_debug_subsystems[subsys];
-}
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_dbg2str(int debug)
-{
-	static const char * const libcfs_debug_masks[] =
-		LIBCFS_DEBUG_MASKS_NAMES;
-
-	if (debug >= ARRAY_SIZE(libcfs_debug_masks))
-		return NULL;
-
-	return libcfs_debug_masks[debug];
-}
-
-int
-libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
-{
-	const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-						 libcfs_debug_dbg2str;
-	int len = 0;
-	const char *token;
-	int i;
-
-	if (!mask) {			/* "0" */
-		if (size > 0)
-			str[0] = '0';
-		len = 1;
-	} else {				/* space-separated tokens */
-		for (i = 0; i < 32; i++) {
-			if (!(mask & (1 << i)))
-				continue;
-
-			token = fn(i);
-			if (!token)	      /* unused bit */
-				continue;
-
-			if (len > 0) {		  /* separator? */
-				if (len < size)
-					str[len] = ' ';
-				len++;
-			}
-
-			while (*token) {
-				if (len < size)
-					str[len] = *token;
-				token++;
-				len++;
-			}
-		}
-	}
-
-	/* terminate 'str' */
-	if (len < size)
-		str[len] = 0;
-	else
-		str[size - 1] = 0;
-
-	return len;
-}
-
-int
-libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
-{
-	const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-						 libcfs_debug_dbg2str;
-	int m = 0;
-	int matched;
-	int n;
-	int t;
-
-	/* Allow a number for backwards compatibility */
-
-	for (n = strlen(str); n > 0; n--)
-		if (!isspace(str[n - 1]))
-			break;
-	matched = n;
-	t = sscanf(str, "%i%n", &m, &matched);
-	if (t >= 1 && matched == n) {
-		/* don't print warning for lctl set_param debug=0 or -1 */
-		if (m && m != -1)
-			CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
-		*mask = m;
-		return 0;
-	}
-
-	return cfs_str2mask(str, fn, mask, is_subsys ? 0 : D_CANTMASK,
-			    0xffffffff);
-}
-
-/**
- * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
- */
-void libcfs_debug_dumplog_internal(void *arg)
-{
-	static time64_t last_dump_time;
-	time64_t current_time;
-	void *journal_info;
-
-	journal_info = current->journal_info;
-	current->journal_info = NULL;
-	current_time = ktime_get_real_seconds();
-
-	if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) &&
-	    current_time > last_dump_time) {
-		last_dump_time = current_time;
-		snprintf(debug_file_name, sizeof(debug_file_name) - 1,
-			 "%s.%lld.%ld", libcfs_debug_file_path_arr,
-			 (s64)current_time, (long)arg);
-		pr_alert("LustreError: dumping log to %s\n", debug_file_name);
-		cfs_tracefile_dump_all_pages(debug_file_name);
-		libcfs_run_debug_log_upcall(debug_file_name);
-	}
-
-	current->journal_info = journal_info;
-}
-
-static int libcfs_debug_dumplog_thread(void *arg)
-{
-	libcfs_debug_dumplog_internal(arg);
-	wake_up(&debug_ctlwq);
-	return 0;
-}
-
-void libcfs_debug_dumplog(void)
-{
-	wait_queue_entry_t wait;
-	struct task_struct *dumper;
-
-	/* we're being careful to ensure that the kernel thread is
-	 * able to set our state to running as it exits before we
-	 * get to schedule()
-	 */
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&debug_ctlwq, &wait);
-
-	dumper = kthread_run(libcfs_debug_dumplog_thread,
-			     (void *)(long)current->pid,
-			     "libcfs_debug_dumper");
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (IS_ERR(dumper))
-		pr_err("LustreError: cannot start log dump thread: %ld\n",
-		       PTR_ERR(dumper));
-	else
-		schedule();
-
-	/* be sure to tear down if kthread_run() failed */
-	remove_wait_queue(&debug_ctlwq, &wait);
-	set_current_state(TASK_RUNNING);
-}
-EXPORT_SYMBOL(libcfs_debug_dumplog);
-
-int libcfs_debug_init(unsigned long bufsize)
-{
-	unsigned int max = libcfs_debug_mb;
-	int rc = 0;
-
-	init_waitqueue_head(&debug_ctlwq);
-
-	if (libcfs_console_max_delay <= 0 || /* not set by user or */
-	    libcfs_console_min_delay <= 0 || /* set to invalid values */
-	    libcfs_console_min_delay >= libcfs_console_max_delay) {
-		libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
-		libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
-	}
-
-	if (libcfs_debug_file_path) {
-		strlcpy(libcfs_debug_file_path_arr,
-			libcfs_debug_file_path,
-			sizeof(libcfs_debug_file_path_arr));
-	}
-
-	/* If libcfs_debug_mb is uninitialized or set to an invalid value,
-	 * just make the total buffer smp_num_cpus * TCD_MAX_PAGES
-	 */
-	if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) {
-		max = TCD_MAX_PAGES;
-	} else {
-		max = max / num_possible_cpus();
-		max <<= (20 - PAGE_SHIFT);
-	}
-
-	rc = cfs_tracefile_init(max);
-	if (!rc) {
-		libcfs_register_panic_notifier();
-		libcfs_debug_mb = cfs_trace_get_debug_mb();
-	}
-
-	return rc;
-}
-
-int libcfs_debug_cleanup(void)
-{
-	libcfs_unregister_panic_notifier();
-	cfs_tracefile_exit();
-	return 0;
-}
-
-int libcfs_debug_clear_buffer(void)
-{
-	cfs_trace_flush_pages();
-	return 0;
-}
-
-/* Debug markers, although printed by S_LNET, should not be marked as such. */
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_UNDEFINED
-int libcfs_debug_mark_buffer(const char *text)
-{
-	CDEBUG(D_TRACE,
-	       "***************************************************\n");
-	LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
-	CDEBUG(D_TRACE,
-	       "***************************************************\n");
-
-	return 0;
-}
-
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_LNET
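
debug.c defines three custom module-parameter types (debugmb, console_max_delay/console_min_delay, uintpos), each built with the stock recipe: a kernel_param_ops whose .set validates input, a param_check_<type>() macro so module_param() still type-checks the variable, and then module_param(var, <type>, perm). A sketch of the same recipe, with made-up names (demo_pos, demo_backoff) that are not part of the deleted code:

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>

/* a "positive unsigned int" parameter type, mirroring param_ops_uintpos */
static int param_set_demo_pos(const char *val, const struct kernel_param *kp)
{
	unsigned int num;
	int rc;

	rc = kstrtouint(val, 0, &num);
	if (rc || num < 1)
		return -EINVAL;

	*(unsigned int *)kp->arg = num;
	return 0;
}

static const struct kernel_param_ops param_ops_demo_pos = {
	.set = param_set_demo_pos,
	.get = param_get_uint,	/* the stock getter is fine here */
};

#define param_check_demo_pos(name, p) __param_check(name, p, unsigned int)

static unsigned int demo_backoff = 2;
module_param(demo_backoff, demo_pos, 0644);
MODULE_PARM_DESC(demo_backoff, "demo backoff factor (must be >= 1)");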

+ 0 - 146
drivers/staging/lustre/lnet/libcfs/fail.c

@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/random.h>
-
-unsigned long cfs_fail_loc;
-EXPORT_SYMBOL(cfs_fail_loc);
-
-unsigned int cfs_fail_val;
-EXPORT_SYMBOL(cfs_fail_val);
-
-int cfs_fail_err;
-EXPORT_SYMBOL(cfs_fail_err);
-
-DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq);
-EXPORT_SYMBOL(cfs_race_waitq);
-
-int cfs_race_state;
-EXPORT_SYMBOL(cfs_race_state);
-
-int __cfs_fail_check_set(u32 id, u32 value, int set)
-{
-	static atomic_t cfs_fail_count = ATOMIC_INIT(0);
-
-	LASSERT(!(id & CFS_FAIL_ONCE));
-
-	if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) ==
-	    (CFS_FAILED | CFS_FAIL_ONCE)) {
-		atomic_set(&cfs_fail_count, 0); /* paranoia */
-		return 0;
-	}
-
-	/* Fail 1/cfs_fail_val times */
-	if (cfs_fail_loc & CFS_FAIL_RAND) {
-		if (cfs_fail_val < 2 || prandom_u32_max(cfs_fail_val) > 0)
-			return 0;
-	}
-
-	/* Skip the first cfs_fail_val, then fail */
-	if (cfs_fail_loc & CFS_FAIL_SKIP) {
-		if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val)
-			return 0;
-	}
-
-	/* check cfs_fail_val... */
-	if (set == CFS_FAIL_LOC_VALUE) {
-		if (cfs_fail_val != -1 && cfs_fail_val != value)
-			return 0;
-	}
-
-	/* Fail cfs_fail_val times, overridden by FAIL_ONCE */
-	if (cfs_fail_loc & CFS_FAIL_SOME &&
-	    (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) {
-		int count = atomic_inc_return(&cfs_fail_count);
-
-		if (count >= cfs_fail_val) {
-			set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
-			atomic_set(&cfs_fail_count, 0);
-			/* we lost the race to increment */
-			if (count > cfs_fail_val)
-				return 0;
-		}
-	}
-
-	/* Take the current call into account for FAIL_ONCE for ORSET only;
-	 * RESET sets a new fail_loc, so it does not change the current call
-	 */
-	if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE))
-		set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
-	/* Lost race to set CFS_FAILED_BIT. */
-	if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
-		/* If CFS_FAIL_ONCE is valid, only one process can fail,
-		 * otherwise multi-process can fail at the same time.
-		 */
-		if (cfs_fail_loc & CFS_FAIL_ONCE)
-			return 0;
-	}
-
-	switch (set) {
-	case CFS_FAIL_LOC_NOSET:
-	case CFS_FAIL_LOC_VALUE:
-		break;
-	case CFS_FAIL_LOC_ORSET:
-		cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE);
-		break;
-	case CFS_FAIL_LOC_RESET:
-		cfs_fail_loc = value;
-		atomic_set(&cfs_fail_count, 0);
-		break;
-	default:
-		LASSERTF(0, "called with bad set %u\n", set);
-		break;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(__cfs_fail_check_set);
-
-int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
-{
-	int ret;
-
-	ret = __cfs_fail_check_set(id, value, set);
-	if (ret && likely(ms > 0)) {
-		CERROR("cfs_fail_timeout id %x sleeping for %dms\n",
-		       id, ms);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(ms * HZ / 1000);
-		CERROR("cfs_fail_timeout id %x awake\n", id);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(__cfs_fail_timeout_set);
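
fail.c is the fail-injection switchboard: cfs_fail_loc names a site, and __cfs_fail_check_set() interprets the RAND/SKIP/SOME/ONCE modifier bits against cfs_fail_val. Call sites normally go through wrapper macros from the (also deleted) libcfs_fail.h rather than calling these functions directly; the sketch below shows only the call-site shape, assumes a CFS_FAIL_CHECK(id)-style macro, and uses a made-up fail_loc id:

#define DEMO_FAIL_WRITE 0x215	/* hypothetical fail_loc id */

static int demo_write_path(void)
{
	/* fires according to cfs_fail_loc/cfs_fail_val, e.g. set via lctl */
	if (CFS_FAIL_CHECK(DEMO_FAIL_WRITE))
		return -EIO;	/* simulated failure */

	return 0;		/* normal path */
}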

+ 0 - 2065
drivers/staging/lustre/lnet/libcfs/hash.c

@@ -1,2065 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/hash.c
- *
- * Implement a hash class for the hashing process in the Lustre system.
- *
- * Author: YuZhangyong <yzy@clusterfs.com>
- *
- * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
- * - Simplified API and improved documentation
- * - Added per-hash feature flags:
- *   * CFS_HASH_DEBUG additional validation
- *   * CFS_HASH_REHASH dynamic rehashing
- * - Added per-hash statistics
- * - General performance enhancements
- *
- * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
- * - move all stuff to libcfs
- * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
- * - ignore hs_rwlock if CFS_HASH_REHASH is not set
- * - buckets are allocated one by one (instead of as contiguous memory),
- *   to avoid unnecessary cacheline conflicts
- *
- * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
- * - "bucket" is a group of hlist_head now, user can specify bucket size
- *   by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share
- *   one lock for reducing memory overhead.
- *
- * - support lockless hash, caller will take care of locks:
- *   avoid lock overhead for hash tables that are already protected
- *   by locking in the caller for another reason
- *
- * - support both spin_lock/rwlock for bucket:
- *   overhead of spinlock contention is lower than read/write
- *   contention of rwlock, so using spinlock to serialize operations on
- *   bucket is more reasonable for those frequently changed hash tables
- *
- * - support one-single lock mode:
- *   one lock to protect all hash operations to avoid overhead of
- *   multiple locks if hash table is always small
- *
- * - removed a lot of unnecessary addref & decref on hash elements:
- *   addref & decref are atomic operations in many use-cases, and
- *   atomic operations are expensive.
- *
- * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
- *   some lustre use-cases require these functions to be strictly
- *   non-blocking; in those cases we schedule the required rehash on
- *   a different thread.
- *
- * - safer rehash on large hash tables:
- *   in the old implementation, the rehash function would exclusively
- *   lock the hash table and finish the rehash in one batch, which is
- *   dangerous on SMP systems because rehashing millions of elements
- *   can take a long time. The new rehash implementation can release
- *   the lock and relax the CPU in the middle of a rehash, so it is
- *   safe for another thread to search/change the hash table even
- *   while it is rehashing.
- *
- * - support two different refcount modes
- *   . hash table has refcount on element
- *   . hash table doesn't change refcount on adding/removing element
- *
- * - support long name hash table (for param-tree)
- *
- * - fix a bug in cfs_hash_rehash_key:
- *   in the old implementation, cfs_hash_rehash_key could corrupt the
- *   hash table because @key was overwritten without any protection.
- *   Now the user must define hs_keycpy for rehash-enabled hash
- *   tables, and cfs_hash_rehash_key overwrites the hash key inside
- *   the lock by calling hs_keycpy.
- *
- * - better hash iteration:
- *   Now we support both locked and lockless iteration of the hash
- *   table. Also, the user can break the iteration by returning 1
- *   from the callback.
- */
-#include <linux/seq_file.h>
-#include <linux/log2.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/libcfs/libcfs_hash.h>
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static unsigned int warn_on_depth = 8;
-module_param(warn_on_depth, uint, 0644);
-MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high.");
-#endif
-
-struct workqueue_struct *cfs_rehash_wq;
-
-static inline void
-cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive)
-	__acquires(&lock->spin)
-{
-	spin_lock(&lock->spin);
-}
-
-static inline void
-cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive)
-	__releases(&lock->spin)
-{
-	spin_unlock(&lock->spin);
-}
-
-static inline void
-cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive)
-	__acquires(&lock->rw)
-{
-	if (!exclusive)
-		read_lock(&lock->rw);
-	else
-		write_lock(&lock->rw);
-}
-
-static inline void
-cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive)
-	__releases(&lock->rw)
-{
-	if (!exclusive)
-		read_unlock(&lock->rw);
-	else
-		write_unlock(&lock->rw);
-}
-
-/** No lock hash */
-static struct cfs_hash_lock_ops cfs_hash_nl_lops = {
-	.hs_lock	= cfs_hash_nl_lock,
-	.hs_unlock	= cfs_hash_nl_unlock,
-	.hs_bkt_lock	= cfs_hash_nl_lock,
-	.hs_bkt_unlock	= cfs_hash_nl_unlock,
-};
-
-/** no bucket lock, one spinlock to protect everything */
-static struct cfs_hash_lock_ops cfs_hash_nbl_lops = {
-	.hs_lock	= cfs_hash_spin_lock,
-	.hs_unlock	= cfs_hash_spin_unlock,
-	.hs_bkt_lock	= cfs_hash_nl_lock,
-	.hs_bkt_unlock	= cfs_hash_nl_unlock,
-};
-
-/** spin bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = {
-	.hs_lock	= cfs_hash_rw_lock,
-	.hs_unlock	= cfs_hash_rw_unlock,
-	.hs_bkt_lock	= cfs_hash_spin_lock,
-	.hs_bkt_unlock	= cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = {
-	.hs_lock	= cfs_hash_rw_lock,
-	.hs_unlock	= cfs_hash_rw_unlock,
-	.hs_bkt_lock	= cfs_hash_rw_lock,
-	.hs_bkt_unlock	= cfs_hash_rw_unlock,
-};
-
-/** spin bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = {
-	.hs_lock	= cfs_hash_nl_lock,
-	.hs_unlock	= cfs_hash_nl_unlock,
-	.hs_bkt_lock	= cfs_hash_spin_lock,
-	.hs_bkt_unlock	= cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = {
-	.hs_lock	= cfs_hash_nl_lock,
-	.hs_unlock	= cfs_hash_nl_unlock,
-	.hs_bkt_lock	= cfs_hash_rw_lock,
-	.hs_bkt_unlock	= cfs_hash_rw_unlock,
-};
-
-static void
-cfs_hash_lock_setup(struct cfs_hash *hs)
-{
-	if (cfs_hash_with_no_lock(hs)) {
-		hs->hs_lops = &cfs_hash_nl_lops;
-
-	} else if (cfs_hash_with_no_bktlock(hs)) {
-		hs->hs_lops = &cfs_hash_nbl_lops;
-		spin_lock_init(&hs->hs_lock.spin);
-
-	} else if (cfs_hash_with_rehash(hs)) {
-		rwlock_init(&hs->hs_lock.rw);
-
-		if (cfs_hash_with_rw_bktlock(hs))
-			hs->hs_lops = &cfs_hash_bkt_rw_lops;
-		else if (cfs_hash_with_spin_bktlock(hs))
-			hs->hs_lops = &cfs_hash_bkt_spin_lops;
-		else
-			LBUG();
-	} else {
-		if (cfs_hash_with_rw_bktlock(hs))
-			hs->hs_lops = &cfs_hash_nr_bkt_rw_lops;
-		else if (cfs_hash_with_spin_bktlock(hs))
-			hs->hs_lops = &cfs_hash_nr_bkt_spin_lops;
-		else
-			LBUG();
-	}
-}
-
-/**
- * Simple hash head without depth tracking;
- * new elements are always added to the head of the hlist
- */
-struct cfs_hash_head {
-	struct hlist_head	hh_head;	/**< entries list */
-};
-
-static int
-cfs_hash_hh_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_head);
-}
-
-static struct hlist_head *
-cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_head *head;
-
-	head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].hh_head;
-}
-
-static int
-cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd));
-	return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	hlist_del_init(hnode);
-	return -1; /* unknown depth */
-}
-
-/**
- * Simple hash head with depth tracking;
- * new elements are always added to the head of the hlist
- */
-struct cfs_hash_head_dep {
-	struct hlist_head	hd_head;	/**< entries list */
-	unsigned int		hd_depth;	/**< list length */
-};
-
-static int
-cfs_hash_hd_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_head_dep);
-}
-
-static struct hlist_head *
-cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_head_dep *head;
-
-	head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].hd_head;
-}
-
-static int
-cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_head_dep *hh;
-
-	hh = container_of(cfs_hash_hd_hhead(hs, bd),
-			  struct cfs_hash_head_dep, hd_head);
-	hlist_add_head(hnode, &hh->hd_head);
-	return ++hh->hd_depth;
-}
-
-static int
-cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_head_dep *hh;
-
-	hh = container_of(cfs_hash_hd_hhead(hs, bd),
-			  struct cfs_hash_head_dep, hd_head);
-	hlist_del_init(hnode);
-	return --hh->hd_depth;
-}
-
-/**
- * double-links hash head without depth tracking;
- * new elements are always added to the tail of the hlist
- */
-struct cfs_hash_dhead {
-	struct hlist_head	dh_head;	/**< entries list */
-	struct hlist_node	*dh_tail;	/**< the last entry */
-};
-
-static int
-cfs_hash_dh_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_dhead);
-}
-
-static struct hlist_head *
-cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_dhead *head;
-
-	head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].dh_head;
-}
-
-static int
-cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_dhead *dh;
-
-	dh = container_of(cfs_hash_dh_hhead(hs, bd),
-			  struct cfs_hash_dhead, dh_head);
-	if (dh->dh_tail) /* not empty */
-		hlist_add_behind(hnode, dh->dh_tail);
-	else /* empty list */
-		hlist_add_head(hnode, &dh->dh_head);
-	dh->dh_tail = hnode;
-	return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnd)
-{
-	struct cfs_hash_dhead *dh;
-
-	dh = container_of(cfs_hash_dh_hhead(hs, bd),
-			  struct cfs_hash_dhead, dh_head);
-	if (!hnd->next) { /* it's the tail */
-		dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL :
-			      container_of(hnd->pprev, struct hlist_node, next);
-	}
-	hlist_del_init(hnd);
-	return -1; /* unknown depth */
-}
-
-/**
- * double-links hash head with depth tracking;
- * new elements are always added to the tail of the hlist
- */
-struct cfs_hash_dhead_dep {
-	struct hlist_head	dd_head;	/**< entries list */
-	struct hlist_node	*dd_tail;	/**< the last entry */
-	unsigned int		dd_depth;	/**< list length */
-};
-
-static int
-cfs_hash_dd_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_dhead_dep);
-}
-
-static struct hlist_head *
-cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_dhead_dep *head;
-
-	head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].dd_head;
-}
-
-static int
-cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_dhead_dep *dh;
-
-	dh = container_of(cfs_hash_dd_hhead(hs, bd),
-			  struct cfs_hash_dhead_dep, dd_head);
-	if (dh->dd_tail) /* not empty */
-		hlist_add_behind(hnode, dh->dd_tail);
-	else /* empty list */
-		hlist_add_head(hnode, &dh->dd_head);
-	dh->dd_tail = hnode;
-	return ++dh->dd_depth;
-}
-
-static int
-cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnd)
-{
-	struct cfs_hash_dhead_dep *dh;
-
-	dh = container_of(cfs_hash_dd_hhead(hs, bd),
-			  struct cfs_hash_dhead_dep, dd_head);
-	if (!hnd->next) { /* it's the tail */
-		dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL :
-			      container_of(hnd->pprev, struct hlist_node, next);
-	}
-	hlist_del_init(hnd);
-	return --dh->dd_depth;
-}
-
-static struct cfs_hash_hlist_ops cfs_hash_hh_hops = {
-	.hop_hhead	= cfs_hash_hh_hhead,
-	.hop_hhead_size	= cfs_hash_hh_hhead_size,
-	.hop_hnode_add	= cfs_hash_hh_hnode_add,
-	.hop_hnode_del	= cfs_hash_hh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_hd_hops = {
-	.hop_hhead	= cfs_hash_hd_hhead,
-	.hop_hhead_size	= cfs_hash_hd_hhead_size,
-	.hop_hnode_add	= cfs_hash_hd_hnode_add,
-	.hop_hnode_del	= cfs_hash_hd_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dh_hops = {
-	.hop_hhead	= cfs_hash_dh_hhead,
-	.hop_hhead_size	= cfs_hash_dh_hhead_size,
-	.hop_hnode_add	= cfs_hash_dh_hnode_add,
-	.hop_hnode_del	= cfs_hash_dh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dd_hops = {
-	.hop_hhead	= cfs_hash_dd_hhead,
-	.hop_hhead_size	= cfs_hash_dd_hhead_size,
-	.hop_hnode_add	= cfs_hash_dd_hnode_add,
-	.hop_hnode_del	= cfs_hash_dd_hnode_del,
-};
-
-static void
-cfs_hash_hlist_setup(struct cfs_hash *hs)
-{
-	if (cfs_hash_with_add_tail(hs)) {
-		hs->hs_hops = cfs_hash_with_depth(hs) ?
-			      &cfs_hash_dd_hops : &cfs_hash_dh_hops;
-	} else {
-		hs->hs_hops = cfs_hash_with_depth(hs) ?
-			      &cfs_hash_hd_hops : &cfs_hash_hh_hops;
-	}
-}
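
The four hlist-head flavors trade a little per-bucket memory for features: the *_dep variants keep a depth counter so overlong chains can trigger a rehash, and the dhead variants carry a tail pointer so add-to-tail stays O(1). A simplified userspace analogue of the tail-pointer idea (a plain singly-linked list instead of a kernel hlist; all names made up):

#include <stdio.h>

struct node {
	struct node *next;
	int v;
};

/* like cfs_hash_dhead: list head plus a cached tail pointer */
struct dhead {
	struct node *head;
	struct node *tail;
};

static void dhead_add_tail(struct dhead *dh, struct node *n)
{
	n->next = NULL;
	if (dh->tail)
		dh->tail->next = n;	/* O(1): no walk to the end */
	else
		dh->head = n;		/* list was empty */
	dh->tail = n;
}

int main(void)
{
	struct dhead dh = { NULL, NULL };
	struct node a = { .v = 1 }, b = { .v = 2 };
	struct node *n;

	dhead_add_tail(&dh, &a);
	dhead_add_tail(&dh, &b);
	for (n = dh.head; n; n = n->next)
		printf("%d\n", n->v);	/* prints 1 then 2 */
	return 0;
}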
-
-static void
-cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts,
-		     unsigned int bits, const void *key, struct cfs_hash_bd *bd)
-{
-	unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);
-
-	LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);
-
-	bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)];
-	bd->bd_offset = index >> (bits - hs->hs_bkt_bits);
-}
-
-void
-cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
-{
-	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
-	if (likely(!hs->hs_rehash_buckets)) {
-		cfs_hash_bd_from_key(hs, hs->hs_buckets,
-				     hs->hs_cur_bits, key, bd);
-	} else {
-		LASSERT(hs->hs_rehash_bits);
-		cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-				     hs->hs_rehash_bits, key, bd);
-	}
-}
-EXPORT_SYMBOL(cfs_hash_bd_get);
-
-static inline void
-cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
-{
-	if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
-		return;
-
-	bd->bd_bucket->hsb_depmax = dep_cur;
-# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-	if (likely(!warn_on_depth ||
-		   max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
-		return;
-
-	spin_lock(&hs->hs_dep_lock);
-	hs->hs_dep_max = dep_cur;
-	hs->hs_dep_bkt = bd->bd_bucket->hsb_index;
-	hs->hs_dep_off = bd->bd_offset;
-	hs->hs_dep_bits = hs->hs_cur_bits;
-	spin_unlock(&hs->hs_dep_lock);
-
-	queue_work(cfs_rehash_wq, &hs->hs_dep_work);
-# endif
-}
-
-void
-cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		       struct hlist_node *hnode)
-{
-	int rc;
-
-	rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
-	cfs_hash_bd_dep_record(hs, bd, rc);
-	bd->bd_bucket->hsb_version++;
-	if (unlikely(!bd->bd_bucket->hsb_version))
-		bd->bd_bucket->hsb_version++;
-	bd->bd_bucket->hsb_count++;
-
-	if (cfs_hash_with_counter(hs))
-		atomic_inc(&hs->hs_count);
-	if (!cfs_hash_with_no_itemref(hs))
-		cfs_hash_get(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_add_locked);
-
-void
-cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		       struct hlist_node *hnode)
-{
-	hs->hs_hops->hop_hnode_del(hs, bd, hnode);
-
-	LASSERT(bd->bd_bucket->hsb_count > 0);
-	bd->bd_bucket->hsb_count--;
-	bd->bd_bucket->hsb_version++;
-	if (unlikely(!bd->bd_bucket->hsb_version))
-		bd->bd_bucket->hsb_version++;
-
-	if (cfs_hash_with_counter(hs)) {
-		LASSERT(atomic_read(&hs->hs_count) > 0);
-		atomic_dec(&hs->hs_count);
-	}
-	if (!cfs_hash_with_no_itemref(hs))
-		cfs_hash_put_locked(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_del_locked);
-
-void
-cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
-			struct cfs_hash_bd *bd_new, struct hlist_node *hnode)
-{
-	struct cfs_hash_bucket *obkt = bd_old->bd_bucket;
-	struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
-	int rc;
-
-	if (!cfs_hash_bd_compare(bd_old, bd_new))
-		return;
-
-	/* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops
-	 * in cfs_hash_bd_del/add_locked
-	 */
-	hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
-	rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
-	cfs_hash_bd_dep_record(hs, bd_new, rc);
-
-	LASSERT(obkt->hsb_count > 0);
-	obkt->hsb_count--;
-	obkt->hsb_version++;
-	if (unlikely(!obkt->hsb_version))
-		obkt->hsb_version++;
-	nbkt->hsb_count++;
-	nbkt->hsb_version++;
-	if (unlikely(!nbkt->hsb_version))
-		nbkt->hsb_version++;
-}
-
-enum {
-	/** always set, for sanity (avoid ZERO intent) */
-	CFS_HS_LOOKUP_MASK_FIND	= BIT(0),
-	/** return entry with a ref */
-	CFS_HS_LOOKUP_MASK_REF	= BIT(1),
-	/** add entry if not existing */
-	CFS_HS_LOOKUP_MASK_ADD	= BIT(2),
-	/** delete entry, ignore other masks */
-	CFS_HS_LOOKUP_MASK_DEL	= BIT(3),
-};
-
-enum cfs_hash_lookup_intent {
-	/** return item w/o refcount */
-	CFS_HS_LOOKUP_IT_PEEK	 = CFS_HS_LOOKUP_MASK_FIND,
-	/** return item with refcount */
-	CFS_HS_LOOKUP_IT_FIND	 = (CFS_HS_LOOKUP_MASK_FIND |
-				    CFS_HS_LOOKUP_MASK_REF),
-	/** return item w/o refcount if existed, otherwise add */
-	CFS_HS_LOOKUP_IT_ADD	 = (CFS_HS_LOOKUP_MASK_FIND |
-				    CFS_HS_LOOKUP_MASK_ADD),
-	/** return item with refcount if existed, otherwise add */
-	CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
-				    CFS_HS_LOOKUP_MASK_ADD),
-	/** delete if existed */
-	CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
-				    CFS_HS_LOOKUP_MASK_DEL)
-};
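/*
 * The intents are bitwise compositions of the masks above; for example,
 * CFS_HS_LOOKUP_IT_FINDADD = FIND | REF | ADD = 0x1 | 0x2 | 0x4 = 0x7,
 * so a FINDADD lookup takes a reference on a matching entry and falls
 * back to inserting the caller's @hnode when no match exists.
 */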
-
-static struct hlist_node *
-cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			  const void *key, struct hlist_node *hnode,
-			  enum cfs_hash_lookup_intent intent)
-
-{
-	struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
-	struct hlist_node *ehnode;
-	struct hlist_node *match;
-	int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD;
-
-	/* with this function, we can avoid a lot of useless refcount ops,
-	 * which are expensive atomic operations most of the time.
-	 */
-	match = intent_add ? NULL : hnode;
-	hlist_for_each(ehnode, hhead) {
-		if (!cfs_hash_keycmp(hs, key, ehnode))
-			continue;
-
-		if (match && match != ehnode) /* can't match */
-			continue;
-
-		/* match and ... */
-		if (intent & CFS_HS_LOOKUP_MASK_DEL) {
-			cfs_hash_bd_del_locked(hs, bd, ehnode);
-			return ehnode;
-		}
-
-		/* caller wants refcount? */
-		if (intent & CFS_HS_LOOKUP_MASK_REF)
-			cfs_hash_get(hs, ehnode);
-		return ehnode;
-	}
-	/* no match item */
-	if (!intent_add)
-		return NULL;
-
-	LASSERT(hnode);
-	cfs_hash_bd_add_locked(hs, bd, hnode);
-	return hnode;
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			  const void *key)
-{
-	return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
-					 CFS_HS_LOOKUP_IT_FIND);
-}
-EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
-
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			const void *key)
-{
-	return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
-					 CFS_HS_LOOKUP_IT_PEEK);
-}
-EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
-
-static void
-cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-		       unsigned int n, int excl)
-{
-	struct cfs_hash_bucket *prev = NULL;
-	int i;
-
-	/**
-	 * bds must be ordered by ascending bd->bd_bucket->hsb_index.
-	 * NB: several bds may point to the same bucket but with different
-	 * bd::bd_offset, so take care to avoid deadlock.
-	 */
-	cfs_hash_for_each_bd(bds, n, i) {
-		if (prev == bds[i].bd_bucket)
-			continue;
-
-		LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index);
-		cfs_hash_bd_lock(hs, &bds[i], excl);
-		prev = bds[i].bd_bucket;
-	}
-}
-
-static void
-cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			 unsigned int n, int excl)
-{
-	struct cfs_hash_bucket *prev = NULL;
-	int i;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		if (prev != bds[i].bd_bucket) {
-			cfs_hash_bd_unlock(hs, &bds[i], excl);
-			prev = bds[i].bd_bucket;
-		}
-	}
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				unsigned int n, const void *key)
-{
-	struct hlist_node *ehnode;
-	unsigned int i;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
-						   CFS_HS_LOOKUP_IT_FIND);
-		if (ehnode)
-			return ehnode;
-	}
-	return NULL;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				 unsigned int n, const void *key,
-				 struct hlist_node *hnode, int noref)
-{
-	struct hlist_node *ehnode;
-	int intent;
-	unsigned int i;
-
-	LASSERT(hnode);
-	intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
-						   NULL, intent);
-		if (ehnode)
-			return ehnode;
-	}
-
-	if (i == 1) { /* only one bucket */
-		cfs_hash_bd_add_locked(hs, &bds[0], hnode);
-	} else {
-		struct cfs_hash_bd mybd;
-
-		cfs_hash_bd_get(hs, key, &mybd);
-		cfs_hash_bd_add_locked(hs, &mybd, hnode);
-	}
-
-	return hnode;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				 unsigned int n, const void *key,
-				 struct hlist_node *hnode)
-{
-	struct hlist_node *ehnode;
-	unsigned int i;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
-						   CFS_HS_LOOKUP_IT_FINDDEL);
-		if (ehnode)
-			return ehnode;
-	}
-	return NULL;
-}
-
-static void
-cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
-	int rc;
-
-	if (!bd2->bd_bucket)
-		return;
-
-	if (!bd1->bd_bucket) {
-		*bd1 = *bd2;
-		bd2->bd_bucket = NULL;
-		return;
-	}
-
-	rc = cfs_hash_bd_compare(bd1, bd2);
-	if (!rc)
-		bd2->bd_bucket = NULL;
-	else if (rc > 0)
-		swap(*bd1, *bd2); /* swap bd1 and bd2 */
-}
-
-void
-cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
-		     struct cfs_hash_bd *bds)
-{
-	/* NB: caller should hold hs_lock.rw if REHASH is set */
-	cfs_hash_bd_from_key(hs, hs->hs_buckets,
-			     hs->hs_cur_bits, key, &bds[0]);
-	if (likely(!hs->hs_rehash_buckets)) {
-		/* no rehash or not rehashing */
-		bds[1].bd_bucket = NULL;
-		return;
-	}
-
-	LASSERT(hs->hs_rehash_bits);
-	cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-			     hs->hs_rehash_bits, key, &bds[1]);
-
-	cfs_hash_bd_order(&bds[0], &bds[1]);
-}
-
-void
-cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
-	cfs_hash_multi_bd_lock(hs, bds, 2, excl);
-}
-
-void
-cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
-	cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			       const void *key)
-{
-	return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode,
-				int noref)
-{
-	return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
-						hnode, noref);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode)
-{
-	return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
-}
-
-static void
-cfs_hash_buckets_free(struct cfs_hash_bucket **buckets,
-		      int bkt_size, int prev_size, int size)
-{
-	int i;
-
-	for (i = prev_size; i < size; i++)
-		kfree(buckets[i]);
-
-	kvfree(buckets);
-}
-
-/*
- * Create or grow bucket memory. Return old_buckets if no allocation was
- * needed, the newly allocated buckets if allocation was needed and
- * successful, and NULL on error.
- */
-static struct cfs_hash_bucket **
-cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
-			 unsigned int old_size, unsigned int new_size)
-{
-	struct cfs_hash_bucket **new_bkts;
-	int i;
-
-	LASSERT(!old_size || old_bkts);
-
-	if (old_bkts && old_size == new_size)
-		return old_bkts;
-
-	new_bkts = kvmalloc_array(new_size, sizeof(new_bkts[0]), GFP_KERNEL);
-	if (!new_bkts)
-		return NULL;
-
-	if (old_bkts) {
-		memcpy(new_bkts, old_bkts,
-		       min(old_size, new_size) * sizeof(*old_bkts));
-	}
-
-	for (i = old_size; i < new_size; i++) {
-		struct hlist_head *hhead;
-		struct cfs_hash_bd bd;
-
-		new_bkts[i] = kzalloc(cfs_hash_bkt_size(hs), GFP_KERNEL);
-		if (!new_bkts[i]) {
-			cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
-					      old_size, new_size);
-			return NULL;
-		}
-
-		new_bkts[i]->hsb_index = i;
-		new_bkts[i]->hsb_version = 1;	/* shouldn't be zero */
-		new_bkts[i]->hsb_depmax = -1;	/* unknown */
-		bd.bd_bucket = new_bkts[i];
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
-			INIT_HLIST_HEAD(hhead);
-
-		if (cfs_hash_with_no_lock(hs) ||
-		    cfs_hash_with_no_bktlock(hs))
-			continue;
-
-		if (cfs_hash_with_rw_bktlock(hs))
-			rwlock_init(&new_bkts[i]->hsb_lock.rw);
-		else if (cfs_hash_with_spin_bktlock(hs))
-			spin_lock_init(&new_bkts[i]->hsb_lock.spin);
-		else
-			LBUG(); /* invalid use-case */
-	}
-	return new_bkts;
-}
-
-/**
- * Initialize new libcfs hash, where:
- * @name     - Descriptive hash name
- * @cur_bits - Initial hash table size, in bits
- * @max_bits - Maximum allowed hash table resize, in bits
- * @ops      - Registered hash table operations
- * @flags    - CFS_HASH_REHASH enables dynamic hash resizing
- *	     - CFS_HASH_SORT enables chained hash sort
- */
-static void cfs_hash_rehash_worker(struct work_struct *work);
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static void cfs_hash_dep_print(struct work_struct *work)
-{
-	struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_dep_work);
-	int dep;
-	int bkt;
-	int off;
-	int bits;
-
-	spin_lock(&hs->hs_dep_lock);
-	dep = hs->hs_dep_max;
-	bkt = hs->hs_dep_bkt;
-	off = hs->hs_dep_off;
-	bits = hs->hs_dep_bits;
-	spin_unlock(&hs->hs_dep_lock);
-
-	LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
-		      hs->hs_name, bits, dep, bkt, off);
-	spin_lock(&hs->hs_dep_lock);
-	hs->hs_dep_bits = 0; /* mark as workitem done */
-	spin_unlock(&hs->hs_dep_lock);
-}
-
-static void cfs_hash_depth_wi_init(struct cfs_hash *hs)
-{
-	spin_lock_init(&hs->hs_dep_lock);
-	INIT_WORK(&hs->hs_dep_work, cfs_hash_dep_print);
-}
-
-static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
-{
-	cancel_work_sync(&hs->hs_dep_work);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */
-
-static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {}
-static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
-
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
-		unsigned int bkt_bits, unsigned int extra_bytes,
-		unsigned int min_theta, unsigned int max_theta,
-		struct cfs_hash_ops *ops, unsigned int flags)
-{
-	struct cfs_hash *hs;
-	int len;
-
-	BUILD_BUG_ON(CFS_HASH_THETA_BITS >= 15);
-
-	LASSERT(name);
-	LASSERT(ops->hs_key);
-	LASSERT(ops->hs_hash);
-	LASSERT(ops->hs_object);
-	LASSERT(ops->hs_keycmp);
-	LASSERT(ops->hs_get);
-	LASSERT(ops->hs_put || ops->hs_put_locked);
-
-	if (flags & CFS_HASH_REHASH)
-		flags |= CFS_HASH_COUNTER; /* must have counter */
-
-	LASSERT(cur_bits > 0);
-	LASSERT(cur_bits >= bkt_bits);
-	LASSERT(max_bits >= cur_bits && max_bits < 31);
-	LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits));
-	LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK)));
-	LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy));
-
-	len = !(flags & CFS_HASH_BIGNAME) ?
-	      CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
-	hs = kzalloc(offsetof(struct cfs_hash, hs_name[len]), GFP_KERNEL);
-	if (!hs)
-		return NULL;
-
-	strlcpy(hs->hs_name, name, len);
-	hs->hs_flags = flags;
-
-	atomic_set(&hs->hs_refcount, 1);
-	atomic_set(&hs->hs_count, 0);
-
-	cfs_hash_lock_setup(hs);
-	cfs_hash_hlist_setup(hs);
-
-	hs->hs_cur_bits = (u8)cur_bits;
-	hs->hs_min_bits = (u8)cur_bits;
-	hs->hs_max_bits = (u8)max_bits;
-	hs->hs_bkt_bits = (u8)bkt_bits;
-
-	hs->hs_ops = ops;
-	hs->hs_extra_bytes = extra_bytes;
-	hs->hs_rehash_bits = 0;
-	INIT_WORK(&hs->hs_rehash_work, cfs_hash_rehash_worker);
-	cfs_hash_depth_wi_init(hs);
-
-	if (cfs_hash_with_rehash(hs))
-		__cfs_hash_set_theta(hs, min_theta, max_theta);
-
-	hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
-						  CFS_HASH_NBKT(hs));
-	if (hs->hs_buckets)
-		return hs;
-
-	kfree(hs);
-	return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_create);
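/*
 * A minimal usage sketch (not part of this tree): a caller hashing
 * objects keyed by a u64 id.  The cfs_hash_ops callback signatures and
 * the cfs_hash_u64_hash() helper are assumed from libcfs_hash.h, and
 * CFS_HASH_MIN_THETA/MAX_THETA are assumed to be the header's default
 * load-factor bounds; all demo_* names are hypothetical.
 */
#include <linux/libcfs/libcfs_hash.h>

struct demo_obj {
	u64			do_id;
	atomic_t		do_ref;
	struct hlist_node	do_hnode;
};

static unsigned int demo_hops_hash(struct cfs_hash *hs, const void *key,
				   unsigned int mask)
{
	return cfs_hash_u64_hash(*(const u64 *)key, mask);
}

static void *demo_hops_key(struct hlist_node *hnode)
{
	return &container_of(hnode, struct demo_obj, do_hnode)->do_id;
}

static int demo_hops_keycmp(const void *key, struct hlist_node *hnode)
{
	return *(const u64 *)key ==
	       container_of(hnode, struct demo_obj, do_hnode)->do_id;
}

static void *demo_hops_object(struct hlist_node *hnode)
{
	return container_of(hnode, struct demo_obj, do_hnode);
}

static void demo_hops_get(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_inc(&container_of(hnode, struct demo_obj, do_hnode)->do_ref);
}

static void demo_hops_put(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_dec(&container_of(hnode, struct demo_obj, do_hnode)->do_ref);
}

static struct cfs_hash_ops demo_hops = {
	.hs_hash	= demo_hops_hash,
	.hs_key		= demo_hops_key,
	.hs_keycmp	= demo_hops_keycmp,
	.hs_object	= demo_hops_object,
	.hs_get		= demo_hops_get,
	.hs_put		= demo_hops_put,
};

/* 2^7 buckets initially, growable to 2^12, 2^3 hlist heads per bucket */
static struct cfs_hash *demo_hash_init(void)
{
	return cfs_hash_create("demo", 7, 12, 3, 0,
			       CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
			       &demo_hops,
			       CFS_HASH_REHASH | CFS_HASH_COUNTER);
}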
-
-/**
- * Cleanup libcfs hash @hs.
- */
-static void
-cfs_hash_destroy(struct cfs_hash *hs)
-{
-	struct hlist_node *hnode;
-	struct hlist_node *pos;
-	struct cfs_hash_bd bd;
-	int i;
-
-	LASSERT(hs);
-	LASSERT(!cfs_hash_is_exiting(hs) &&
-		!cfs_hash_is_iterating(hs));
-
-	/**
-	 * prohibit further rehashes; we don't need any lock because
-	 * this is the only (last) thread that can change it.
-	 */
-	hs->hs_exiting = 1;
-	if (cfs_hash_with_rehash(hs))
-		cfs_hash_rehash_cancel(hs);
-
-	cfs_hash_depth_wi_cancel(hs);
-	/* rehash should be done/canceled */
-	LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets);
-
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		struct hlist_head *hhead;
-
-		LASSERT(bd.bd_bucket);
-		/* no need to take this lock; kept only for code consistency */
-		cfs_hash_bd_lock(hs, &bd, 1);
-
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-			hlist_for_each_safe(hnode, pos, hhead) {
-				LASSERTF(!cfs_hash_with_assert_empty(hs),
-					 "hash %s bucket %u(%u) is not empty: %u items left\n",
-					 hs->hs_name, bd.bd_bucket->hsb_index,
-					 bd.bd_offset, bd.bd_bucket->hsb_count);
-				/* can't validate the key here, because a
-				 * rehash may have been interrupted
-				 */
-				cfs_hash_bd_del_locked(hs, &bd, hnode);
-				cfs_hash_exit(hs, hnode);
-			}
-		}
-		LASSERT(!bd.bd_bucket->hsb_count);
-		cfs_hash_bd_unlock(hs, &bd, 1);
-		cond_resched();
-	}
-
-	LASSERT(!atomic_read(&hs->hs_count));
-
-	cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
-			      0, CFS_HASH_NBKT(hs));
-	kfree(hs);
-}
-
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs)
-{
-	if (atomic_inc_not_zero(&hs->hs_refcount))
-		return hs;
-	return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_getref);
-
-void cfs_hash_putref(struct cfs_hash *hs)
-{
-	if (atomic_dec_and_test(&hs->hs_refcount))
-		cfs_hash_destroy(hs);
-}
-EXPORT_SYMBOL(cfs_hash_putref);
-
-static inline int
-cfs_hash_rehash_bits(struct cfs_hash *hs)
-{
-	if (cfs_hash_with_no_lock(hs) ||
-	    !cfs_hash_with_rehash(hs))
-		return -EOPNOTSUPP;
-
-	if (unlikely(cfs_hash_is_exiting(hs)))
-		return -ESRCH;
-
-	if (unlikely(cfs_hash_is_rehashing(hs)))
-		return -EALREADY;
-
-	if (unlikely(cfs_hash_is_iterating(hs)))
-		return -EAGAIN;
-
-	/* XXX: need to handle case with max_theta != 2.0
-	 *      and the case with min_theta != 0.5
-	 */
-	if ((hs->hs_cur_bits < hs->hs_max_bits) &&
-	    (__cfs_hash_theta(hs) > hs->hs_max_theta))
-		return hs->hs_cur_bits + 1;
-
-	if (!cfs_hash_with_shrink(hs))
-		return 0;
-
-	if ((hs->hs_cur_bits > hs->hs_min_bits) &&
-	    (__cfs_hash_theta(hs) < hs->hs_min_theta))
-		return hs->hs_cur_bits - 1;
-
-	return 0;
-}
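/*
 * Worked example, assuming CFS_HASH_THETA_BITS == 10 from libcfs_hash.h
 * (theta is the fixed-point average number of items per bucket): with
 * hs_cur_bits == 7 (128 buckets) and 300 items, theta is 300/128 ~= 2.34.
 * With the default max theta of 2.0 this exceeds hs_max_theta, so the
 * function returns 8 and the table grows to 256 buckets.
 */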
-
-/**
- * don't allow inline rehash if:
- * - the user wants non-blocking changes (add/del) on the hash table
- * - there are too many elements
- */
-static inline int
-cfs_hash_rehash_inline(struct cfs_hash *hs)
-{
-	return !cfs_hash_with_nblk_change(hs) &&
-	       atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  The registered
- * ops->hs_get function will be called when the item is added.
- */
-void
-cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-	struct cfs_hash_bd bd;
-	int bits;
-
-	LASSERT(hlist_unhashed(hnode));
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_bd_get_and_lock(hs, key, &bd, 1);
-
-	cfs_hash_key_validate(hs, key, hnode);
-	cfs_hash_bd_add_locked(hs, &bd, hnode);
-
-	cfs_hash_bd_unlock(hs, &bd, 1);
-
-	bits = cfs_hash_rehash_bits(hs);
-	cfs_hash_unlock(hs, 0);
-	if (bits > 0)
-		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-}
-EXPORT_SYMBOL(cfs_hash_add);
-
-static struct hlist_node *
-cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
-		     struct hlist_node *hnode, int noref)
-{
-	struct hlist_node *ehnode;
-	struct cfs_hash_bd bds[2];
-	int bits = 0;
-
-	LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode);
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
-	cfs_hash_key_validate(hs, key, hnode);
-	ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key,
-						 hnode, noref);
-	cfs_hash_dual_bd_unlock(hs, bds, 1);
-
-	if (ehnode == hnode)	/* new item added */
-		bits = cfs_hash_rehash_bits(hs);
-	cfs_hash_unlock(hs, 0);
-	if (bits > 0)
-		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
-	return ehnode;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  The registered
- * ops->hs_get function will be called if the item was added.
- * Returns 0 on success or -EALREADY on key collisions.
- */
-int
-cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
-		    struct hlist_node *hnode)
-{
-	return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ?
-	       -EALREADY : 0;
-}
-EXPORT_SYMBOL(cfs_hash_add_unique);
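/*
 * Illustrative use (hypothetical demo_obj from the sketch above): insert
 * a new object only if its key is free, otherwise hand back the existing
 * holder.  cfs_hash_findadd_unique() below performs the same find-or-add
 * in a single pass.
 */
static struct demo_obj *demo_insert(struct cfs_hash *hs, struct demo_obj *obj)
{
	if (!cfs_hash_add_unique(hs, &obj->do_id, &obj->do_hnode))
		return obj;	/* inserted; a ref was taken via ops->hs_get */

	/* lost the race (-EALREADY): return the entry that won instead
	 * (may be NULL if the winner has already been removed again)
	 */
	return cfs_hash_lookup(hs, &obj->do_id);
}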
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  If this @key
- * already exists in the hash then ops->hs_get will be called on the
- * conflicting entry and that entry will be returned to the caller.
- * Otherwise ops->hs_get is called on the item which was added.
- */
-void *
-cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
-			struct hlist_node *hnode)
-{
-	hnode = cfs_hash_find_or_add(hs, key, hnode, 0);
-
-	return cfs_hash_object(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_findadd_unique);
-
-/**
- * Delete item @hnode from the libcfs hash @hs using @key.  The @key
- * is required to ensure the correct hash bucket is locked since there
- * is no direct linkage from the item to the bucket.  The object
- * removed from the hash will be returned and ops->hs_put is called
- * on the removed object.
- */
-void *
-cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-	void *obj = NULL;
-	int bits = 0;
-	struct cfs_hash_bd bds[2];
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
-	/* NB: do nothing if @hnode is not in hash table */
-	if (!hnode || !hlist_unhashed(hnode)) {
-		if (!bds[1].bd_bucket && hnode) {
-			cfs_hash_bd_del_locked(hs, &bds[0], hnode);
-		} else {
-			hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
-								key, hnode);
-		}
-	}
-
-	if (hnode) {
-		obj = cfs_hash_object(hs, hnode);
-		bits = cfs_hash_rehash_bits(hs);
-	}
-
-	cfs_hash_dual_bd_unlock(hs, bds, 1);
-	cfs_hash_unlock(hs, 0);
-	if (bits > 0)
-		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
-	return obj;
-}
-EXPORT_SYMBOL(cfs_hash_del);
-
-/**
- * Delete item given @key in libcfs hash @hs.  The first @key found in
- * the hash will be removed; if the key exists multiple times in the hash
- * @hs, this function must be called once per key.  The removed object
- * will be returned and ops->hs_put is called on the removed object.
- */
-void *
-cfs_hash_del_key(struct cfs_hash *hs, const void *key)
-{
-	return cfs_hash_del(hs, key, NULL);
-}
-EXPORT_SYMBOL(cfs_hash_del_key);
-
-/**
- * Lookup an item using @key in the libcfs hash @hs and return it.
- * If the @key is found in the hash, hs->hs_get() is called and the
- * matching object is returned.  It is the caller's responsibility
- * to call the counterpart ops->hs_put using the cfs_hash_put() macro
- * when finished with the object.  If the @key was not found
- * in the hash @hs NULL is returned.
- */
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key)
-{
-	void *obj = NULL;
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bds[2];
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
-	hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
-	if (hnode)
-		obj = cfs_hash_object(hs, hnode);
-
-	cfs_hash_dual_bd_unlock(hs, bds, 0);
-	cfs_hash_unlock(hs, 0);
-
-	return obj;
-}
-EXPORT_SYMBOL(cfs_hash_lookup);
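/*
 * Illustrative lookup/put pairing (hypothetical demo_obj as above): the
 * object returned by cfs_hash_lookup() carries a reference taken through
 * ops->hs_get, which must be dropped with the cfs_hash_put() macro.
 */
static bool demo_id_exists(struct cfs_hash *hs, u64 id)
{
	struct demo_obj *obj = cfs_hash_lookup(hs, &id);

	if (!obj)
		return false;

	cfs_hash_put(hs, &obj->do_hnode);
	return true;
}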
-
-static void
-cfs_hash_for_each_enter(struct cfs_hash *hs)
-{
-	LASSERT(!cfs_hash_is_exiting(hs));
-
-	if (!cfs_hash_with_rehash(hs))
-		return;
-	/*
-	 * NB: there is a race on cfs_hash::hs_iterating, but it doesn't
-	 * matter because it's just an unreliable signal to the rehash
-	 * thread, which will try to finish the rehash ASAP on seeing it.
-	 */
-	hs->hs_iterating = 1;
-
-	cfs_hash_lock(hs, 1);
-	hs->hs_iterators++;
-	cfs_hash_unlock(hs, 1);
-
-	/* NB: iteration is mostly called from service threads; instead
-	 * of blocking them we cancel any pending rehash request and
-	 * relaunch it after the iteration
-	 */
-	if (cfs_hash_is_rehashing(hs))
-		cfs_hash_rehash_cancel(hs);
-}
-
-static void
-cfs_hash_for_each_exit(struct cfs_hash *hs)
-{
-	int remained;
-	int bits;
-
-	if (!cfs_hash_with_rehash(hs))
-		return;
-	cfs_hash_lock(hs, 1);
-	remained = --hs->hs_iterators;
-	bits = cfs_hash_rehash_bits(hs);
-	cfs_hash_unlock(hs, 1);
-	/* NB: racy on cfs_hash::hs_iterating, see above */
-	if (!remained)
-		hs->hs_iterating = 0;
-	if (bits > 0) {
-		cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
-				    CFS_HASH_LOOP_HOG);
-	}
-}
-
-/**
- * For each item in the libcfs hash @hs call the passed callback @func
- * and pass to it as an argument each hash item and the private @data.
- *
- * a) the function itself may sleep (it reschedules between buckets)!
- * b) during the callback:
- *    . the bucket lock is held, so the callback must never sleep.
- *    . if @remove_safe is true, the user can remove the current item
- *      with cfs_hash_bd_del_locked
- */
-static u64
-cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			void *data, int remove_safe)
-{
-	struct hlist_node *hnode;
-	struct hlist_node *pos;
-	struct cfs_hash_bd bd;
-	u64 count = 0;
-	int excl = !!remove_safe;
-	int loop = 0;
-	int i;
-
-	cfs_hash_for_each_enter(hs);
-
-	cfs_hash_lock(hs, 0);
-	LASSERT(!cfs_hash_is_rehashing(hs));
-
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		struct hlist_head *hhead;
-
-		cfs_hash_bd_lock(hs, &bd, excl);
-		if (!func) { /* only glimpse size */
-			count += bd.bd_bucket->hsb_count;
-			cfs_hash_bd_unlock(hs, &bd, excl);
-			continue;
-		}
-
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-			hlist_for_each_safe(hnode, pos, hhead) {
-				cfs_hash_bucket_validate(hs, &bd, hnode);
-				count++;
-				loop++;
-				if (func(hs, &bd, hnode, data)) {
-					cfs_hash_bd_unlock(hs, &bd, excl);
-					goto out;
-				}
-			}
-		}
-		cfs_hash_bd_unlock(hs, &bd, excl);
-		if (loop < CFS_HASH_LOOP_HOG)
-			continue;
-		loop = 0;
-		cfs_hash_unlock(hs, 0);
-		cond_resched();
-		cfs_hash_lock(hs, 0);
-	}
- out:
-	cfs_hash_unlock(hs, 0);
-
-	cfs_hash_for_each_exit(hs);
-	return count;
-}
-
-struct cfs_hash_cond_arg {
-	cfs_hash_cond_opt_cb_t	func;
-	void			*arg;
-};
-
-static int
-cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 struct hlist_node *hnode, void *data)
-{
-	struct cfs_hash_cond_arg *cond = data;
-
-	if (cond->func(cfs_hash_object(hs, hnode), cond->arg))
-		cfs_hash_bd_del_locked(hs, bd, hnode);
-	return 0;
-}
-
-/**
- * Delete items from the libcfs hash @hs for which @func returns true.
- * The write lock is held while looping over each bucket, so that no
- * object can be referenced concurrently.
- */
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
-{
-	struct cfs_hash_cond_arg arg = {
-		.func	= func,
-		.arg	= data,
-	};
-
-	cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1);
-}
-EXPORT_SYMBOL(cfs_hash_cond_del);
-
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-		  void *data)
-{
-	cfs_hash_for_each_tight(hs, func, data, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each);
-
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-		       void *data)
-{
-	cfs_hash_for_each_tight(hs, func, data, 1);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_safe);
-
-static int
-cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-	      struct hlist_node *hnode, void *data)
-{
-	*(int *)data = 0;
-	return 1; /* return 1 to break the loop */
-}
-
-int
-cfs_hash_is_empty(struct cfs_hash *hs)
-{
-	int empty = 1;
-
-	cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
-	return empty;
-}
-EXPORT_SYMBOL(cfs_hash_is_empty);
-
-u64
-cfs_hash_size_get(struct cfs_hash *hs)
-{
-	return cfs_hash_with_counter(hs) ?
-	       atomic_read(&hs->hs_count) :
-	       cfs_hash_for_each_tight(hs, NULL, NULL, 0);
-}
-EXPORT_SYMBOL(cfs_hash_size_get);
-
-/*
- * cfs_hash_for_each_relax:
- * Iterate the hash table and call @func on each item without any
- * lock held. This function cannot guarantee that the iteration
- * finishes if these features are enabled:
- *
- *  a. if rehash_key is enabled, an item can be moved from
- *     one bucket to another
- *  b. the user can remove a non-zero-ref item from the hash table,
- *     and, even worse, may change its key and insert it into
- *     another hash bucket.
- * There is no way to finish the iteration correctly in these two
- * cases, so the iteration has to stop on change.
- */
-static int
-cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			void *data, int start)
-{
-	struct hlist_node *next = NULL;
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bd;
-	u32 version;
-	int count = 0;
-	int stop_on_change;
-	int has_put_locked;
-	int end = -1;
-	int rc = 0;
-	int i;
-
-	stop_on_change = cfs_hash_with_rehash_key(hs) ||
-			 !cfs_hash_with_no_itemref(hs);
-	has_put_locked = hs->hs_ops->hs_put_locked != NULL;
-	cfs_hash_lock(hs, 0);
-again:
-	LASSERT(!cfs_hash_is_rehashing(hs));
-
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		struct hlist_head *hhead;
-
-		if (i < start)
-			continue;
-		else if (end > 0 && i >= end)
-			break;
-
-		cfs_hash_bd_lock(hs, &bd, 0);
-		version = cfs_hash_bd_version_get(&bd);
-
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-			hnode = hhead->first;
-			if (!hnode)
-				continue;
-			cfs_hash_get(hs, hnode);
-
-			for (; hnode; hnode = next) {
-				cfs_hash_bucket_validate(hs, &bd, hnode);
-				next = hnode->next;
-				if (next)
-					cfs_hash_get(hs, next);
-				cfs_hash_bd_unlock(hs, &bd, 0);
-				cfs_hash_unlock(hs, 0);
-
-				rc = func(hs, &bd, hnode, data);
-				if (stop_on_change || !has_put_locked)
-					cfs_hash_put(hs, hnode);
-				cond_resched();
-				count++;
-
-				cfs_hash_lock(hs, 0);
-				cfs_hash_bd_lock(hs, &bd, 0);
-				if (stop_on_change) {
-					if (version !=
-					    cfs_hash_bd_version_get(&bd))
-						rc = -EINTR;
-				} else if (has_put_locked) {
-					cfs_hash_put_locked(hs, hnode);
-				}
-				if (rc) /* callback wants to break iteration */
-					break;
-			}
-			if (next) {
-				if (has_put_locked) {
-					cfs_hash_put_locked(hs, next);
-					next = NULL;
-				}
-				break;
-			} else if (rc) {
-				break;
-			}
-		}
-		cfs_hash_bd_unlock(hs, &bd, 0);
-		if (next && !has_put_locked) {
-			cfs_hash_put(hs, next);
-			next = NULL;
-		}
-		if (rc) /* callback wants to break iteration */
-			break;
-	}
-	if (start > 0 && !rc) {
-		end = start;
-		start = 0;
-		goto again;
-	}
-
-	cfs_hash_unlock(hs, 0);
-	return count;
-}
-
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			 void *data, int start)
-{
-	if (cfs_hash_with_no_lock(hs) ||
-	    cfs_hash_with_rehash_key(hs) ||
-	    !cfs_hash_with_no_itemref(hs))
-		return -EOPNOTSUPP;
-
-	if (!hs->hs_ops->hs_get ||
-	    (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
-		return -EOPNOTSUPP;
-
-	cfs_hash_for_each_enter(hs);
-	cfs_hash_for_each_relax(hs, func, data, start);
-	cfs_hash_for_each_exit(hs);
-
-	return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_nolock);
-
-/**
- * For each hash bucket in the libcfs hash @hs call the passed callback
- * @func until all the hash buckets are empty.  The passed callback @func
- * or the previously registered callback hs->hs_put must remove the item
- * from the hash.  You may either use the cfs_hash_del() or hlist_del()
- * functions.  No rwlocks will be held during the callback @func, so it
- * is safe to sleep if needed.  This function will not terminate until
- * the hash is empty.  Note it is still possible to concurrently add new
- * items into the hash.  It is the caller's responsibility to ensure
- * the required locking is in place to prevent concurrent insertions.
- */
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			void *data)
-{
-	unsigned int i = 0;
-
-	if (cfs_hash_with_no_lock(hs))
-		return -EOPNOTSUPP;
-
-	if (!hs->hs_ops->hs_get ||
-	    (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
-		return -EOPNOTSUPP;
-
-	cfs_hash_for_each_enter(hs);
-	while (cfs_hash_for_each_relax(hs, func, data, 0)) {
-		CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
-		       hs->hs_name, i++);
-	}
-	cfs_hash_for_each_exit(hs);
-	return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_empty);
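/*
 * Illustrative teardown (hypothetical demo_* names as above): the callback
 * must unhash each item, e.g. via cfs_hash_del(); no bucket lock is held,
 * so it may sleep.  A refcounted object is freed once its last reference,
 * held by the iterator and dropped through ops->hs_put, goes away.
 */
static int demo_zap(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		    struct hlist_node *hnode, void *data)
{
	struct demo_obj *obj = cfs_hash_object(hs, hnode);

	cfs_hash_del(hs, &obj->do_id, hnode);
	return 0;
}

static void demo_hash_fini(struct cfs_hash *hs)
{
	cfs_hash_for_each_empty(hs, demo_zap, NULL);
	cfs_hash_putref(hs);	/* drops the initial ref from cfs_hash_create */
}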
-
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
-			cfs_hash_for_each_cb_t func, void *data)
-{
-	struct hlist_head *hhead;
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bd;
-
-	cfs_hash_for_each_enter(hs);
-	cfs_hash_lock(hs, 0);
-	if (hindex >= CFS_HASH_NHLIST(hs))
-		goto out;
-
-	cfs_hash_bd_index_set(hs, hindex, &bd);
-
-	cfs_hash_bd_lock(hs, &bd, 0);
-	hhead = cfs_hash_bd_hhead(hs, &bd);
-	hlist_for_each(hnode, hhead) {
-		if (func(hs, &bd, hnode, data))
-			break;
-	}
-	cfs_hash_bd_unlock(hs, &bd, 0);
-out:
-	cfs_hash_unlock(hs, 0);
-	cfs_hash_for_each_exit(hs);
-}
-EXPORT_SYMBOL(cfs_hash_hlist_for_each);
-
-/*
- * For each item in the libcfs hash @hs which matches the @key call
- * the passed callback @func and pass to it as an argument each hash
- * item and the private @data. During the callback the bucket lock
- * is held so the callback must never sleep.
- */
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
-		      cfs_hash_for_each_cb_t func, void *data)
-{
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bds[2];
-	unsigned int i;
-
-	cfs_hash_lock(hs, 0);
-
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
-	cfs_hash_for_each_bd(bds, 2, i) {
-		struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
-
-		hlist_for_each(hnode, hlist) {
-			cfs_hash_bucket_validate(hs, &bds[i], hnode);
-
-			if (cfs_hash_keycmp(hs, key, hnode)) {
-				if (func(hs, &bds[i], hnode, data))
-					break;
-			}
-		}
-	}
-
-	cfs_hash_dual_bd_unlock(hs, bds, 0);
-	cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_key);
-
-/**
- * Rehash the libcfs hash @hs.  This can be used
- * to grow the hash size when excessive chaining is detected, or to
- * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
- * flag is set in @hs the libcfs hash may be dynamically rehashed
- * during addition or removal if the hash's theta value exceeds
- * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
- * these values are tuned to keep the chained hash depth small, and
- * this approach assumes a reasonably uniform hashing function.  The
- * theta thresholds for @hs are tunable via cfs_hash_set_theta().
- */
-void
-cfs_hash_rehash_cancel(struct cfs_hash *hs)
-{
-	LASSERT(cfs_hash_with_rehash(hs));
-	cancel_work_sync(&hs->hs_rehash_work);
-}
-
-void
-cfs_hash_rehash(struct cfs_hash *hs, int do_rehash)
-{
-	int rc;
-
-	LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));
-
-	cfs_hash_lock(hs, 1);
-
-	rc = cfs_hash_rehash_bits(hs);
-	if (rc <= 0) {
-		cfs_hash_unlock(hs, 1);
-		return;
-	}
-
-	hs->hs_rehash_bits = rc;
-	if (!do_rehash) {
-		/* launch and return */
-		queue_work(cfs_rehash_wq, &hs->hs_rehash_work);
-		cfs_hash_unlock(hs, 1);
-		return;
-	}
-
-	/* rehash right now */
-	cfs_hash_unlock(hs, 1);
-
-	cfs_hash_rehash_worker(&hs->hs_rehash_work);
-}
-
-static int
-cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
-{
-	struct cfs_hash_bd new;
-	struct hlist_head *hhead;
-	struct hlist_node *hnode;
-	struct hlist_node *pos;
-	void *key;
-	int c = 0;
-
-	/* caller holds cfs_hash_lock(hs, 1), so no bucket lock is needed */
-	cfs_hash_bd_for_each_hlist(hs, old, hhead) {
-		hlist_for_each_safe(hnode, pos, hhead) {
-			key = cfs_hash_key(hs, hnode);
-			LASSERT(key);
-			/* Validate hnode is in the correct bucket. */
-			cfs_hash_bucket_validate(hs, old, hnode);
-			/*
-			 * Delete from old hash bucket; move to new bucket.
-			 * ops->hs_key must be defined.
-			 */
-			cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-					     hs->hs_rehash_bits, key, &new);
-			cfs_hash_bd_move_locked(hs, old, &new, hnode);
-			c++;
-		}
-	}
-
-	return c;
-}
-
-static void
-cfs_hash_rehash_worker(struct work_struct *work)
-{
-	struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_rehash_work);
-	struct cfs_hash_bucket **bkts;
-	struct cfs_hash_bd bd;
-	unsigned int old_size;
-	unsigned int new_size;
-	int bsize;
-	int count = 0;
-	int rc = 0;
-	int i;
-
-	LASSERT(hs && cfs_hash_with_rehash(hs));
-
-	cfs_hash_lock(hs, 0);
-	LASSERT(cfs_hash_is_rehashing(hs));
-
-	old_size = CFS_HASH_NBKT(hs);
-	new_size = CFS_HASH_RH_NBKT(hs);
-
-	cfs_hash_unlock(hs, 0);
-
-	/*
-	 * don't need hs::hs_rwlock for hs::hs_buckets,
-	 * because nobody else can change the bucket table.
-	 */
-	bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
-					old_size, new_size);
-	cfs_hash_lock(hs, 1);
-	if (!bkts) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	if (bkts == hs->hs_buckets) {
-		bkts = NULL; /* do nothing */
-		goto out;
-	}
-
-	rc = __cfs_hash_theta(hs);
-	if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
-		/* free the newly allocated bkt-table */
-		old_size = new_size;
-		new_size = CFS_HASH_NBKT(hs);
-		rc = -EALREADY;
-		goto out;
-	}
-
-	LASSERT(!hs->hs_rehash_buckets);
-	hs->hs_rehash_buckets = bkts;
-
-	rc = 0;
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		if (cfs_hash_is_exiting(hs)) {
-			rc = -ESRCH;
-			/* someone wants to destroy the hash, abort now */
-			if (old_size < new_size) /* OK to free old bkt-table */
-				break;
-			/* it's shrinking, need to free the new bkt-table */
-			hs->hs_rehash_buckets = NULL;
-			old_size = new_size;
-			new_size = CFS_HASH_NBKT(hs);
-			goto out;
-		}
-
-		count += cfs_hash_rehash_bd(hs, &bd);
-		if (count < CFS_HASH_LOOP_HOG ||
-		    cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
-			continue;
-		}
-
-		count = 0;
-		cfs_hash_unlock(hs, 1);
-		cond_resched();
-		cfs_hash_lock(hs, 1);
-	}
-
-	hs->hs_rehash_count++;
-
-	bkts = hs->hs_buckets;
-	hs->hs_buckets = hs->hs_rehash_buckets;
-	hs->hs_rehash_buckets = NULL;
-
-	hs->hs_cur_bits = hs->hs_rehash_bits;
-out:
-	hs->hs_rehash_bits = 0;
-	bsize = cfs_hash_bkt_size(hs);
-	cfs_hash_unlock(hs, 1);
-	/* can't refer to @hs anymore because it could be destroyed */
-	if (bkts)
-		cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
-	if (rc)
-		CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
-}
-
-/**
- * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
- * @old_key must be provided to locate the objects previous location
- * in the hash, and the @new_key will be used to reinsert the object.
- * Use this function instead of a cfs_hash_add() + cfs_hash_del()
- * combo when it is critical that there is no window in time where the
- * object is missing from the hash.  When an object is being rehashed
- * the registered cfs_hash_get() and cfs_hash_put() functions will
- * not be called.
- */
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
-			 void *new_key, struct hlist_node *hnode)
-{
-	struct cfs_hash_bd bds[3];
-	struct cfs_hash_bd old_bds[2];
-	struct cfs_hash_bd new_bd;
-
-	LASSERT(!hlist_unhashed(hnode));
-
-	cfs_hash_lock(hs, 0);
-
-	cfs_hash_dual_bd_get(hs, old_key, old_bds);
-	cfs_hash_bd_get(hs, new_key, &new_bd);
-
-	bds[0] = old_bds[0];
-	bds[1] = old_bds[1];
-	bds[2] = new_bd;
-
-	/* NB: bds[0] and bds[1] are ordered already */
-	cfs_hash_bd_order(&bds[1], &bds[2]);
-	cfs_hash_bd_order(&bds[0], &bds[1]);
-
-	cfs_hash_multi_bd_lock(hs, bds, 3, 1);
-	if (likely(!old_bds[1].bd_bucket)) {
-		cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
-	} else {
-		cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
-		cfs_hash_bd_add_locked(hs, &new_bd, hnode);
-	}
-	/* overwrite key inside locks, otherwise may screw up with
-	 * other operations, e.g. rehash
-	 */
-	cfs_hash_keycpy(hs, hnode, new_key);
-
-	cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
-	cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_rehash_key);
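/*
 * Illustrative key change (hypothetical demo_obj as above): this requires
 * a hash created with CFS_HASH_REHASH_KEY so that ops->hs_keycpy is
 * registered (see the LASSERT in cfs_hash_create()).
 */
static void demo_rename(struct cfs_hash *hs, struct demo_obj *obj, u64 new_id)
{
	/* obj is never absent from the hash; do_id is rewritten to new_id
	 * by ops->hs_keycpy under the bucket locks
	 */
	cfs_hash_rehash_key(hs, &obj->do_id, &new_id, &obj->do_hnode);
}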
-
-void cfs_hash_debug_header(struct seq_file *m)
-{
-	seq_printf(m, "%-*s   cur   min   max theta t-min t-max flags rehash   count  maxdep maxdepb distribution\n",
-		   CFS_HASH_BIGNAME_LEN, "name");
-}
-EXPORT_SYMBOL(cfs_hash_debug_header);
-
-static struct cfs_hash_bucket **
-cfs_hash_full_bkts(struct cfs_hash *hs)
-{
-	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
-	if (!hs->hs_rehash_buckets)
-		return hs->hs_buckets;
-
-	LASSERT(hs->hs_rehash_bits);
-	return hs->hs_rehash_bits > hs->hs_cur_bits ?
-	       hs->hs_rehash_buckets : hs->hs_buckets;
-}
-
-static unsigned int
-cfs_hash_full_nbkt(struct cfs_hash *hs)
-{
-	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
-	if (!hs->hs_rehash_buckets)
-		return CFS_HASH_NBKT(hs);
-
-	LASSERT(hs->hs_rehash_bits);
-	return hs->hs_rehash_bits > hs->hs_cur_bits ?
-	       CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
-}
-
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
-{
-	int dist[8] = { 0, };
-	int maxdep = -1;
-	int maxdepb = -1;
-	int total = 0;
-	int theta;
-	int i;
-
-	cfs_hash_lock(hs, 0);
-	theta = __cfs_hash_theta(hs);
-
-	seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d  0x%02x %6d ",
-		   CFS_HASH_BIGNAME_LEN, hs->hs_name,
-		   1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
-		   1 << hs->hs_max_bits,
-		   __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
-		   __cfs_hash_theta_int(hs->hs_min_theta),
-		   __cfs_hash_theta_frac(hs->hs_min_theta),
-		   __cfs_hash_theta_int(hs->hs_max_theta),
-		   __cfs_hash_theta_frac(hs->hs_max_theta),
-		   hs->hs_flags, hs->hs_rehash_count);
-
-	/*
-	 * The distribution is a summary of the chained hash depth in
-	 * each of the libcfs hash buckets.  Each bucket's hsb_count is
-	 * divided by the hash theta value and used to generate a
-	 * histogram of the hash distribution.  A uniform hash will
-	 * result in all hash buckets being close to the average, thus
-	 * only the first few entries in the histogram will be non-zero.
-	 * If your hash function results in a non-uniform hash, this will
-	 * be observable as outlier buckets in the distribution histogram.
-	 *
-	 * Uniform hash distribution:		128/128/0/0/0/0/0/0
-	 * Non-Uniform hash distribution:	128/125/0/0/0/0/2/1
-	 */
-	for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
-		struct cfs_hash_bd bd;
-
-		bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
-		cfs_hash_bd_lock(hs, &bd, 0);
-		if (maxdep < bd.bd_bucket->hsb_depmax) {
-			maxdep  = bd.bd_bucket->hsb_depmax;
-			maxdepb = ffz(~maxdep);
-		}
-		total += bd.bd_bucket->hsb_count;
-		dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++;
-		cfs_hash_bd_unlock(hs, &bd, 0);
-	}
-
-	seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
-	for (i = 0; i < 8; i++)
-		seq_printf(m, "%d%c",  dist[i], (i == 7) ? '\n' : '/');
-
-	cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_debug_str);

+ 0 - 1086
drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c

@@ -1,1086 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/cache.h>
-
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <linux/libcfs/libcfs.h>
-
-/** Global CPU partition table */
-struct cfs_cpt_table   *cfs_cpt_tab __read_mostly;
-EXPORT_SYMBOL(cfs_cpt_tab);
-
-/**
- * modparam for setting number of partitions
- *
- *  0 : estimate best value based on cores or NUMA nodes
- *  1 : disable multiple partitions
- * >1 : specify number of partitions
- */
-static int	cpu_npartitions;
-module_param(cpu_npartitions, int, 0444);
-MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
-
-/**
- * modparam for setting the CPU partition pattern:
- *
- * e.g.: "0[0,1,2,3] 1[4,5,6,7]", the number before each bracket is the
- *       CPU partition ID, the numbers in brackets are processor IDs
- *       (core or HT)
- *
- * e.g.: "N 0[0,1] 1[2,3]", the leading 'N' means the numbers in brackets
- *       are NUMA node IDs, the number before each bracket is the CPU
- *       partition ID.
- *
- * e.g.: "N", shortcut expression to create CPTs from NUMA & CPU topology
- *
- * NB: if the user specifies cpu_pattern, cpu_npartitions is ignored
- */
-static char	*cpu_pattern = "N";
-module_param(cpu_pattern, charp, 0444);
-MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
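/*
 * Worked example: cpu_pattern="0[0,2] 1[1,3]" creates two partitions,
 * CPT 0 holding CPUs 0 and 2 and CPT 1 holding CPUs 1 and 3, while
 * cpu_pattern="N 0[0] 1[1]" does the same with NUMA node IDs instead
 * of CPU IDs.  The default "N" creates one partition per online NUMA
 * node (see cfs_cpt_table_create_pattern() below).
 */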
-
-static struct cfs_cpt_data {
-	/* serialize hotplug etc */
-	spinlock_t		cpt_lock;
-	/* reserved for hotplug */
-	unsigned long		cpt_version;
-	/* mutex to protect cpt_cpumask */
-	struct mutex		cpt_mutex;
-	/* scratch buffer for set/unset_node */
-	cpumask_var_t		cpt_cpumask;
-} cpt_data;
-
-#define CFS_CPU_VERSION_MAGIC	   0xbabecafe
-
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt)
-{
-	struct cfs_cpt_table *cptab;
-	int i;
-
-	cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
-	if (!cptab)
-		return NULL;
-
-	cptab->ctb_nparts = ncpt;
-
-	cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
-				      GFP_NOFS);
-	if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) ||
-	    !cptab->ctb_nodemask)
-		goto failed;
-
-	cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
-					    sizeof(cptab->ctb_cpu2cpt[0]),
-					    GFP_KERNEL);
-	if (!cptab->ctb_cpu2cpt)
-		goto failed;
-
-	memset(cptab->ctb_cpu2cpt, -1,
-	       num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
-
-	cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
-					  GFP_KERNEL);
-	if (!cptab->ctb_parts)
-		goto failed;
-
-	for (i = 0; i < ncpt; i++) {
-		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
-		part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
-					     GFP_NOFS);
-		if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
-		    !part->cpt_nodemask)
-			goto failed;
-	}
-
-	spin_lock(&cpt_data.cpt_lock);
-	/* Reserved for hotplug */
-	cptab->ctb_version = cpt_data.cpt_version;
-	spin_unlock(&cpt_data.cpt_lock);
-
-	return cptab;
-
- failed:
-	cfs_cpt_table_free(cptab);
-	return NULL;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-void
-cfs_cpt_table_free(struct cfs_cpt_table *cptab)
-{
-	int i;
-
-	kvfree(cptab->ctb_cpu2cpt);
-
-	for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
-		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
-		kfree(part->cpt_nodemask);
-		free_cpumask_var(part->cpt_cpumask);
-	}
-
-	kvfree(cptab->ctb_parts);
-
-	kfree(cptab->ctb_nodemask);
-	free_cpumask_var(cptab->ctb_cpumask);
-
-	kfree(cptab);
-}
-EXPORT_SYMBOL(cfs_cpt_table_free);
-
-int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
-	char *tmp = buf;
-	int rc = 0;
-	int i;
-	int j;
-
-	for (i = 0; i < cptab->ctb_nparts; i++) {
-		if (len > 0) {
-			rc = snprintf(tmp, len, "%d\t: ", i);
-			len -= rc;
-		}
-
-		if (len <= 0) {
-			rc = -EFBIG;
-			goto out;
-		}
-
-		tmp += rc;
-		for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
-			rc = snprintf(tmp, len, "%d ", j);
-			len -= rc;
-			if (len <= 0) {
-				rc = -EFBIG;
-				goto out;
-			}
-			tmp += rc;
-		}
-
-		*tmp = '\n';
-		tmp++;
-		len--;
-	}
-
- out:
-	if (rc < 0)
-		return rc;
-
-	return tmp - buf;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-
-static void
-cfs_node_to_cpumask(int node, cpumask_t *mask)
-{
-	const cpumask_t *tmp = cpumask_of_node(node);
-
-	if (tmp)
-		cpumask_copy(mask, tmp);
-	else
-		cpumask_clear(mask);
-}
-
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
-	return cptab->ctb_nparts;
-}
-EXPORT_SYMBOL(cfs_cpt_number);
-
-int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       cpumask_weight(cptab->ctb_cpumask) :
-	       cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
-}
-EXPORT_SYMBOL(cfs_cpt_weight);
-
-int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       cpumask_any_and(cptab->ctb_cpumask,
-			       cpu_online_mask) < nr_cpu_ids :
-	       cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
-			       cpu_online_mask) < nr_cpu_ids;
-}
-EXPORT_SYMBOL(cfs_cpt_online);
-
-cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
-}
-EXPORT_SYMBOL(cfs_cpt_cpumask);
-
-nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
-}
-EXPORT_SYMBOL(cfs_cpt_nodemask);
-
-int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-	int node;
-
-	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
-
-	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
-		CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
-		return 0;
-	}
-
-	if (cptab->ctb_cpu2cpt[cpu] != -1) {
-		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
-		       cpu, cptab->ctb_cpu2cpt[cpu]);
-		return 0;
-	}
-
-	cptab->ctb_cpu2cpt[cpu] = cpt;
-
-	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-
-	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
-	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-
-	node = cpu_to_node(cpu);
-
-	/* first CPU of @node in this CPT table */
-	if (!node_isset(node, *cptab->ctb_nodemask))
-		node_set(node, *cptab->ctb_nodemask);
-
-	/* first CPU of @node in this partition */
-	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
-		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
-	return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpu);
-
-void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-	int node;
-	int i;
-
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	if (cpu < 0 || cpu >= nr_cpu_ids) {
-		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
-		return;
-	}
-
-	if (cpt == CFS_CPT_ANY) {
-		/* caller doesn't know the partition ID */
-		cpt = cptab->ctb_cpu2cpt[cpu];
-		if (cpt < 0) { /* not set in this CPT-table */
-			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
-			       cpt, cptab);
-			return;
-		}
-
-	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
-		CDEBUG(D_INFO,
-		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
-		return;
-	}
-
-	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-
-	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
-	cptab->ctb_cpu2cpt[cpu] = -1;
-
-	node = cpu_to_node(cpu);
-
-	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
-	LASSERT(node_isset(node, *cptab->ctb_nodemask));
-
-	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
-		/* does this CPT have another CPU belonging to this node? */
-		if (cpu_to_node(i) == node)
-			break;
-	}
-
-	if (i >= nr_cpu_ids)
-		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
-	for_each_cpu(i, cptab->ctb_cpumask) {
-		/* does this CPT-table have another CPU belonging to this node? */
-		if (cpu_to_node(i) == node)
-			break;
-	}
-
-	if (i >= nr_cpu_ids)
-		node_clear(node, *cptab->ctb_nodemask);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpu);
-
-int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-	int i;
-
-	if (!cpumask_weight(mask) ||
-	    cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
-		CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
-		       cpt);
-		return 0;
-	}
-
-	for_each_cpu(i, mask) {
-		if (!cfs_cpt_set_cpu(cptab, cpt, i))
-			return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpumask);
-
-void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-	int i;
-
-	for_each_cpu(i, mask)
-		cfs_cpt_unset_cpu(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
-
-int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-	int rc;
-
-	if (node < 0 || node >= MAX_NUMNODES) {
-		CDEBUG(D_INFO,
-		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
-		return 0;
-	}
-
-	mutex_lock(&cpt_data.cpt_mutex);
-
-	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
-	rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
-	mutex_unlock(&cpt_data.cpt_mutex);
-
-	return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_set_node);
-
-void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-	if (node < 0 || node >= MAX_NUMNODES) {
-		CDEBUG(D_INFO,
-		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
-		return;
-	}
-
-	mutex_lock(&cpt_data.cpt_mutex);
-
-	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
-	cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
-	mutex_unlock(&cpt_data.cpt_mutex);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_node);
-
-int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-	int i;
-
-	for_each_node_mask(i, *mask) {
-		if (!cfs_cpt_set_node(cptab, cpt, i))
-			return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_nodemask);
-
-void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-	int i;
-
-	for_each_node_mask(i, *mask)
-		cfs_cpt_unset_node(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
-
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-	int last;
-	int i;
-
-	if (cpt == CFS_CPT_ANY) {
-		last = cptab->ctb_nparts - 1;
-		cpt = 0;
-	} else {
-		last = cpt;
-	}
-
-	for (; cpt <= last; cpt++) {
-		for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
-			cfs_cpt_unset_cpu(cptab, cpt, i);
-	}
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
-int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
-	nodemask_t *mask;
-	int weight;
-	int rotor;
-	int node;
-
-	/* convert CPU partition ID to HW node id */
-
-	if (cpt < 0 || cpt >= cptab->ctb_nparts) {
-		mask = cptab->ctb_nodemask;
-		rotor = cptab->ctb_spread_rotor++;
-	} else {
-		mask = cptab->ctb_parts[cpt].cpt_nodemask;
-		rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
-	}
-
-	weight = nodes_weight(*mask);
-	LASSERT(weight > 0);
-
-	rotor %= weight;
-
-	for_each_node_mask(node, *mask) {
-		if (!rotor--)
-			return node;
-	}
-
-	LBUG();
-	return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_spread_node);
-
-int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
-	int cpu;
-	int cpt;
-
-	preempt_disable();
-	cpu = smp_processor_id();
-	cpt = cptab->ctb_cpu2cpt[cpu];
-
-	if (cpt < 0 && remap) {
-		/* don't return a negative value, for the safety of upper
-		 * layers; instead map the unknown CPU to a valid partition ID
-		 */
-		cpt = cpu % cptab->ctb_nparts;
-	}
-	preempt_enable();
-	return cpt;
-}
-EXPORT_SYMBOL(cfs_cpt_current);
-
-int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
-	LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
-
-	return cptab->ctb_cpu2cpt[cpu];
-}
-EXPORT_SYMBOL(cfs_cpt_of_cpu);
-
-int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
-	cpumask_var_t *cpumask;
-	nodemask_t *nodemask;
-	int rc;
-	int i;
-
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	if (cpt == CFS_CPT_ANY) {
-		cpumask = &cptab->ctb_cpumask;
-		nodemask = cptab->ctb_nodemask;
-	} else {
-		cpumask = &cptab->ctb_parts[cpt].cpt_cpumask;
-		nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
-	}
-
-	if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
-		CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
-		       cpt);
-		return -EINVAL;
-	}
-
-	for_each_online_cpu(i) {
-		if (cpumask_test_cpu(i, *cpumask))
-			continue;
-
-		rc = set_cpus_allowed_ptr(current, *cpumask);
-		set_mems_allowed(*nodemask);
-		if (!rc)
-			schedule(); /* switch to allowed CPU */
-
-		return rc;
-	}
-
-	/* don't need to set affinity because all online CPUs are covered */
-	return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_bind);
-
-/**
- * Choose up to \a number CPUs from \a node and set them in \a cpt.
- * We always prefer to choose CPUs from the same core/socket.
- */
-static int
-cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
-		     cpumask_t *node, int number)
-{
-	cpumask_var_t socket;
-	cpumask_var_t core;
-	int rc = 0;
-	int cpu;
-
-	LASSERT(number > 0);
-
-	if (number >= cpumask_weight(node)) {
-		while (!cpumask_empty(node)) {
-			cpu = cpumask_first(node);
-
-			rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
-			if (!rc)
-				return -EINVAL;
-			cpumask_clear_cpu(cpu, node);
-		}
-		return 0;
-	}
-
-	/*
-	 * Allocate scratch buffers
-	 * As we cannot initialize a cpumask_var_t, we need
-	 * to alloc both before we can risk trying to free either
-	 */
-	if (!zalloc_cpumask_var(&socket, GFP_NOFS))
-		rc = -ENOMEM;
-	if (!zalloc_cpumask_var(&core, GFP_NOFS))
-		rc = -ENOMEM;
-	if (rc)
-		goto out;
-
-	while (!cpumask_empty(node)) {
-		cpu = cpumask_first(node);
-
-		/* get cpumask for cores in the same socket */
-		cpumask_copy(socket, topology_core_cpumask(cpu));
-		cpumask_and(socket, socket, node);
-
-		LASSERT(!cpumask_empty(socket));
-
-		while (!cpumask_empty(socket)) {
-			int i;
-
-			/* get cpumask for HTs in the same core */
-			cpumask_copy(core, topology_sibling_cpumask(cpu));
-			cpumask_and(core, core, node);
-
-			LASSERT(!cpumask_empty(core));
-
-			for_each_cpu(i, core) {
-				cpumask_clear_cpu(i, socket);
-				cpumask_clear_cpu(i, node);
-
-				rc = cfs_cpt_set_cpu(cptab, cpt, i);
-				if (!rc) {
-					rc = -EINVAL;
-					goto out;
-				}
-
-				if (!--number)
-					goto out;
-			}
-			cpu = cpumask_first(socket);
-		}
-	}
-
-out:
-	free_cpumask_var(socket);
-	free_cpumask_var(core);
-	return rc;
-}
-
-#define CPT_WEIGHT_MIN  4u
-
-static unsigned int
-cfs_cpt_num_estimate(void)
-{
-	unsigned int nnode = num_online_nodes();
-	unsigned int ncpu = num_online_cpus();
-	unsigned int ncpt;
-
-	if (ncpu <= CPT_WEIGHT_MIN) {
-		ncpt = 1;
-		goto out;
-	}
-
-	/* generate a reasonable number of CPU partitions based on the total
-	 * number of CPUs; the preferred N is a power of 2 satisfying:
-	 * 2 * (N/2)^2 < NCPUS <= 2 * N^2
-	 */
-	for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
-		;
-
-	if (ncpt <= nnode) { /* fat numa system */
-		while (nnode > ncpt)
-			nnode >>= 1;
-
-	} else { /* ncpt > nnode */
-		while ((nnode << 1) <= ncpt)
-			nnode <<= 1;
-	}
-
-	ncpt = nnode;
-
-out:
-#if (BITS_PER_LONG == 32)
-	/* configuring many CPU partitions on a 32-bit system could
-	 * consume too much memory
-	 */
-	ncpt = min(2U, ncpt);
-#endif
-	while (ncpu % ncpt)
-		ncpt--; /* worst case is 1 */
-
-	return ncpt;
-}
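/*
 * Worked example: with 64 online CPUs the loop above doubles ncpt while
 * ncpu > 2 * ncpt^2, giving 2 -> 4 -> 8 (64 > 2*4^2 = 32, but
 * 64 <= 2*8^2 = 128), so the initial estimate is 8 partitions; with
 * 8 CPUs it stops at the starting value of 2 (8 <= 2*2^2).  The estimate
 * is then aligned with the NUMA node count and reduced until it divides
 * the online CPU count evenly.
 */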
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create(int ncpt)
-{
-	struct cfs_cpt_table *cptab = NULL;
-	cpumask_var_t mask;
-	int cpt = 0;
-	int num;
-	int rc;
-	int i;
-
-	rc = cfs_cpt_num_estimate();
-	if (ncpt <= 0)
-		ncpt = rc;
-
-	if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
-		CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
-		      ncpt, rc);
-	}
-
-	if (num_online_cpus() % ncpt) {
-		CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
-		       (int)num_online_cpus(), ncpt);
-		goto failed;
-	}
-
-	cptab = cfs_cpt_table_alloc(ncpt);
-	if (!cptab) {
-		CERROR("Failed to allocate CPU map(%d)\n", ncpt);
-		goto failed;
-	}
-
-	num = num_online_cpus() / ncpt;
-	if (!num) {
-		CERROR("CPU changed while setting CPU partition\n");
-		goto failed;
-	}
-
-	if (!zalloc_cpumask_var(&mask, GFP_NOFS)) {
-		CERROR("Failed to allocate scratch cpumask\n");
-		goto failed;
-	}
-
-	for_each_online_node(i) {
-		cfs_node_to_cpumask(i, mask);
-
-		while (!cpumask_empty(mask)) {
-			struct cfs_cpu_partition *part;
-			int n;
-
-			/*
-			 * Each emulated NUMA node has all allowed CPUs in
-			 * the mask.
-			 * End loop when all partitions have assigned CPUs.
-			 */
-			if (cpt == ncpt)
-				break;
-
-			part = &cptab->ctb_parts[cpt];
-
-			n = num - cpumask_weight(part->cpt_cpumask);
-			LASSERT(n > 0);
-
-			rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
-			if (rc < 0)
-				goto failed_mask;
-
-			LASSERT(num >= cpumask_weight(part->cpt_cpumask));
-			if (num == cpumask_weight(part->cpt_cpumask))
-				cpt++;
-		}
-	}
-
-	if (cpt != ncpt ||
-	    num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
-		CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
-		       cptab->ctb_nparts, num, cpt,
-		       cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
-		goto failed_mask;
-	}
-
-	free_cpumask_var(mask);
-
-	return cptab;
-
- failed_mask:
-	free_cpumask_var(mask);
- failed:
-	CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
-	       ncpt, num_online_nodes(), num_online_cpus());
-
-	if (cptab)
-		cfs_cpt_table_free(cptab);
-
-	return NULL;
-}
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create_pattern(char *pattern)
-{
-	struct cfs_cpt_table *cptab;
-	char *str;
-	int node = 0;
-	int high;
-	int ncpt = 0;
-	int cpt;
-	int rc;
-	int c;
-	int i;
-
-	str = strim(pattern);
-	if (*str == 'n' || *str == 'N') {
-		pattern = str + 1;
-		if (*pattern != '\0') {
-			node = 1;
-		} else { /* shortcut to create CPT from NUMA & CPU topology */
-			node = -1;
-			ncpt = num_online_nodes();
-		}
-	}
-
-	if (!ncpt) { /* scan for brackets; each bracket marks one partition */
-		for (str = pattern;; str++, ncpt++) {
-			str = strchr(str, '[');
-			if (!str)
-				break;
-		}
-	}
-
-	if (!ncpt ||
-	    (node && ncpt > num_online_nodes()) ||
-	    (!node && ncpt > num_online_cpus())) {
-		CERROR("Invalid pattern %s, or too many partitions %d\n",
-		       pattern, ncpt);
-		return NULL;
-	}
-
-	cptab = cfs_cpt_table_alloc(ncpt);
-	if (!cptab) {
-		CERROR("Failed to allocate cpu partition table\n");
-		return NULL;
-	}
-
-	if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
-		cpt = 0;
-
-		for_each_online_node(i) {
-			if (cpt >= ncpt) {
-				CERROR("CPU changed while setting CPU partition table, %d/%d\n",
-				       cpt, ncpt);
-				goto failed;
-			}
-
-			rc = cfs_cpt_set_node(cptab, cpt++, i);
-			if (!rc)
-				goto failed;
-		}
-		return cptab;
-	}
-
-	high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
-
-	for (str = strim(pattern), c = 0;; c++) {
-		struct cfs_range_expr *range;
-		struct cfs_expr_list *el;
-		char *bracket = strchr(str, '[');
-		int n;
-
-		if (!bracket) {
-			if (*str) {
-				CERROR("Invalid pattern %s\n", str);
-				goto failed;
-			}
-			if (c != ncpt) {
-				CERROR("expect %d partitions but found %d\n",
-				       ncpt, c);
-				goto failed;
-			}
-			break;
-		}
-
-		if (sscanf(str, "%d%n", &cpt, &n) < 1) {
-			CERROR("Invalid cpu pattern %s\n", str);
-			goto failed;
-		}
-
-		if (cpt < 0 || cpt >= ncpt) {
-			CERROR("Invalid partition id %d, total partitions %d\n",
-			       cpt, ncpt);
-			goto failed;
-		}
-
-		if (cfs_cpt_weight(cptab, cpt)) {
-			CERROR("Partition %d has already been set.\n", cpt);
-			goto failed;
-		}
-
-		str = strim(str + n);
-		if (str != bracket) {
-			CERROR("Invalid pattern %s\n", str);
-			goto failed;
-		}
-
-		bracket = strchr(str, ']');
-		if (!bracket) {
-			CERROR("missing right bracket for cpt %d, %s\n",
-			       cpt, str);
-			goto failed;
-		}
-
-		if (cfs_expr_list_parse(str, (bracket - str) + 1,
-					0, high, &el)) {
-			CERROR("Can't parse number range: %s\n", str);
-			goto failed;
-		}
-
-		list_for_each_entry(range, &el->el_exprs, re_link) {
-			for (i = range->re_lo; i <= range->re_hi; i++) {
-				if ((i - range->re_lo) % range->re_stride)
-					continue;
-
-				rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
-					    cfs_cpt_set_cpu(cptab, cpt, i);
-				if (!rc) {
-					cfs_expr_list_free(el);
-					goto failed;
-				}
-			}
-		}
-
-		cfs_expr_list_free(el);
-
-		if (!cfs_cpt_online(cptab, cpt)) {
-			CERROR("No online CPU is found on partition %d\n", cpt);
-			goto failed;
-		}
-
-		str = strim(bracket + 1);
-	}
-
-	return cptab;
-
- failed:
-	cfs_cpt_table_free(cptab);
-	return NULL;
-}
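
For reference, a few hand-written examples of the pattern strings this parser accepted (fed in through the libcfs cpu_pattern module parameter used in cfs_cpu_init() below):

	cpu_pattern="0[0,2,4,6] 1[1,3,5,7]"	partition 0 = CPUs 0,2,4,6; partition 1 = CPUs 1,3,5,7
	cpu_pattern="N 0[0] 1[1]"		leading 'N': bracketed values are NUMA nodes, not CPUs
	cpu_pattern="N"				shortcut: one partition per online NUMA node
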
-
-#ifdef CONFIG_HOTPLUG_CPU
-static enum cpuhp_state lustre_cpu_online;
-
-static void cfs_cpu_incr_cpt_version(void)
-{
-	spin_lock(&cpt_data.cpt_lock);
-	cpt_data.cpt_version++;
-	spin_unlock(&cpt_data.cpt_lock);
-}
-
-static int cfs_cpu_online(unsigned int cpu)
-{
-	cfs_cpu_incr_cpt_version();
-	return 0;
-}
-
-static int cfs_cpu_dead(unsigned int cpu)
-{
-	bool warn;
-
-	cfs_cpu_incr_cpt_version();
-
-	mutex_lock(&cpt_data.cpt_mutex);
-	/* if all HTs in a core are offline, it may break affinity */
-	cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
-	warn = cpumask_any_and(cpt_data.cpt_cpumask,
-			       cpu_online_mask) >= nr_cpu_ids;
-	mutex_unlock(&cpt_data.cpt_mutex);
-	CDEBUG(warn ? D_WARNING : D_INFO,
-	       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
-	       cpu);
-	return 0;
-}
-#endif
-
-void
-cfs_cpu_fini(void)
-{
-	if (cfs_cpt_tab)
-		cfs_cpt_table_free(cfs_cpt_tab);
-
-#ifdef CONFIG_HOTPLUG_CPU
-	if (lustre_cpu_online > 0)
-		cpuhp_remove_state_nocalls(lustre_cpu_online);
-	cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
-#endif
-	free_cpumask_var(cpt_data.cpt_cpumask);
-}
-
-int
-cfs_cpu_init(void)
-{
-	int ret = 0;
-
-	LASSERT(!cfs_cpt_tab);
-
-	memset(&cpt_data, 0, sizeof(cpt_data));
-
-	if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) {
-		CERROR("Failed to allocate scratch buffer\n");
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&cpt_data.cpt_lock);
-	mutex_init(&cpt_data.cpt_mutex);
-
-#ifdef CONFIG_HOTPLUG_CPU
-	ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
-					"staging/lustre/cfe:dead", NULL,
-					cfs_cpu_dead);
-	if (ret < 0)
-		goto failed;
-	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-					"staging/lustre/cfe:online",
-					cfs_cpu_online, NULL);
-	if (ret < 0)
-		goto failed;
-	lustre_cpu_online = ret;
-#endif
-	ret = -EINVAL;
-
-	if (*cpu_pattern) {
-		char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
-
-		if (!cpu_pattern_dup) {
-			CERROR("Failed to duplicate cpu_pattern\n");
-			goto failed;
-		}
-
-		cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern_dup);
-		kfree(cpu_pattern_dup);
-		if (!cfs_cpt_tab) {
-			CERROR("Failed to create cptab from pattern %s\n",
-			       cpu_pattern);
-			goto failed;
-		}
-
-	} else {
-		cfs_cpt_tab = cfs_cpt_table_create(cpu_npartitions);
-		if (!cfs_cpt_tab) {
-			CERROR("Failed to create ptable with npartitions %d\n",
-			       cpu_npartitions);
-			goto failed;
-		}
-	}
-
-	spin_lock(&cpt_data.cpt_lock);
-	if (cfs_cpt_tab->ctb_version != cpt_data.cpt_version) {
-		spin_unlock(&cpt_data.cpt_lock);
-		CERROR("CPU hotplug/unplug during setup\n");
-		goto failed;
-	}
-	spin_unlock(&cpt_data.cpt_lock);
-
-	LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
-		 num_online_nodes(), num_online_cpus(),
-		 cfs_cpt_number(cfs_cpt_tab));
-	return 0;
-
- failed:
-	cfs_cpu_fini();
-	return ret;
-}

+ 0 - 155
drivers/staging/lustre/lnet/libcfs/libcfs_lock.c

@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-
-/** destroy cpu-partition lock, see libcfs_private.h for more detail */
-void
-cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
-{
-	LASSERT(pcl->pcl_locks);
-	LASSERT(!pcl->pcl_locked);
-
-	cfs_percpt_free(pcl->pcl_locks);
-	kfree(pcl);
-}
-EXPORT_SYMBOL(cfs_percpt_lock_free);
-
-/**
- * create cpu-partition lock, see libcfs_private.h for more detail.
- *
- * cpu-partition locks are designed for large-scale SMP systems, so we want
- * to reduce cacheline conflicts as much as we can, which is why we always
- * allocate cacheline-aligned memory blocks.
- */
-struct cfs_percpt_lock *
-cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
-		       struct lock_class_key *keys)
-{
-	struct cfs_percpt_lock *pcl;
-	spinlock_t *lock;
-	int i;
-
-	/* NB: cptab can be NULL, pcl will be for HW CPUs in that case */
-	pcl = kzalloc(sizeof(*pcl), GFP_NOFS);
-	if (!pcl)
-		return NULL;
-
-	pcl->pcl_cptab = cptab;
-	pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock));
-	if (!pcl->pcl_locks) {
-		kfree(pcl);
-		return NULL;
-	}
-
-	if (!keys)
-		CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are false positives.\n");
-
-	cfs_percpt_for_each(lock, i, pcl->pcl_locks) {
-		spin_lock_init(lock);
-		if (keys)
-			lockdep_set_class(lock, &keys[i]);
-	}
-
-	return pcl;
-}
-EXPORT_SYMBOL(cfs_percpt_lock_create);
-
-/**
- * lock a CPU partition
- *
- * \a index != CFS_PERCPT_LOCK_EX
- *     hold private lock indexed by \a index
- *
- * \a index == CFS_PERCPT_LOCK_EX
- *     exclusively lock @pcl and nobody can take private lock
- */
-void
-cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
-	__acquires(pcl->pcl_locks)
-{
-	int ncpt = cfs_cpt_number(pcl->pcl_cptab);
-	int i;
-
-	LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
-
-	if (ncpt == 1) {
-		index = 0;
-	} else { /* serialize with exclusive lock */
-		while (pcl->pcl_locked)
-			cpu_relax();
-	}
-
-	if (likely(index != CFS_PERCPT_LOCK_EX)) {
-		spin_lock(pcl->pcl_locks[index]);
-		return;
-	}
-
-	/* exclusive lock request */
-	for (i = 0; i < ncpt; i++) {
-		spin_lock(pcl->pcl_locks[i]);
-		if (!i) {
-			LASSERT(!pcl->pcl_locked);
-			/* nobody should take a private lock after this,
-			 * so we won't be starved for too long
-			 */
-			pcl->pcl_locked = 1;
-		}
-	}
-}
-EXPORT_SYMBOL(cfs_percpt_lock);
-
-/** unlock a CPU partition */
-void
-cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
-	__releases(pcl->pcl_locks)
-{
-	int ncpt = cfs_cpt_number(pcl->pcl_cptab);
-	int i;
-
-	index = ncpt == 1 ? 0 : index;
-
-	if (likely(index != CFS_PERCPT_LOCK_EX)) {
-		spin_unlock(pcl->pcl_locks[index]);
-		return;
-	}
-
-	for (i = ncpt - 1; i >= 0; i--) {
-		if (!i) {
-			LASSERT(pcl->pcl_locked);
-			pcl->pcl_locked = 0;
-		}
-		spin_unlock(pcl->pcl_locks[i]);
-	}
-}
-EXPORT_SYMBOL(cfs_percpt_unlock);
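
A hand-written sketch of the lock's two modes (error handling abbreviated; cfs_cpt_current() and the cfs_cpt_tab global are assumed from the deleted libcfs_cpu.h):

	struct cfs_percpt_lock *pcl;
	int cpt;

	pcl = cfs_percpt_lock_create(cfs_cpt_tab, NULL);
	if (!pcl)
		return -ENOMEM;

	cpt = cfs_cpt_current(cfs_cpt_tab, 0);
	cfs_percpt_lock(pcl, cpt);		/* private lock of one partition */
	/* ... touch state owned by this partition ... */
	cfs_percpt_unlock(pcl, cpt);

	cfs_percpt_lock(pcl, CFS_PERCPT_LOCK_EX);	/* all partitions at once */
	/* ... touch state shared across partitions ... */
	cfs_percpt_unlock(pcl, CFS_PERCPT_LOCK_EX);

	cfs_percpt_lock_free(pcl);
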

+ 0 - 171
drivers/staging/lustre/lnet/libcfs/libcfs_mem.c

@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-
-struct cfs_var_array {
-	unsigned int		va_count;	/* # of buffers */
-	unsigned int		va_size;	/* size of each var */
-	struct cfs_cpt_table	*va_cptab;	/* cpu partition table */
-	void			*va_ptrs[0];	/* buffer addresses */
-};
-
-/*
 - * free per-cpu data, see more detail in cfs_percpt_alloc
- */
-void
-cfs_percpt_free(void *vars)
-{
-	struct cfs_var_array *arr;
-	int i;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-	for (i = 0; i < arr->va_count; i++)
-		kfree(arr->va_ptrs[i]);
-
-	kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_percpt_free);
-
-/*
- * allocate per-CPU-partition variables; the returned value is an array of
- * pointers, indexed by CPU partition ID, i.e.:
- *
- *	arr = cfs_percpt_alloc(cfs_cpu_pt, size);
- *	then the caller can access the memory block for partition 0 via arr[0],
- *	the block for partition 1 via arr[1]...
- *	the block for partition N via arr[N]...
- *
- * Each block is cacheline aligned.
- */
-void *
-cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
-{
-	struct cfs_var_array *arr;
-	int count;
-	int i;
-
-	count = cfs_cpt_number(cptab);
-
-	arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]),
-		       GFP_KERNEL);
-	if (!arr)
-		return NULL;
-
-	size = L1_CACHE_ALIGN(size);
-	arr->va_size = size;
-	arr->va_count = count;
-	arr->va_cptab = cptab;
-
-	for (i = 0; i < count; i++) {
-		arr->va_ptrs[i] = kzalloc_node(size, GFP_KERNEL,
-					       cfs_cpt_spread_node(cptab, i));
-		if (!arr->va_ptrs[i]) {
-			cfs_percpt_free((void *)&arr->va_ptrs[0]);
-			return NULL;
-		}
-	}
-
-	return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_percpt_alloc);
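
A hand-written sketch of the allocation pattern described above (cfs_percpt_for_each is the iterator used elsewhere in these files; cfs_cpt_tab is the global partition table):

	atomic_t **counters;
	atomic_t *c;
	int i;

	counters = cfs_percpt_alloc(cfs_cpt_tab, sizeof(**counters));
	if (!counters)
		return -ENOMEM;

	cfs_percpt_for_each(c, i, counters)	/* one block per partition */
		atomic_set(c, 0);

	/* ... hot paths index by partition ID: atomic_inc(counters[cpt]) ... */

	cfs_percpt_free(counters);
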
-
-/*
- * return the number of CPU partitions (i.e. the number of elements in the
- * per-cpu data) according to the cptab of @vars
- */
-int
-cfs_percpt_number(void *vars)
-{
-	struct cfs_var_array *arr;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-	return arr->va_count;
-}
-EXPORT_SYMBOL(cfs_percpt_number);
-
-/*
- * free variable array, see more detail in cfs_array_alloc
- */
-void
-cfs_array_free(void *vars)
-{
-	struct cfs_var_array *arr;
-	int i;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-	for (i = 0; i < arr->va_count; i++) {
-		if (!arr->va_ptrs[i])
-			continue;
-
-		kvfree(arr->va_ptrs[i]);
-	}
-	kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_array_free);
-
-/*
- * allocate a variable array, returned value is an array of pointers.
- * Caller can specify length of array by @count, @size is size of each
- * memory block in array.
- */
-void *
-cfs_array_alloc(int count, unsigned int size)
-{
-	struct cfs_var_array *arr;
-	int i;
-
-	/* zeroed so cfs_array_free() never sees uninitialized pointers */
-	arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]), GFP_KERNEL);
-	if (!arr)
-		return NULL;
-
-	arr->va_count = count;
-	arr->va_size = size;
-
-	for (i = 0; i < count; i++) {
-		arr->va_ptrs[i] = kvzalloc(size, GFP_KERNEL);
-
-		if (!arr->va_ptrs[i]) {
-			cfs_array_free((void *)&arr->va_ptrs[0]);
-			return NULL;
-		}
-	}
-
-	return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_array_alloc);

+ 0 - 562
drivers/staging/lustre/lnet/libcfs/libcfs_string.c

@@ -1,562 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * String manipulation functions.
- *
- * libcfs/libcfs/libcfs_string.c
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#include <linux/ctype.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_string.h>
-
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
-		 int *oldmask, int minmask, int allmask)
-{
-	const char *debugstr;
-	char op = '\0';
-	int newmask = minmask, i, len, found = 0;
-
-	/* <str> must be a list of tokens separated by whitespace
-	 * and optionally an operator ('+' or '-').  If an operator
-	 * appears first in <str>, '*oldmask' is used as the starting point
-	 * (relative), otherwise minmask is used (absolute).  An operator
-	 * applies to all following tokens up to the next operator.
-	 */
-	while (*str != '\0') {
-		while (isspace(*str))
-			str++;
-		if (*str == '\0')
-			break;
-		if (*str == '+' || *str == '-') {
-			op = *str++;
-			if (!found)
-				/* only if first token is relative */
-				newmask = *oldmask;
-			while (isspace(*str))
-				str++;
-			if (*str == '\0')  /* trailing op */
-				return -EINVAL;
-		}
-
-		/* find token length */
-		len = 0;
-		while (str[len] != '\0' && !isspace(str[len]) &&
-		       str[len] != '+' && str[len] != '-')
-			len++;
-
-		/* match token */
-		found = 0;
-		for (i = 0; i < 32; i++) {
-			debugstr = bit2str(i);
-			if (debugstr && strlen(debugstr) == len &&
-			    !strncasecmp(str, debugstr, len)) {
-				if (op == '-')
-					newmask &= ~(1 << i);
-				else
-					newmask |= (1 << i);
-				found = 1;
-				break;
-			}
-		}
-		if (!found && len == 3 &&
-		    !strncasecmp(str, "ALL", len)) {
-			if (op == '-')
-				newmask = minmask;
-			else
-				newmask = allmask;
-			found = 1;
-		}
-		if (!found) {
-			CWARN("unknown mask '%.*s'.\n"
-			      "mask usage: [+|-]<all|type> ...\n", len, str);
-			return -EINVAL;
-		}
-		str += len;
-	}
-
-	*oldmask = newmask;
-	return 0;
-}
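
A hand-written sketch of driving this parser with a toy bit-name table (ex_bit2str is illustrative, not from the tree; the real callers pass the libcfs debug/subsystem name tables):

	static const char *ex_bit2str(int bit)
	{
		switch (bit) {
		case 0: return "error";
		case 1: return "warning";
		case 2: return "info";
		default: return NULL;
		}
	}

	int mask = 0;

	/* absolute: no leading operator, result is BIT(0) | BIT(2) */
	cfs_str2mask("error info", ex_bit2str, &mask, 0, ~0);

	/* relative: leading operator, starts from the old mask, clears bit 2 */
	cfs_str2mask("-info", ex_bit2str, &mask, 0, ~0);
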
-
-/* get the first string out of @str */
-char *cfs_firststr(char *str, size_t size)
-{
-	size_t i = 0;
-	char *end;
-
-	/* trim leading spaces */
-	while (i < size && *str && isspace(*str)) {
-		++i;
-		++str;
-	}
-
-	/* string with all spaces */
-	if (*str == '\0')
-		goto out;
-
-	end = str;
-	while (i < size && *end != '\0' && !isspace(*end)) {
-		++i;
-		++end;
-	}
-
-	*end = '\0';
-out:
-	return str;
-}
-EXPORT_SYMBOL(cfs_firststr);
-
-/**
- * Extracts tokens from strings.
- *
- * Looks for \a delim in string \a next, sets \a res to point to
- * substring before the delimiter, sets \a next right after the found
- * delimiter.
- *
- * \retval 1 if \a res points to a string of non-whitespace characters
- * \retval 0 otherwise
- */
-int
-cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
-{
-	char *end;
-
-	if (!next->ls_str)
-		return 0;
-
-	/* skip leading white spaces */
-	while (next->ls_len) {
-		if (!isspace(*next->ls_str))
-			break;
-		next->ls_str++;
-		next->ls_len--;
-	}
-
-	if (!next->ls_len) /* whitespaces only */
-		return 0;
-
-	if (*next->ls_str == delim) {
-		/* first non-whitespace character is the delimiter */
-		return 0;
-	}
-
-	res->ls_str = next->ls_str;
-	end = memchr(next->ls_str, delim, next->ls_len);
-	if (!end) {
-		/* the delimiter is not present in the string */
-		end = next->ls_str + next->ls_len;
-		next->ls_str = NULL;
-	} else {
-		next->ls_str = end + 1;
-		next->ls_len -= (end - res->ls_str + 1);
-	}
-
-	/* skip ending whitespaces */
-	while (--end != res->ls_str) {
-		if (!isspace(*end))
-			break;
-	}
-
-	res->ls_len = end - res->ls_str + 1;
-	return 1;
-}
-EXPORT_SYMBOL(cfs_gettok);
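
A hand-written usage sketch (struct cfs_lstr is defined in the deleted libcfs_string.h):

	char buf[] = "alpha, beta ,gamma";
	struct cfs_lstr src = { .ls_str = buf, .ls_len = strlen(buf) };
	struct cfs_lstr tok;

	while (cfs_gettok(&src, ',', &tok))
		pr_info("token: '%.*s'\n", tok.ls_len, tok.ls_str);
	/* yields 'alpha', 'beta', 'gamma' with surrounding blanks trimmed */
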
-
-/**
- * Converts string to integer.
- *
- * Accepts decimal and hexadecimal number recordings.
- *
- * \retval 1 if first \a nob chars of \a str convert to decimal or
- * hexadecimal integer in the range [\a min, \a max]
- * \retval 0 otherwise
- */
-int
-cfs_str2num_check(char *str, int nob, unsigned int *num,
-		  unsigned int min, unsigned int max)
-{
-	bool all_numbers = true;
-	char *endp, cache;
-	int rc;
-
-	/**
- * kstrtouint() can only handle strings composed
-	 * of only numbers. We need to scan the string
-	 * passed in for the first non-digit character
-	 * and end the string at that location. If we
-	 * don't find any non-digit character we still
-	 * need to place a '\0' at position nob since
-	 * we are not interested in the rest of the
-	 * string which is longer than nob in size.
-	 * After we are done the character at the
-	 * position we placed '\0' must be restored.
-	 */
-	for (endp = str; endp < str + nob; endp++) {
-		if (!isdigit(*endp)) {
-			all_numbers = false;
-			break;
-		}
-	}
-	cache = *endp;
-	*endp = '\0';
-
-	rc = kstrtouint(str, 10, num);
-	*endp = cache;
-	if (rc || !all_numbers)
-		return 0;
-
-	return (*num >= min && *num <= max);
-}
-EXPORT_SYMBOL(cfs_str2num_check);
-
-/**
- * Parses \<range_expr\> token of the syntax. If \a bracketed is false,
- * \a src should only have a single token which can be \<number\> or  \*
- *
- * \retval 0, with \a expr pointing to an allocated cfs_range_expr whose
- * re_lo, re_hi and re_stride are initialized, if \a src parses to
- * \<number\> |
- * \<number\> '-' \<number\> |
- * \<number\> '-' \<number\> '/' \<number\>
- * \retval -EINVAL or -ENOMEM otherwise
- */
-static int
-cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max,
-		     int bracketed, struct cfs_range_expr **expr)
-{
-	struct cfs_range_expr *re;
-	struct cfs_lstr tok;
-
-	re = kzalloc(sizeof(*re), GFP_NOFS);
-	if (!re)
-		return -ENOMEM;
-
-	if (src->ls_len == 1 && src->ls_str[0] == '*') {
-		re->re_lo = min;
-		re->re_hi = max;
-		re->re_stride = 1;
-		goto out;
-	}
-
-	if (cfs_str2num_check(src->ls_str, src->ls_len,
-			      &re->re_lo, min, max)) {
-		/* <number> is parsed */
-		re->re_hi = re->re_lo;
-		re->re_stride = 1;
-		goto out;
-	}
-
-	if (!bracketed || !cfs_gettok(src, '-', &tok))
-		goto failed;
-
-	if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
-			       &re->re_lo, min, max))
-		goto failed;
-
-	/* <number> - */
-	if (cfs_str2num_check(src->ls_str, src->ls_len,
-			      &re->re_hi, min, max)) {
-		/* <number> - <number> is parsed */
-		re->re_stride = 1;
-		goto out;
-	}
-
-	/* go to check <number> '-' <number> '/' <number> */
-	if (cfs_gettok(src, '/', &tok)) {
-		if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
-				       &re->re_hi, min, max))
-			goto failed;
-
-		/* <number> - <number> / ... */
-		if (cfs_str2num_check(src->ls_str, src->ls_len,
-				      &re->re_stride, min, max)) {
-			/* <number> - <number> / <number> is parsed */
-			goto out;
-		}
-	}
-
- out:
-	*expr = re;
-	return 0;
-
- failed:
-	kfree(re);
-	return -EINVAL;
-}
-
-/**
- * Print the range expression \a expr into the specified \a buffer.
- * If \a bracketed is true, expression does not need additional
- * brackets.
- *
- * \retval number of characters written
- */
-static int
-cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
-		     bool bracketed)
-{
-	int i;
-	char s[] = "[";
-	char e[] = "]";
-
-	if (bracketed) {
-		s[0] = '\0';
-		e[0] = '\0';
-	}
-
-	if (expr->re_lo == expr->re_hi)
-		i = scnprintf(buffer, count, "%u", expr->re_lo);
-	else if (expr->re_stride == 1)
-		i = scnprintf(buffer, count, "%s%u-%u%s",
-			      s, expr->re_lo, expr->re_hi, e);
-	else
-		i = scnprintf(buffer, count, "%s%u-%u/%u%s",
-			      s, expr->re_lo, expr->re_hi, expr->re_stride, e);
-	return i;
-}
-
-/**
- * Print a list of range expressions (\a expr_list) into specified \a buffer.
- * If the list contains several expressions, separate them with comma
- * and surround the list with brackets.
- *
- * \retval number of characters written
- */
-int
-cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
-{
-	struct cfs_range_expr *expr;
-	int i = 0, j = 0;
-	int numexprs = 0;
-
-	if (count <= 0)
-		return 0;
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link)
-		numexprs++;
-
-	if (numexprs > 1)
-		i += scnprintf(buffer + i, count - i, "[");
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		if (j++)
-			i += scnprintf(buffer + i, count - i, ",");
-		i += cfs_range_expr_print(buffer + i, count - i, expr,
-					  numexprs > 1);
-	}
-
-	if (numexprs > 1)
-		i += scnprintf(buffer + i, count - i, "]");
-
-	return i;
-}
-EXPORT_SYMBOL(cfs_expr_list_print);
-
-/**
- * Matches value (\a value) against the range expression list \a expr_list.
- *
- * \retval 1 if \a value matches
- * \retval 0 otherwise
- */
-int
-cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list)
-{
-	struct cfs_range_expr *expr;
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		if (value >= expr->re_lo && value <= expr->re_hi &&
-		    !((value - expr->re_lo) % expr->re_stride))
-			return 1;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(cfs_expr_list_match);
-
-/**
- * Convert expression list (\a expr_list) to an array of all matched values
- *
- * \retval N N is total number of all matched values
- * \retval 0 if expression list is empty
- * \retval < 0 for failure
- */
-int
-cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp)
-{
-	struct cfs_range_expr *expr;
-	u32 *val;
-	int count = 0;
-	int i;
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		for (i = expr->re_lo; i <= expr->re_hi; i++) {
-			if (!((i - expr->re_lo) % expr->re_stride))
-				count++;
-		}
-	}
-
-	if (!count) /* empty expression list */
-		return 0;
-
-	if (count > max) {
-		CERROR("Number of values %d exceeds max allowed %d\n",
-		       count, max);
-		return -EINVAL;
-	}
-
-	val = kvmalloc_array(count, sizeof(val[0]), GFP_KERNEL | __GFP_ZERO);
-	if (!val)
-		return -ENOMEM;
-
-	count = 0;
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		for (i = expr->re_lo; i <= expr->re_hi; i++) {
-			if (!((i - expr->re_lo) % expr->re_stride))
-				val[count++] = i;
-		}
-	}
-
-	*valpp = val;
-	return count;
-}
-EXPORT_SYMBOL(cfs_expr_list_values);
-
-/**
- * Frees cfs_range_expr structures of \a expr_list.
- *
- * \retval none
- */
-void
-cfs_expr_list_free(struct cfs_expr_list *expr_list)
-{
-	while (!list_empty(&expr_list->el_exprs)) {
-		struct cfs_range_expr *expr;
-
-		expr = list_entry(expr_list->el_exprs.next,
-				  struct cfs_range_expr, re_link);
-		list_del(&expr->re_link);
-		kfree(expr);
-	}
-
-	kfree(expr_list);
-}
-EXPORT_SYMBOL(cfs_expr_list_free);
-
-/**
- * Parses \<cfs_expr_list\> token of the syntax.
- *
- * \retval 0 if \a str parses to \<number\> | \<expr_list\>
- * \retval -errno otherwise
- */
-int
-cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
-		    struct cfs_expr_list **elpp)
-{
-	struct cfs_expr_list *expr_list;
-	struct cfs_range_expr *expr;
-	struct cfs_lstr	src;
-	int rc;
-
-	expr_list = kzalloc(sizeof(*expr_list), GFP_NOFS);
-	if (!expr_list)
-		return -ENOMEM;
-
-	src.ls_str = str;
-	src.ls_len = len;
-
-	INIT_LIST_HEAD(&expr_list->el_exprs);
-
-	if (src.ls_str[0] == '[' &&
-	    src.ls_str[src.ls_len - 1] == ']') {
-		src.ls_str++;
-		src.ls_len -= 2;
-
-		rc = -EINVAL;
-		while (src.ls_str) {
-			struct cfs_lstr tok;
-
-			if (!cfs_gettok(&src, ',', &tok)) {
-				rc = -EINVAL;
-				break;
-			}
-
-			rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
-			if (rc)
-				break;
-
-			list_add_tail(&expr->re_link, &expr_list->el_exprs);
-		}
-	} else {
-		rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
-		if (!rc)
-			list_add_tail(&expr->re_link, &expr_list->el_exprs);
-	}
-
-	if (rc)
-		cfs_expr_list_free(expr_list);
-	else
-		*elpp = expr_list;
-
-	return rc;
-}
-EXPORT_SYMBOL(cfs_expr_list_parse);
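
A hand-written sketch of the full expression pipeline, from text to value array (the expansion follows the stride rule implemented above):

	struct cfs_expr_list *el;
	char buf[] = "[0-7/2,9]";
	u32 *vals;
	int n;

	if (cfs_expr_list_parse(buf, strlen(buf), 0, 15, &el))
		return -EINVAL;

	n = cfs_expr_list_values(el, 16, &vals);	/* n == 5: 0,2,4,6,9 */
	if (n > 0) {
		/* ... use vals[0..n-1] ... */
		kvfree(vals);
	}
	cfs_expr_list_free(el);
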
-
-/**
- * Frees cfs_expr_list structures of \a list.
- *
- * For each struct cfs_expr_list structure found on \a list it frees
- * range_expr list attached to it and frees the cfs_expr_list itself.
- *
- * \retval none
- */
-void
-cfs_expr_list_free_list(struct list_head *list)
-{
-	struct cfs_expr_list *el;
-
-	while (!list_empty(list)) {
-		el = list_entry(list->next, struct cfs_expr_list, el_link);
-		list_del(&el->el_link);
-		cfs_expr_list_free(el);
-	}
-}
-EXPORT_SYMBOL(cfs_expr_list_free_list);

+ 0 - 139
drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c

@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-/*
- * These are crypto API shash wrappers around zlib_adler32().
- */
-
-#include <linux/module.h>
-#include <linux/zutil.h>
-#include <crypto/internal/hash.h>
-#include "linux-crypto.h"
-
-#define CHKSUM_BLOCK_SIZE	1
-#define CHKSUM_DIGEST_SIZE	4
-
-static int adler32_cra_init(struct crypto_tfm *tfm)
-{
-	u32 *key = crypto_tfm_ctx(tfm);
-
-	*key = 1;
-
-	return 0;
-}
-
-static int adler32_setkey(struct crypto_shash *hash, const u8 *key,
-			  unsigned int keylen)
-{
-	u32 *mctx = crypto_shash_ctx(hash);
-
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-	*mctx = *(u32 *)key;
-	return 0;
-}
-
-static int adler32_init(struct shash_desc *desc)
-{
-	u32 *mctx = crypto_shash_ctx(desc->tfm);
-	u32 *cksump = shash_desc_ctx(desc);
-
-	*cksump = *mctx;
-
-	return 0;
-}
-
-static int adler32_update(struct shash_desc *desc, const u8 *data,
-			  unsigned int len)
-{
-	u32 *cksump = shash_desc_ctx(desc);
-
-	*cksump = zlib_adler32(*cksump, data, len);
-	return 0;
-}
-
-static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len,
-			   u8 *out)
-{
-	*(u32 *)out = zlib_adler32(*cksump, data, len);
-	return 0;
-}
-
-static int adler32_finup(struct shash_desc *desc, const u8 *data,
-			 unsigned int len, u8 *out)
-{
-	return __adler32_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int adler32_final(struct shash_desc *desc, u8 *out)
-{
-	u32 *cksump = shash_desc_ctx(desc);
-
-	*(u32 *)out = *cksump;
-	return 0;
-}
-
-static int adler32_digest(struct shash_desc *desc, const u8 *data,
-			  unsigned int len, u8 *out)
-{
-	return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len, out);
-}
-
-static struct shash_alg alg = {
-	.setkey		= adler32_setkey,
-	.init		= adler32_init,
-	.update		= adler32_update,
-	.final		= adler32_final,
-	.finup		= adler32_finup,
-	.digest		= adler32_digest,
-	.descsize	= sizeof(u32),
-	.digestsize	= CHKSUM_DIGEST_SIZE,
-	.base		= {
-		.cra_name		= "adler32",
-		.cra_driver_name	= "adler32-zlib",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
-		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(u32),
-		.cra_module		= THIS_MODULE,
-		.cra_init		= adler32_cra_init,
-	}
-};
-
-int cfs_crypto_adler32_register(void)
-{
-	return crypto_register_shash(&alg);
-}
-
-void cfs_crypto_adler32_unregister(void)
-{
-	crypto_unregister_shash(&alg);
-}

+ 0 - 447
drivers/staging/lustre/lnet/libcfs/linux-crypto.c

@@ -1,447 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-
-#include <crypto/hash.h>
-#include <linux/scatterlist.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs_crypto.h>
-#include <linux/libcfs/libcfs.h>
-#include "linux-crypto.h"
-
-/**
- *  Array of hash algorithm speed in MByte per second
- */
-static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
-
-/**
- * Initialize the state descriptor for the specified hash algorithm.
- *
- * An internal routine to allocate the hash-specific state in \a req for
- * use with cfs_crypto_hash_digest() to compute the hash of a single message,
- * though possibly in multiple chunks.  The descriptor internal state should
- * be freed with cfs_crypto_hash_final().
- *
- * \param[in]	  hash_alg	hash algorithm id (CFS_HASH_ALG_*)
- * \param[out]	  type		pointer to the hash description in hash_types[]
- *				array
- * \param[in,out] req		hash state descriptor to be initialized
- * \param[in]	  key		initial hash value/state, NULL to use default
- *				value
- * \param[in]	  key_len	length of \a key
- *
- * \retval			0 on success
- * \retval			negative errno on failure
- */
-static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
-				 const struct cfs_crypto_hash_type **type,
-				 struct ahash_request **req,
-				 unsigned char *key,
-				 unsigned int key_len)
-{
-	struct crypto_ahash *tfm;
-	int err = 0;
-
-	*type = cfs_crypto_hash_type(hash_alg);
-
-	if (!*type) {
-		CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
-		      hash_alg, CFS_HASH_ALG_MAX);
-		return -EINVAL;
-	}
-	tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC);
-
-	if (IS_ERR(tfm)) {
-		CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n",
-		       (*type)->cht_name);
-		return PTR_ERR(tfm);
-	}
-
-	*req = ahash_request_alloc(tfm, GFP_KERNEL);
-	if (!*req) {
-		CDEBUG(D_INFO, "Failed to alloc ahash_request for %s\n",
-		       (*type)->cht_name);
-		crypto_free_ahash(tfm);
-		return -ENOMEM;
-	}
-
-	ahash_request_set_callback(*req, 0, NULL, NULL);
-
-	if (key)
-		err = crypto_ahash_setkey(tfm, key, key_len);
-	else if ((*type)->cht_key)
-		err = crypto_ahash_setkey(tfm,
-					  (unsigned char *)&((*type)->cht_key),
-					  (*type)->cht_size);
-
-	if (err) {
-		ahash_request_free(*req);
-		crypto_free_ahash(tfm);
-		return err;
-	}
-
-	CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
-	       crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm),
-	       cfs_crypto_hash_speeds[hash_alg]);
-
-	err = crypto_ahash_init(*req);
-	if (err) {
-		ahash_request_free(*req);
-		crypto_free_ahash(tfm);
-	}
-	return err;
-}
-
-/**
- * Calculate hash digest for the passed buffer.
- *
- * This should be used when computing the hash on a single contiguous buffer.
- * It combines the hash initialization, computation, and cleanup.
- *
- * \param[in]	  hash_alg	id of hash algorithm (CFS_HASH_ALG_*)
- * \param[in]	  buf		data buffer on which to compute hash
- * \param[in]	  buf_len	length of \a buf in bytes
- * \param[in]	  key		initial value/state for algorithm,
- *				if \a key = NULL use default initial value
- * \param[in]	  key_len	length of \a key in bytes
 - * \param[out]	  hash		pointer to computed hash value; if \a hash is
 - *				NULL, \a hash_len is set to the digest size
 - *				in bytes and -ENOSPC is returned
- * \param[in,out] hash_len	size of \a hash buffer
- *
- * \retval -EINVAL		\a buf, \a buf_len, \a hash_len,
- *				\a hash_alg invalid
- * \retval -ENOENT		\a hash_alg is unsupported
- * \retval -ENOSPC		\a hash is NULL, or \a hash_len less than
- *				digest size
- * \retval			0 for success
- * \retval			negative errno for other errors from lower
- *				layers.
- */
-int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
-			   const void *buf, unsigned int buf_len,
-			   unsigned char *key, unsigned int key_len,
-			   unsigned char *hash, unsigned int *hash_len)
-{
-	struct scatterlist sl;
-	struct ahash_request *req;
-	int err;
-	const struct cfs_crypto_hash_type *type;
-
-	if (!buf || !buf_len || !hash_len)
-		return -EINVAL;
-
-	err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
-	if (err)
-		return err;
-
-	if (!hash || *hash_len < type->cht_size) {
-		*hash_len = type->cht_size;
-		crypto_free_ahash(crypto_ahash_reqtfm(req));
-		ahash_request_free(req);
-		return -ENOSPC;
-	}
-	sg_init_one(&sl, buf, buf_len);
-
-	ahash_request_set_crypt(req, &sl, hash, sl.length);
-	err = crypto_ahash_digest(req);
-	crypto_free_ahash(crypto_ahash_reqtfm(req));
-	ahash_request_free(req);
-
-	return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_digest);
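
A minimal one-shot usage sketch (hand-written; CFS_HASH_ALG_ADLER32 is assumed to be the algorithm id from the deleted libcfs_crypto.h):

	static const char msg[] = "payload to checksum";
	unsigned char digest[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int dlen = sizeof(digest);
	int rc;

	rc = cfs_crypto_hash_digest(CFS_HASH_ALG_ADLER32, msg, sizeof(msg),
				    NULL, 0, digest, &dlen);
	/* on success dlen holds the actual digest size (4 for adler32) */
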
-
-/**
- * Allocate and initialize descriptor for hash algorithm.
- *
- * This should be used to initialize a hash descriptor for multiple calls
- * to a single hash function when computing the hash across multiple
- * separate buffers or pages using cfs_crypto_hash_update{,_page}().
- *
- * The hash descriptor should be freed with cfs_crypto_hash_final().
- *
- * \param[in] hash_alg	algorithm id (CFS_HASH_ALG_*)
- * \param[in] key	initial value/state for algorithm, if \a key = NULL
- *			use default initial value
- * \param[in] key_len	length of \a key in bytes
- *
- * \retval		pointer to descriptor of hash instance
- * \retval		ERR_PTR(errno) in case of error
- */
-struct ahash_request *
-cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
-		     unsigned char *key, unsigned int key_len)
-{
-	struct ahash_request *req;
-	int err;
-	const struct cfs_crypto_hash_type *type;
-
-	err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
-
-	if (err)
-		return ERR_PTR(err);
-	return req;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_init);
-
-/**
- * Update hash digest computed on data within the given \a page
- *
 - * \param[in] req	hash state descriptor
- * \param[in] page	data page on which to compute the hash
- * \param[in] offset	offset within \a page at which to start hash
- * \param[in] len	length of data on which to compute hash
- *
- * \retval		0 for success
- * \retval		negative errno on failure
- */
-int cfs_crypto_hash_update_page(struct ahash_request *req,
-				struct page *page, unsigned int offset,
-				unsigned int len)
-{
-	struct scatterlist sl;
-
-	sg_init_table(&sl, 1);
-	sg_set_page(&sl, page, len, offset & ~PAGE_MASK);
-
-	ahash_request_set_crypt(req, &sl, NULL, sl.length);
-	return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update_page);
-
-/**
- * Update hash digest computed on the specified data
- *
- * \param[in] req	hash state descriptor
- * \param[in] buf	data buffer on which to compute the hash
 - * \param[in] buf_len	length of \a buf on which to compute hash
- *
- * \retval		0 for success
- * \retval		negative errno on failure
- */
-int cfs_crypto_hash_update(struct ahash_request *req,
-			   const void *buf, unsigned int buf_len)
-{
-	struct scatterlist sl;
-
-	sg_init_one(&sl, buf, buf_len);
-
-	ahash_request_set_crypt(req, &sl, NULL, sl.length);
-	return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update);
-
-/**
- * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor
- *
- * \param[in]	  req		hash descriptor
- * \param[out]	  hash		pointer to hash buffer to store hash digest
 - * \param[in,out] hash_len	pointer to hash buffer size; if \a hash is
 - *				NULL, only free \a req instead of computing
 - *				the hash
- *
- * \retval	0 for success
- * \retval	-EOVERFLOW if hash_len is too small for the hash digest
- * \retval	negative errno for other errors from lower layers
- */
-int cfs_crypto_hash_final(struct ahash_request *req,
-			  unsigned char *hash, unsigned int *hash_len)
-{
-	int err;
-	int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
-
-	if (!hash || !hash_len) {
-		err = 0;
-		goto free_ahash;
-	}
-	if (*hash_len < size) {
-		err = -EOVERFLOW;
-		goto free_ahash;
-	}
-
-	ahash_request_set_crypt(req, NULL, hash, 0);
-	err = crypto_ahash_final(req);
-	if (!err)
-		*hash_len = size;
-free_ahash:
-	crypto_free_ahash(crypto_ahash_reqtfm(req));
-	ahash_request_free(req);
-	return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_final);
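
Together, cfs_crypto_hash_init/update{,_page}/final form the multi-buffer path. A hand-written sketch of that sequence (CFS_HASH_ALG_ADLER32 is assumed from the deleted libcfs_crypto.h):

	struct page *page = alloc_page(GFP_KERNEL);
	unsigned char digest[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int dlen = sizeof(digest);
	static const char hdr[] = "header";
	struct ahash_request *req;
	int rc;

	if (!page)
		return -ENOMEM;

	req = cfs_crypto_hash_init(CFS_HASH_ALG_ADLER32, NULL, 0);
	if (IS_ERR(req)) {
		__free_page(page);
		return PTR_ERR(req);
	}

	rc = cfs_crypto_hash_update(req, hdr, sizeof(hdr));
	if (!rc)
		rc = cfs_crypto_hash_update_page(req, page, 0, PAGE_SIZE);
	if (!rc)
		rc = cfs_crypto_hash_final(req, digest, &dlen);
	else
		cfs_crypto_hash_final(req, NULL, NULL);	/* only frees req */
	__free_page(page);
	return rc;
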
-
-/**
- * Compute the speed of specified hash function
- *
- * Run a speed test of the given hash algorithm on a fixed-size buffer.
- * The speed is stored internally in the cfs_crypto_hash_speeds[] array, and
- * is available through the cfs_crypto_hash_speed() function.
- *
- * \param[in] hash_alg	hash algorithm id (CFS_HASH_ALG_*)
- */
-static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg)
-{
-	int buf_len = max(PAGE_SIZE, 1048576UL);
-	void *buf;
-	unsigned long start, end;
-	int bcount, err = 0;
-	struct page *page;
-	unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
-	unsigned int hash_len = sizeof(hash);
-
-	page = alloc_page(GFP_KERNEL);
-	if (!page) {
-		err = -ENOMEM;
-		goto out_err;
-	}
-
-	buf = kmap(page);
-	memset(buf, 0xAD, PAGE_SIZE);
-	kunmap(page);
-
-	for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC),
-	     bcount = 0; time_before(jiffies, end); bcount++) {
-		struct ahash_request *hdesc;
-		int i;
-
-		hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0);
-		if (IS_ERR(hdesc)) {
-			err = PTR_ERR(hdesc);
-			break;
-		}
-
-		for (i = 0; i < buf_len / PAGE_SIZE; i++) {
-			err = cfs_crypto_hash_update_page(hdesc, page, 0,
-							  PAGE_SIZE);
-			if (err)
-				break;
-		}
-
-		err = cfs_crypto_hash_final(hdesc, hash, &hash_len);
-		if (err)
-			break;
-	}
-	end = jiffies;
-	__free_page(page);
-out_err:
-	if (err) {
-		cfs_crypto_hash_speeds[hash_alg] = err;
-		CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n",
-		       cfs_crypto_hash_name(hash_alg), err);
-	} else {
-		unsigned long tmp;
-
-		tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
-		       1000) / (1024 * 1024);
-		cfs_crypto_hash_speeds[hash_alg] = (int)tmp;
-		CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n",
-		       cfs_crypto_hash_name(hash_alg),
-		       cfs_crypto_hash_speeds[hash_alg]);
-	}
-}
-
-/**
- * Return the hash speed in MB/s for a valid hash algorithm.
- *
- * Return the performance of the specified \a hash_alg that was previously
- * computed using cfs_crypto_performance_test().
- *
- * \param[in] hash_alg	hash algorithm id (CFS_HASH_ALG_*)
- *
- * \retval		positive speed of the hash function in MB/s
- * \retval		-ENOENT if \a hash_alg is unsupported
- * \retval		negative errno if \a hash_alg speed is unavailable
- */
-int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg)
-{
-	if (hash_alg < CFS_HASH_ALG_MAX)
-		return cfs_crypto_hash_speeds[hash_alg];
-	return -ENOENT;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_speed);
-
-/**
- * Run the performance test for all hash algorithms.
- *
- * Run the cfs_crypto_performance_test() benchmark for all of the available
- * hash functions using a 1MB buffer size.  This is a reasonable buffer size
- * for Lustre RPCs, even if the actual RPC size is larger or smaller.
- *
- * Since the setup cost and computation speed of the various hash algorithms
- * are a function of the buffer size (and possibly internal contention of
- * offload engines), this speed is only an estimate of the speed under real
- * usage, but is reasonable for comparing the available algorithms.
- *
- * The actual speeds are available via cfs_crypto_hash_speed() for later
- * comparison.
- *
- * \retval	0 on success
- * \retval	-ENOMEM if no memory is available for test buffer
- */
-static int cfs_crypto_test_hashes(void)
-{
-	enum cfs_crypto_hash_alg hash_alg;
-
-	for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
-		cfs_crypto_performance_test(hash_alg);
-
-	return 0;
-}
-
-static int adler32;
-
-/**
- * Register available hash functions
- *
- * \retval	0
- */
-int cfs_crypto_register(void)
-{
-	request_module("crc32c");
-
-	if (cfs_crypto_adler32_register() == 0)
-		adler32 = 1;
-
-	/* check all algorithms and do performance test */
-	cfs_crypto_test_hashes();
-	return 0;
-}
-
-/**
- * Unregister previously registered hash functions
- */
-void cfs_crypto_unregister(void)
-{
-	if (adler32)
-		cfs_crypto_adler32_unregister();
-	adler32 = 0;
-}

+ 0 - 30
drivers/staging/lustre/lnet/libcfs/linux-crypto.h

@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/**
- * Functions to register/unregister the adler32 shash algorithm.
- */
-int cfs_crypto_adler32_register(void);
-void cfs_crypto_adler32_unregister(void);

+ 0 - 142
drivers/staging/lustre/lnet/libcfs/linux-debug.c

@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/linux/linux-debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include "tracefile.h"
-
-#include <linux/kallsyms.h>
-
-char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
-
-/**
- * Upcall function once a Lustre log has been dumped.
- *
- * \param file  path of the dumped log
- */
-void libcfs_run_debug_log_upcall(char *file)
-{
-	char *argv[3];
-	int rc;
-	static const char * const envp[] = {
-		"HOME=/",
-		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-		NULL
-	};
-
-	argv[0] = lnet_debug_log_upcall;
-
-	LASSERTF(file, "called on a null filename\n");
-	argv[1] = file; /* only need to pass the path of the file */
-
-	argv[2] = NULL;
-
-	rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);
-	if (rc < 0 && rc != -ENOENT) {
-		CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
-		       rc, argv[0], argv[1]);
-	} else {
-		CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
-		       argv[0], argv[1]);
-	}
-}
-
-/* coverity[+kill] */
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
-{
-	libcfs_catastrophe = 1;
-	libcfs_debug_msg(msgdata, "LBUG\n");
-
-	if (in_interrupt()) {
-		panic("LBUG in interrupt.\n");
-		/* not reached */
-	}
-
-	dump_stack();
-	if (!libcfs_panic_on_lbug)
-		libcfs_debug_dumplog();
-	if (libcfs_panic_on_lbug)
-		panic("LBUG");
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	while (1)
-		schedule();
-}
-EXPORT_SYMBOL(lbug_with_loc);
-
-static int panic_notifier(struct notifier_block *self, unsigned long unused1,
-			  void *unused2)
-{
-	if (libcfs_panic_in_progress)
-		return 0;
-
-	libcfs_panic_in_progress = 1;
-	mb();
-
-	return 0;
-}
-
-static struct notifier_block libcfs_panic_notifier = {
-	.notifier_call	= panic_notifier,
-	.next		= NULL,
-	.priority	= 10000,
-};
-
-void libcfs_register_panic_notifier(void)
-{
-	atomic_notifier_chain_register(&panic_notifier_list,
-				       &libcfs_panic_notifier);
-}
-
-void libcfs_unregister_panic_notifier(void)
-{
-	atomic_notifier_chain_unregister(&panic_notifier_list,
-					 &libcfs_panic_notifier);
-}

+ 0 - 258
drivers/staging/lustre/lnet/libcfs/linux-tracefile.c

@@ -1,258 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include "tracefile.h"
-
-/* percentage of the total debug memory given to each type */
-static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
-	80,  /* 80% pages for CFS_TCD_TYPE_PROC */
-	10,  /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
-	10   /* 10% pages for CFS_TCD_TYPE_IRQ */
-};
-
-char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-
-static DECLARE_RWSEM(cfs_tracefile_sem);
-
-int cfs_tracefile_init_arch(void)
-{
-	int i;
-	int j;
-	struct cfs_trace_cpu_data *tcd;
-
-	/* initialize trace_data */
-	memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
-	for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
-		cfs_trace_data[i] =
-			kmalloc_array(num_possible_cpus(),
-				      sizeof(union cfs_trace_data_union),
-				      GFP_KERNEL);
-		if (!cfs_trace_data[i])
-			goto out;
-	}
-
-	/* arch related info initialized */
-	cfs_tcd_for_each(tcd, i, j) {
-		spin_lock_init(&tcd->tcd_lock);
-		tcd->tcd_pages_factor = pages_factor[i];
-		tcd->tcd_type = i;
-		tcd->tcd_cpu = j;
-	}
-
-	for (i = 0; i < num_possible_cpus(); i++)
-		for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
-			cfs_trace_console_buffers[i][j] =
-				kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
-					GFP_KERNEL);
-
-			if (!cfs_trace_console_buffers[i][j])
-				goto out;
-		}
-
-	return 0;
-
-out:
-	cfs_tracefile_fini_arch();
-	pr_err("lnet: Not enough memory\n");
-	return -ENOMEM;
-}
-
-void cfs_tracefile_fini_arch(void)
-{
-	int i;
-	int j;
-
-	for (i = 0; i < num_possible_cpus(); i++)
-		for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
-			kfree(cfs_trace_console_buffers[i][j]);
-			cfs_trace_console_buffers[i][j] = NULL;
-		}
-
-	for (i = 0; cfs_trace_data[i]; i++) {
-		kfree(cfs_trace_data[i]);
-		cfs_trace_data[i] = NULL;
-	}
-}
-
-void cfs_tracefile_read_lock(void)
-{
-	down_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_read_unlock(void)
-{
-	up_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_lock(void)
-{
-	down_write(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_unlock(void)
-{
-	up_write(&cfs_tracefile_sem);
-}
-
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
-{
-	if (in_irq())
-		return CFS_TCD_TYPE_IRQ;
-	if (in_softirq())
-		return CFS_TCD_TYPE_SOFTIRQ;
-	return CFS_TCD_TYPE_PROC;
-}
-
-/*
- * The walking argument indicates that the locking comes from the
- * all-tcd-types iterator, in which case we must take the lock and disable
- * local irqs to avoid deadlocks with other interrupt locks that might be
- * held. See LU-1311 for details.
- */
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
-	__acquires(&tcd->tc_lock)
-{
-	__LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
-	if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
-		spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
-	else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
-		spin_lock_bh(&tcd->tcd_lock);
-	else if (unlikely(walking))
-		spin_lock_irq(&tcd->tcd_lock);
-	else
-		spin_lock(&tcd->tcd_lock);
-	return 1;
-}
-
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
-	__releases(&tcd->tcd_lock)
-{
-	__LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
-	if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
-		spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
-	else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
-		spin_unlock_bh(&tcd->tcd_lock);
-	else if (unlikely(walking))
-		spin_unlock_irq(&tcd->tcd_lock);
-	else
-		spin_unlock(&tcd->tcd_lock);
-}
-
-void
-cfs_set_ptldebug_header(struct ptldebug_header *header,
-			struct libcfs_debug_msg_data *msgdata,
-			unsigned long stack)
-{
-	struct timespec64 ts;
-
-	ktime_get_real_ts64(&ts);
-
-	header->ph_subsys = msgdata->msg_subsys;
-	header->ph_mask = msgdata->msg_mask;
-	header->ph_cpu_id = smp_processor_id();
-	header->ph_type = cfs_trace_buf_idx_get();
-	/* y2038 safe since all user space treats this as unsigned, but
-	 * will overflow in 2106
-	 */
-	header->ph_sec = (u32)ts.tv_sec;
-	header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
-	header->ph_stack = stack;
-	header->ph_pid = current->pid;
-	header->ph_line_num = msgdata->msg_line;
-	header->ph_extern_pid = 0;
-}
-
-static char *
-dbghdr_to_err_string(struct ptldebug_header *hdr)
-{
-	switch (hdr->ph_subsys) {
-	case S_LND:
-	case S_LNET:
-		return "LNetError";
-	default:
-		return "LustreError";
-	}
-}
-
-static char *
-dbghdr_to_info_string(struct ptldebug_header *hdr)
-{
-	switch (hdr->ph_subsys) {
-	case S_LND:
-	case S_LNET:
-		return "LNet";
-	default:
-		return "Lustre";
-	}
-}
-
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
-			  const char *buf, int len, const char *file,
-			  const char *fn)
-{
-	char *prefix = "Lustre", *ptype = NULL;
-
-	if (mask & D_EMERG) {
-		prefix = dbghdr_to_err_string(hdr);
-		ptype = KERN_EMERG;
-	} else if (mask & D_ERROR) {
-		prefix = dbghdr_to_err_string(hdr);
-		ptype = KERN_ERR;
-	} else if (mask & D_WARNING) {
-		prefix = dbghdr_to_info_string(hdr);
-		ptype = KERN_WARNING;
-	} else if (mask & (D_CONSOLE | libcfs_printk)) {
-		prefix = dbghdr_to_info_string(hdr);
-		ptype = KERN_INFO;
-	}
-
-	if (mask & D_CONSOLE) {
-		pr_info("%s%s: %.*s", ptype, prefix, len, buf);
-	} else {
-		pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
-			hdr->ph_pid, hdr->ph_extern_pid, file,
-			hdr->ph_line_num, fn, len, buf);
-	}
-}
-
-int cfs_trace_max_debug_mb(void)
-{
-	int  total_mb = (totalram_pages >> (20 - PAGE_SHIFT));
-
-	return max(512, (total_mb * 80) / 100);
-}
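
cfs_trace_max_debug_mb() above caps the trace buffers at 80% of RAM, but never below 512 MB; cfs_trace_set_debug_mb() in tracefile.c further below clamps the requested size to that ceiling and splits it across the possible CPUs. A standalone userspace sketch of the arithmetic, assuming 4 KiB pages and omitting the per-type tcd_pages_factor weighting:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4 KiB pages */

/* ceiling: 80% of RAM, never less than 512 MB (cfs_trace_max_debug_mb) */
static int max_debug_mb(long total_ram_pages)
{
	int total_mb = total_ram_pages >> (20 - PAGE_SHIFT);
	int limit = (total_mb * 80) / 100;

	return limit > 512 ? limit : 512;
}

/* clamp the request to [ncpus, ceiling], then split it evenly */
static long pages_per_cpu(int mb, int ncpus, long total_ram_pages)
{
	int limit = max_debug_mb(total_ram_pages);

	if (mb < ncpus)
		mb = ncpus;	/* at least 1 MB per possible CPU */
	if (mb > limit)
		mb = limit;
	return (long)(mb / ncpus) << (20 - PAGE_SHIFT);
}

int main(void)
{
	long ram_pages = 16L << (30 - PAGE_SHIFT);	/* 16 GiB of RAM */

	/* 256 MB requested on an 8-CPU box: 32 MB, i.e. 8192 pages/CPU */
	printf("pages per CPU: %ld\n", pages_per_cpu(256, 8, ram_pages));
	return 0;
}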

+ 0 - 758
drivers/staging/lustre/lnet/libcfs/module.c

@@ -1,758 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-
-#include <linux/sysctl.h>
-#include <linux/debugfs.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <asm/div64.h>
-
-#include <linux/libcfs/libcfs_crypto.h>
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-#include "tracefile.h"
-
-struct lnet_debugfs_symlink_def {
-	char *name;
-	char *target;
-};
-
-static struct dentry *lnet_debugfs_root;
-
-BLOCKING_NOTIFIER_HEAD(libcfs_ioctl_list);
-EXPORT_SYMBOL(libcfs_ioctl_list);
-
-static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
-{
-	size_t len = sizeof(*data);
-
-	len += cfs_size_round(data->ioc_inllen1);
-	len += cfs_size_round(data->ioc_inllen2);
-	return len;
-}
-
-static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
-{
-	if (data->ioc_hdr.ioc_len > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inllen1 > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inllen2 > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
-		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_pbuf1 && !data->ioc_plen1) {
-		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_pbuf2 && !data->ioc_plen2) {
-		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_plen1 && !data->ioc_pbuf1) {
-		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
-		return true;
-	}
-	if (data->ioc_plen2 && !data->ioc_pbuf2) {
-		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
-		return true;
-	}
-	if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
-		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
-		return true;
-	}
-	if (data->ioc_inllen1 &&
-	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
-		return true;
-	}
-	if (data->ioc_inllen2 &&
-	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
-			   data->ioc_inllen2 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
-		return true;
-	}
-	return false;
-}
-
-static int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
-{
-	if (libcfs_ioctl_is_invalid(data)) {
-		CERROR("libcfs ioctl: parameter not correctly formatted\n");
-		return -EINVAL;
-	}
-
-	if (data->ioc_inllen1)
-		data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
-	if (data->ioc_inllen2)
-		data->ioc_inlbuf2 = &data->ioc_bulk[0] +
-			cfs_size_round(data->ioc_inllen1);
-
-	return 0;
-}
-
-static int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
-				const struct libcfs_ioctl_hdr __user *uhdr)
-{
-	struct libcfs_ioctl_hdr hdr;
-	int err;
-
-	if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
-		return -EFAULT;
-
-	if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
-	    hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
-		CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
-		       LIBCFS_IOCTL_VERSION, hdr.ioc_version);
-		return -EINVAL;
-	}
-
-	if (hdr.ioc_len < sizeof(hdr)) {
-		CERROR("libcfs ioctl: user buffer too small for ioctl\n");
-		return -EINVAL;
-	}
-
-	if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
-		CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
-		       hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
-		return -EINVAL;
-	}
-
-	*hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
-	if (!*hdr_pp)
-		return -ENOMEM;
-
-	if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
-		err = -EFAULT;
-		goto free;
-	}
-
-	if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
-	    (*hdr_pp)->ioc_len != hdr.ioc_len) {
-		err = -EINVAL;
-		goto free;
-	}
-
-	return 0;
-
-free:
-	kvfree(*hdr_pp);
-	return err;
-}
-
-static int libcfs_ioctl(unsigned long cmd, void __user *uparam)
-{
-	struct libcfs_ioctl_data *data = NULL;
-	struct libcfs_ioctl_hdr *hdr;
-	int err;
-
-	/* 'cmd' and permissions get checked in our arch-specific caller */
-	err = libcfs_ioctl_getdata(&hdr, uparam);
-	if (err) {
-		CDEBUG_LIMIT(D_ERROR,
-			     "libcfs ioctl: data header error %d\n", err);
-		return err;
-	}
-
-	if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) {
-		/*
-		 * libcfs_ioctl_data_adjust() fixes up the embedded buffer
-		 * pointers of the libcfs_ioctl_data structure so that the
-		 * payload can be used directly.  Data structures added
-		 * later do not need this call.
-		 */
-		data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
-		err = libcfs_ioctl_data_adjust(data);
-		if (err)
-			goto out;
-	}
-
-	CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd);
-	switch (cmd) {
-	case IOC_LIBCFS_CLEAR_DEBUG:
-		libcfs_debug_clear_buffer();
-		break;
-
-	case IOC_LIBCFS_MARK_DEBUG:
-		if (!data || !data->ioc_inlbuf1 ||
-		    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') {
-			err = -EINVAL;
-			goto out;
-		}
-		libcfs_debug_mark_buffer(data->ioc_inlbuf1);
-		break;
-
-	default:
-		err = blocking_notifier_call_chain(&libcfs_ioctl_list,
-						   cmd, hdr);
-		if (!(err & NOTIFY_STOP_MASK))
-			/* No-one claimed the ioctl */
-			err = -EINVAL;
-		else
-			err = notifier_to_errno(err);
-		if (!err)
-			if (copy_to_user(uparam, hdr, hdr->ioc_len))
-				err = -EFAULT;
-		break;
-	}
-out:
-	kvfree(hdr);
-	return err;
-}
-
-static long
-libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
-	    _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
-	    _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
-		CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
-		       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
-		return -EINVAL;
-	}
-
-	return libcfs_ioctl(cmd, (void __user *)arg);
-}
-
-static const struct file_operations libcfs_fops = {
-	.owner		= THIS_MODULE,
-	.unlocked_ioctl	= libcfs_psdev_ioctl,
-};
-
-static struct miscdevice libcfs_dev = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = "lnet",
-	.fops = &libcfs_fops,
-};
-
-static int libcfs_dev_registered;
-
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
-			 void __user *buffer, size_t *lenp,
-			 int (*handler)(void *data, int write, loff_t pos,
-					void __user *buffer, int len))
-{
-	int rc = handler(data, write, *ppos, buffer, *lenp);
-
-	if (rc < 0)
-		return rc;
-
-	if (write) {
-		*ppos += *lenp;
-	} else {
-		*lenp = rc;
-		*ppos += rc;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(lprocfs_call_handler);
-
-static int __proc_dobitmasks(void *data, int write,
-			     loff_t pos, void __user *buffer, int nob)
-{
-	const int tmpstrlen = 512;
-	char *tmpstr;
-	int rc;
-	unsigned int *mask = data;
-	int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
-	int is_printk = (mask == &libcfs_printk) ? 1 : 0;
-
-	rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
-	if (rc < 0)
-		return rc;
-
-	if (!write) {
-		libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
-		rc = strlen(tmpstr);
-
-		if (pos >= rc) {
-			rc = 0;
-		} else {
-			rc = cfs_trace_copyout_string(buffer, nob,
-						      tmpstr + pos, "\n");
-		}
-	} else {
-		rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
-		if (rc < 0) {
-			kfree(tmpstr);
-			return rc;
-		}
-
-		rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
-		/* Always print LBUG/LASSERT to console, so keep this mask */
-		if (is_printk)
-			*mask |= D_EMERG;
-	}
-
-	kfree(tmpstr);
-	return rc;
-}
-
-static int proc_dobitmasks(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_dobitmasks);
-}
-
-static int __proc_dump_kernel(void *data, int write,
-			      loff_t pos, void __user *buffer, int nob)
-{
-	if (!write)
-		return 0;
-
-	return cfs_trace_dump_debug_buffer_usrstr(buffer, nob);
-}
-
-static int proc_dump_kernel(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_dump_kernel);
-}
-
-static int __proc_daemon_file(void *data, int write,
-			      loff_t pos, void __user *buffer, int nob)
-{
-	if (!write) {
-		int len = strlen(cfs_tracefile);
-
-		if (pos >= len)
-			return 0;
-
-		return cfs_trace_copyout_string(buffer, nob,
-						cfs_tracefile + pos, "\n");
-	}
-
-	return cfs_trace_daemon_command_usrstr(buffer, nob);
-}
-
-static int proc_daemon_file(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_daemon_file);
-}
-
-static int libcfs_force_lbug(struct ctl_table *table, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
-{
-	if (write)
-		LBUG();
-	return 0;
-}
-
-static int proc_fail_loc(struct ctl_table *table, int write,
-			 void __user *buffer,
-			 size_t *lenp, loff_t *ppos)
-{
-	int rc;
-	long old_fail_loc = cfs_fail_loc;
-
-	rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
-	if (old_fail_loc != cfs_fail_loc)
-		wake_up(&cfs_race_waitq);
-	return rc;
-}
-
-static int __proc_cpt_table(void *data, int write,
-			    loff_t pos, void __user *buffer, int nob)
-{
-	char *buf = NULL;
-	int len = 4096;
-	int rc  = 0;
-
-	if (write)
-		return -EPERM;
-
-	while (1) {
-		buf = kzalloc(len, GFP_KERNEL);
-		if (!buf)
-			return -ENOMEM;
-
-		rc = cfs_cpt_table_print(cfs_cpt_tab, buf, len);
-		if (rc >= 0)
-			break;
-
-		if (rc == -EFBIG) {
-			kfree(buf);
-			len <<= 1;
-			continue;
-		}
-		goto out;
-	}
-
-	if (pos >= rc) {
-		rc = 0;
-		goto out;
-	}
-
-	rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
- out:
-	kfree(buf);
-	return rc;
-}
-
-static int proc_cpt_table(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_cpt_table);
-}
-
-static struct ctl_table lnet_table[] = {
-	{
-		.procname = "debug",
-		.data     = &libcfs_debug,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dobitmasks,
-	},
-	{
-		.procname = "subsystem_debug",
-		.data     = &libcfs_subsystem_debug,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dobitmasks,
-	},
-	{
-		.procname = "printk",
-		.data     = &libcfs_printk,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dobitmasks,
-	},
-	{
-		.procname = "cpu_partition_table",
-		.maxlen   = 128,
-		.mode     = 0444,
-		.proc_handler = &proc_cpt_table,
-	},
-	{
-		.procname = "debug_log_upcall",
-		.data     = lnet_debug_log_upcall,
-		.maxlen   = sizeof(lnet_debug_log_upcall),
-		.mode     = 0644,
-		.proc_handler = &proc_dostring,
-	},
-	{
-		.procname = "catastrophe",
-		.data     = &libcfs_catastrophe,
-		.maxlen   = sizeof(int),
-		.mode     = 0444,
-		.proc_handler = &proc_dointvec,
-	},
-	{
-		.procname = "dump_kernel",
-		.maxlen   = 256,
-		.mode     = 0200,
-		.proc_handler = &proc_dump_kernel,
-	},
-	{
-		.procname = "daemon_file",
-		.mode     = 0644,
-		.maxlen   = 256,
-		.proc_handler = &proc_daemon_file,
-	},
-	{
-		.procname = "force_lbug",
-		.data     = NULL,
-		.maxlen   = 0,
-		.mode     = 0200,
-		.proc_handler = &libcfs_force_lbug
-	},
-	{
-		.procname = "fail_loc",
-		.data     = &cfs_fail_loc,
-		.maxlen   = sizeof(cfs_fail_loc),
-		.mode     = 0644,
-		.proc_handler = &proc_fail_loc
-	},
-	{
-		.procname = "fail_val",
-		.data     = &cfs_fail_val,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dointvec
-	},
-	{
-		.procname	= "fail_err",
-		.data		= &cfs_fail_err,
-		.maxlen		= sizeof(cfs_fail_err),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-	}
-};
-
-static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = {
-	{ "console_ratelimit",
-	  "/sys/module/libcfs/parameters/libcfs_console_ratelimit"},
-	{ "debug_path",
-	  "/sys/module/libcfs/parameters/libcfs_debug_file_path"},
-	{ "panic_on_lbug",
-	  "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"},
-	{ "libcfs_console_backoff",
-	  "/sys/module/libcfs/parameters/libcfs_console_backoff"},
-	{ "debug_mb",
-	  "/sys/module/libcfs/parameters/libcfs_debug_mb"},
-	{ "console_min_delay_centisecs",
-	  "/sys/module/libcfs/parameters/libcfs_console_min_delay"},
-	{ "console_max_delay_centisecs",
-	  "/sys/module/libcfs/parameters/libcfs_console_max_delay"},
-	{},
-};
-
-static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf,
-				 size_t count, loff_t *ppos)
-{
-	struct ctl_table *table = filp->private_data;
-	int error;
-
-	error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos);
-	if (!error)
-		error = count;
-
-	return error;
-}
-
-static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf,
-				  size_t count, loff_t *ppos)
-{
-	struct ctl_table *table = filp->private_data;
-	int error;
-
-	error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos);
-	if (!error)
-		error = count;
-
-	return error;
-}
-
-static const struct file_operations lnet_debugfs_file_operations_rw = {
-	.open		= simple_open,
-	.read		= lnet_debugfs_read,
-	.write		= lnet_debugfs_write,
-	.llseek		= default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_ro = {
-	.open		= simple_open,
-	.read		= lnet_debugfs_read,
-	.llseek		= default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_wo = {
-	.open		= simple_open,
-	.write		= lnet_debugfs_write,
-	.llseek		= default_llseek,
-};
-
-static const struct file_operations *lnet_debugfs_fops_select(umode_t mode)
-{
-	if (!(mode & 0222))
-		return &lnet_debugfs_file_operations_ro;
-
-	if (!(mode & 0444))
-		return &lnet_debugfs_file_operations_wo;
-
-	return &lnet_debugfs_file_operations_rw;
-}
-
-void lustre_insert_debugfs(struct ctl_table *table)
-{
-	if (!lnet_debugfs_root)
-		lnet_debugfs_root = debugfs_create_dir("lnet", NULL);
-
-	/* Even if we cannot create it, just ignore it altogether */
-	if (IS_ERR_OR_NULL(lnet_debugfs_root))
-		return;
-
-	/*
-	 * We don't save the dentry returned because we don't call
-	 * debugfs_remove() on it, but rather debugfs_remove_recursive()
-	 * on the whole directory.
-	 */
-	for (; table->procname; table++)
-		debugfs_create_file(table->procname, table->mode,
-				    lnet_debugfs_root, table,
-				    lnet_debugfs_fops_select(table->mode));
-}
-EXPORT_SYMBOL_GPL(lustre_insert_debugfs);
-
-static void lustre_insert_debugfs_links(
-	const struct lnet_debugfs_symlink_def *symlinks)
-{
-	for (; symlinks && symlinks->name; symlinks++)
-		debugfs_create_symlink(symlinks->name, lnet_debugfs_root,
-				       symlinks->target);
-}
-
-static void lustre_remove_debugfs(void)
-{
-	debugfs_remove_recursive(lnet_debugfs_root);
-
-	lnet_debugfs_root = NULL;
-}
-
-static DEFINE_MUTEX(libcfs_startup);
-static int libcfs_active;
-
-int libcfs_setup(void)
-{
-	int rc = -EINVAL;
-
-	mutex_lock(&libcfs_startup);
-	if (libcfs_active)
-		goto out;
-
-	if (!libcfs_dev_registered)
-		goto err;
-
-	rc = libcfs_debug_init(5 * 1024 * 1024);
-	if (rc < 0) {
-		pr_err("LustreError: libcfs_debug_init: %d\n", rc);
-		goto err;
-	}
-
-	rc = cfs_cpu_init();
-	if (rc)
-		goto err;
-
-	cfs_rehash_wq = alloc_workqueue("cfs_rh", WQ_SYSFS, 4);
-	if (!cfs_rehash_wq) {
-		CERROR("Failed to start rehash workqueue.\n");
-		rc = -ENOMEM;
-		goto err;
-	}
-
-	rc = cfs_crypto_register();
-	if (rc) {
-		CERROR("cfs_crypto_register: error %d\n", rc);
-		goto err;
-	}
-
-	lustre_insert_debugfs(lnet_table);
-	if (!IS_ERR_OR_NULL(lnet_debugfs_root))
-		lustre_insert_debugfs_links(lnet_debugfs_symlinks);
-
-	CDEBUG(D_OTHER, "portals setup OK\n");
-out:
-	libcfs_active = 1;
-	mutex_unlock(&libcfs_startup);
-	return 0;
-err:
-	cfs_crypto_unregister();
-	if (cfs_rehash_wq)
-		destroy_workqueue(cfs_rehash_wq);
-	cfs_cpu_fini();
-	libcfs_debug_cleanup();
-	mutex_unlock(&libcfs_startup);
-	return rc;
-}
-EXPORT_SYMBOL(libcfs_setup);
-
-static int libcfs_init(void)
-{
-	int rc;
-
-	rc = misc_register(&libcfs_dev);
-	if (rc)
-		CERROR("misc_register: error %d\n", rc);
-	else
-		libcfs_dev_registered = 1;
-	return rc;
-}
-
-static void libcfs_exit(void)
-{
-	int rc;
-
-	lustre_remove_debugfs();
-
-	if (cfs_rehash_wq)
-		destroy_workqueue(cfs_rehash_wq);
-
-	cfs_crypto_unregister();
-
-	if (libcfs_dev_registered)
-		misc_deregister(&libcfs_dev);
-
-	cfs_cpu_fini();
-
-	rc = libcfs_debug_cleanup();
-	if (rc)
-		pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre helper library");
-MODULE_VERSION(LIBCFS_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(libcfs_init);
-module_exit(libcfs_exit);
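
The default branch of libcfs_ioctl() above routes unknown commands down a blocking notifier chain and, unless some subscriber sets NOTIFY_STOP_MASK, treats the ioctl as unclaimed and fails with -EINVAL. A minimal userspace sketch of that walk; the NOTIFY_* values mirror the kernel's, everything else (the function-pointer chain, handler_a) is invented for illustration:

#include <stdio.h>
#include <errno.h>

#define NOTIFY_DONE	 0x0000		/* don't care, keep walking */
#define NOTIFY_OK	 0x0001
#define NOTIFY_STOP_MASK 0x8000		/* handler claimed the event */

typedef int (*notifier_fn_t)(unsigned long cmd, void *data);

static int handler_a(unsigned long cmd, void *data)
{
	if (cmd != 1)
		return NOTIFY_DONE;
	printf("handler_a claimed cmd %lu\n", cmd);
	return NOTIFY_STOP_MASK | NOTIFY_OK;
}

/* walk the chain until a handler claims the command */
static int call_chain(notifier_fn_t *chain, unsigned long cmd, void *data)
{
	int ret = NOTIFY_DONE;

	for (; *chain; chain++) {
		ret = (*chain)(cmd, data);
		if (ret & NOTIFY_STOP_MASK)
			break;
	}
	return ret;
}

int main(void)
{
	notifier_fn_t chain[] = { handler_a, NULL };
	unsigned long cmd;

	for (cmd = 1; cmd <= 2; cmd++) {
		int ret = call_chain(chain, cmd, NULL);

		if (!(ret & NOTIFY_STOP_MASK))	/* no-one claimed it */
			fprintf(stderr, "cmd %lu unclaimed: %d\n",
				cmd, -EINVAL);
	}
	return 0;
}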

+ 0 - 1198
drivers/staging/lustre/lnet/libcfs/tracefile.c

@@ -1,1198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/tracefile.c
- *
- * Author: Zach Brown <zab@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-#define pr_fmt(fmt) "Lustre: " fmt
-
-#include <linux/ratelimit.h>
-#include <linux/highmem.h>
-#include <linux/ctype.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include "tracefile.h"
-
-/* XXX move things up to the top, comment */
-union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
-
-char cfs_tracefile[TRACEFILE_NAME_SIZE];
-long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-static DEFINE_MUTEX(cfs_trace_thread_mutex);
-static int thread_running;
-
-static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
-
-struct page_collection {
-	struct list_head	pc_pages;
-	/*
-	 * if this flag is set, collect_pages() will spill both
-	 * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
-	 * only ->tcd_pages are spilled.
-	 */
-	int			pc_want_daemon_pages;
-};
-
-struct tracefiled_ctl {
-	struct completion	tctl_start;
-	struct completion	tctl_stop;
-	wait_queue_head_t	tctl_waitq;
-	pid_t			tctl_pid;
-	atomic_t		tctl_shutdown;
-};
-
-/*
- * small data-structure for each page owned by tracefiled.
- */
-struct cfs_trace_page {
-	/*
-	 * page itself
-	 */
-	struct page		*page;
-	/*
-	 * linkage into one of the lists in trace_data_union or
-	 * page_collection
-	 */
-	struct list_head	linkage;
-	/*
-	 * number of bytes used within this page
-	 */
-	unsigned int		used;
-	/*
-	 * cpu that owns this page
-	 */
-	unsigned short		cpu;
-	/*
-	 * type(context) of this page
-	 */
-	unsigned short		type;
-};
-
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-					 struct cfs_trace_cpu_data *tcd);
-
-static inline struct cfs_trace_page *
-cfs_tage_from_list(struct list_head *list)
-{
-	return list_entry(list, struct cfs_trace_page, linkage);
-}
-
-static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
-{
-	struct page *page;
-	struct cfs_trace_page *tage;
-
-	/* My caller is trying to free memory */
-	if (!in_interrupt() && (current->flags & PF_MEMALLOC))
-		return NULL;
-
-	/*
-	 * Don't spam console with allocation failures: they will be reported
-	 * by upper layer anyway.
-	 */
-	gfp |= __GFP_NOWARN;
-	page = alloc_page(gfp);
-	if (!page)
-		return NULL;
-
-	tage = kmalloc(sizeof(*tage), gfp);
-	if (!tage) {
-		__free_page(page);
-		return NULL;
-	}
-
-	tage->page = page;
-	atomic_inc(&cfs_tage_allocated);
-	return tage;
-}
-
-static void cfs_tage_free(struct cfs_trace_page *tage)
-{
-	__free_page(tage->page);
-	kfree(tage);
-	atomic_dec(&cfs_tage_allocated);
-}
-
-static void cfs_tage_to_tail(struct cfs_trace_page *tage,
-			     struct list_head *queue)
-{
-	list_move_tail(&tage->linkage, queue);
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
-			   struct list_head *stock)
-{
-	int i;
-
-	/*
-	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-	 * from here: this will lead to infinite recursion.
-	 */
-
-	for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) {
-		struct cfs_trace_page *tage;
-
-		tage = cfs_tage_alloc(gfp);
-		if (!tage)
-			break;
-		list_add_tail(&tage->linkage, stock);
-	}
-	return i;
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *
-cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
-{
-	struct cfs_trace_page *tage;
-
-	if (tcd->tcd_cur_pages > 0) {
-		__LASSERT(!list_empty(&tcd->tcd_pages));
-		tage = cfs_tage_from_list(tcd->tcd_pages.prev);
-		if (tage->used + len <= PAGE_SIZE)
-			return tage;
-	}
-
-	if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
-		if (tcd->tcd_cur_stock_pages > 0) {
-			tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
-			--tcd->tcd_cur_stock_pages;
-			list_del_init(&tage->linkage);
-		} else {
-			tage = cfs_tage_alloc(GFP_ATOMIC);
-			if (unlikely(!tage)) {
-				if (!(current->flags & PF_MEMALLOC) ||
-				    in_interrupt())
-					pr_warn_ratelimited("cannot allocate a tage (%ld)\n",
-							    tcd->tcd_cur_pages);
-				return NULL;
-			}
-		}
-
-		tage->used = 0;
-		tage->cpu = smp_processor_id();
-		tage->type = tcd->tcd_type;
-		list_add_tail(&tage->linkage, &tcd->tcd_pages);
-		tcd->tcd_cur_pages++;
-
-		if (tcd->tcd_cur_pages > 8 && thread_running) {
-			struct tracefiled_ctl *tctl = &trace_tctl;
-			/*
-			 * wake up tracefiled to process some pages.
-			 */
-			wake_up(&tctl->tctl_waitq);
-		}
-		return tage;
-	}
-	return NULL;
-}
-
-static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
-{
-	int pgcount = tcd->tcd_cur_pages / 10;
-	struct page_collection pc;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	/*
-	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-	 * from here: this will lead to infinite recursion.
-	 */
-
-	pr_warn_ratelimited("debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
-			    pgcount + 1, tcd->tcd_cur_pages);
-
-	INIT_LIST_HEAD(&pc.pc_pages);
-
-	list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
-		if (!pgcount--)
-			break;
-
-		list_move_tail(&tage->linkage, &pc.pc_pages);
-		tcd->tcd_cur_pages--;
-	}
-	put_pages_on_tcd_daemon_list(&pc, tcd);
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
-						 unsigned long len)
-{
-	struct cfs_trace_page *tage;
-
-	/*
-	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-	 * from here: this will lead to infinite recursion.
-	 */
-
-	if (len > PAGE_SIZE) {
-		pr_err("cowardly refusing to write %lu bytes in a page\n", len);
-		return NULL;
-	}
-
-	tage = cfs_trace_get_tage_try(tcd, len);
-	if (tage)
-		return tage;
-	if (thread_running)
-		cfs_tcd_shrink(tcd);
-	if (tcd->tcd_cur_pages > 0) {
-		tage = cfs_tage_from_list(tcd->tcd_pages.next);
-		tage->used = 0;
-		cfs_tage_to_tail(tage, &tcd->tcd_pages);
-	}
-	return tage;
-}
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
-		     const char *format, ...)
-{
-	va_list args;
-	int rc;
-
-	va_start(args, format);
-	rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
-	va_end(args);
-
-	return rc;
-}
-EXPORT_SYMBOL(libcfs_debug_msg);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
-		       const char *format1, va_list args,
-		       const char *format2, ...)
-{
-	struct cfs_trace_cpu_data *tcd = NULL;
-	struct ptldebug_header header = { 0 };
-	struct cfs_trace_page *tage;
-	/* string_buf is used only if tcd != NULL, and is always set then */
-	char *string_buf = NULL;
-	char *debug_buf;
-	int known_size;
-	int needed = 85; /* average message length */
-	int max_nob;
-	va_list ap;
-	int depth;
-	int i;
-	int remain;
-	int mask = msgdata->msg_mask;
-	const char *file = kbasename(msgdata->msg_file);
-	struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
-
-	tcd = cfs_trace_get_tcd();
-
-	/* cfs_trace_get_tcd() grabs a lock, which disables preemption and
-	 * pins us to a particular CPU.  This avoids an smp_processor_id()
-	 * warning on Linux when debugging is enabled.
-	 */
-	cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
-
-	if (!tcd)		/* arch may not log in IRQ context */
-		goto console;
-
-	if (!tcd->tcd_cur_pages)
-		header.ph_flags |= PH_FLAG_FIRST_RECORD;
-
-	if (tcd->tcd_shutting_down) {
-		cfs_trace_put_tcd(tcd);
-		tcd = NULL;
-		goto console;
-	}
-
-	depth = 0;
-	known_size = strlen(file) + 1 + depth;
-	if (msgdata->msg_fn)
-		known_size += strlen(msgdata->msg_fn) + 1;
-
-	if (libcfs_debug_binary)
-		known_size += sizeof(header);
-
-	/*
-	 * Loop at most twice: vsnprintf() returns the real size required
-	 * for the output _without_ the terminating NUL, so if 'needed'
-	 * turns out to be too small for this format we retry once with
-	 * the corrected size.
-	 */
-	for (i = 0; i < 2; i++) {
-		tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
-		if (!tage) {
-			if (needed + known_size > PAGE_SIZE)
-				mask |= D_ERROR;
-
-			cfs_trace_put_tcd(tcd);
-			tcd = NULL;
-			goto console;
-		}
-
-		string_buf = (char *)page_address(tage->page) +
-					tage->used + known_size;
-
-		max_nob = PAGE_SIZE - tage->used - known_size;
-		if (max_nob <= 0) {
-			pr_emerg("negative max_nob: %d\n", max_nob);
-			mask |= D_ERROR;
-			cfs_trace_put_tcd(tcd);
-			tcd = NULL;
-			goto console;
-		}
-
-		needed = 0;
-		if (format1) {
-			va_copy(ap, args);
-			needed = vsnprintf(string_buf, max_nob, format1, ap);
-			va_end(ap);
-		}
-
-		if (format2) {
-			remain = max_nob - needed;
-			if (remain < 0)
-				remain = 0;
-
-			va_start(ap, format2);
-			needed += vsnprintf(string_buf + needed, remain,
-					    format2, ap);
-			va_end(ap);
-		}
-
-		if (needed < max_nob) /* message fit, we are done */
-			break;
-	}
-
-	if (*(string_buf + needed - 1) != '\n')
-		pr_info("format at %s:%d:%s doesn't end in newline\n", file,
-			msgdata->msg_line, msgdata->msg_fn);
-
-	header.ph_len = known_size + needed;
-	debug_buf = (char *)page_address(tage->page) + tage->used;
-
-	if (libcfs_debug_binary) {
-		memcpy(debug_buf, &header, sizeof(header));
-		tage->used += sizeof(header);
-		debug_buf += sizeof(header);
-	}
-
-	/* indent message according to the nesting level */
-	while (depth-- > 0) {
-		*(debug_buf++) = '.';
-		++tage->used;
-	}
-
-	strcpy(debug_buf, file);
-	tage->used += strlen(file) + 1;
-	debug_buf += strlen(file) + 1;
-
-	if (msgdata->msg_fn) {
-		strcpy(debug_buf, msgdata->msg_fn);
-		tage->used += strlen(msgdata->msg_fn) + 1;
-		debug_buf += strlen(msgdata->msg_fn) + 1;
-	}
-
-	__LASSERT(debug_buf == string_buf);
-
-	tage->used += needed;
-	__LASSERT(tage->used <= PAGE_SIZE);
-
-console:
-	if (!(mask & libcfs_printk)) {
-		/* no console output requested */
-		if (tcd)
-			cfs_trace_put_tcd(tcd);
-		return 1;
-	}
-
-	if (cdls) {
-		if (libcfs_console_ratelimit &&
-		    cdls->cdls_next &&		/* not first time ever */
-		    !time_after(jiffies, cdls->cdls_next)) {
-			/* skipping a console message */
-			cdls->cdls_count++;
-			if (tcd)
-				cfs_trace_put_tcd(tcd);
-			return 1;
-		}
-
-		if (time_after(jiffies,
-			       cdls->cdls_next + libcfs_console_max_delay +
-			       10 * HZ)) {
-			/* last timeout was a long time ago */
-			cdls->cdls_delay /= libcfs_console_backoff * 4;
-		} else {
-			cdls->cdls_delay *= libcfs_console_backoff;
-		}
-
-		if (cdls->cdls_delay < libcfs_console_min_delay)
-			cdls->cdls_delay = libcfs_console_min_delay;
-		else if (cdls->cdls_delay > libcfs_console_max_delay)
-			cdls->cdls_delay = libcfs_console_max_delay;
-
-		/* ensure cdls_next is never zero after it's been seen */
-		cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1;
-	}
-
-	if (tcd) {
-		cfs_print_to_console(&header, mask, string_buf, needed, file,
-				     msgdata->msg_fn);
-		cfs_trace_put_tcd(tcd);
-	} else {
-		string_buf = cfs_trace_get_console_buffer();
-
-		needed = 0;
-		if (format1) {
-			va_copy(ap, args);
-			needed = vsnprintf(string_buf,
-					   CFS_TRACE_CONSOLE_BUFFER_SIZE,
-					   format1, ap);
-			va_end(ap);
-		}
-		if (format2) {
-			remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
-			if (remain > 0) {
-				va_start(ap, format2);
-				needed += vsnprintf(string_buf + needed, remain,
-						    format2, ap);
-				va_end(ap);
-			}
-		}
-		cfs_print_to_console(&header, mask,
-				     string_buf, needed, file, msgdata->msg_fn);
-
-		put_cpu();
-	}
-
-	if (cdls && cdls->cdls_count) {
-		string_buf = cfs_trace_get_console_buffer();
-
-		needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
-				  "Skipped %d previous similar message%s\n",
-				  cdls->cdls_count,
-				  (cdls->cdls_count > 1) ? "s" : "");
-
-		cfs_print_to_console(&header, mask,
-				     string_buf, needed, file, msgdata->msg_fn);
-
-		put_cpu();
-		cdls->cdls_count = 0;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(libcfs_debug_vmsg2);
-
-void
-cfs_trace_assertion_failed(const char *str,
-			   struct libcfs_debug_msg_data *msgdata)
-{
-	struct ptldebug_header hdr;
-
-	libcfs_panic_in_progress = 1;
-	libcfs_catastrophe = 1;
-	mb();
-
-	cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
-
-	cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
-			     msgdata->msg_file, msgdata->msg_fn);
-
-	panic("Lustre debug assertion failure\n");
-
-	/* not reached */
-}
-
-static void
-panic_collect_pages(struct page_collection *pc)
-{
-	/* Do the collect_pages job on a single CPU: assumes that all other
-	 * CPUs have been stopped during a panic.  If this isn't true for some
-	 * arch, this will have to be implemented separately in each arch.
-	 */
-	struct cfs_trace_cpu_data *tcd;
-	int i;
-	int j;
-
-	INIT_LIST_HEAD(&pc->pc_pages);
-
-	cfs_tcd_for_each(tcd, i, j) {
-		list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
-		tcd->tcd_cur_pages = 0;
-
-		if (pc->pc_want_daemon_pages) {
-			list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
-			tcd->tcd_cur_daemon_pages = 0;
-		}
-	}
-}
-
-static void collect_pages_on_all_cpus(struct page_collection *pc)
-{
-	struct cfs_trace_cpu_data *tcd;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-			list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
-			tcd->tcd_cur_pages = 0;
-			if (pc->pc_want_daemon_pages) {
-				list_splice_init(&tcd->tcd_daemon_pages,
-						 &pc->pc_pages);
-				tcd->tcd_cur_daemon_pages = 0;
-			}
-		}
-	}
-}
-
-static void collect_pages(struct page_collection *pc)
-{
-	INIT_LIST_HEAD(&pc->pc_pages);
-
-	if (libcfs_panic_in_progress)
-		panic_collect_pages(pc);
-	else
-		collect_pages_on_all_cpus(pc);
-}
-
-static void put_pages_back_on_all_cpus(struct page_collection *pc)
-{
-	struct cfs_trace_cpu_data *tcd;
-	struct list_head *cur_head;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-			cur_head = tcd->tcd_pages.next;
-
-			list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
-						 linkage) {
-				__LASSERT_TAGE_INVARIANT(tage);
-
-				if (tage->cpu != cpu || tage->type != i)
-					continue;
-
-				cfs_tage_to_tail(tage, cur_head);
-				tcd->tcd_cur_pages++;
-			}
-		}
-	}
-}
-
-static void put_pages_back(struct page_collection *pc)
-{
-	if (!libcfs_panic_in_progress)
-		put_pages_back_on_all_cpus(pc);
-}
-
-/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon.
- */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-					 struct cfs_trace_cpu_data *tcd)
-{
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
-			continue;
-
-		cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
-		tcd->tcd_cur_daemon_pages++;
-
-		if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
-			struct cfs_trace_page *victim;
-
-			__LASSERT(!list_empty(&tcd->tcd_daemon_pages));
-			victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
-
-			__LASSERT_TAGE_INVARIANT(victim);
-
-			list_del(&victim->linkage);
-			cfs_tage_free(victim);
-			tcd->tcd_cur_daemon_pages--;
-		}
-	}
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
-	struct cfs_trace_cpu_data *tcd;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu)
-			put_pages_on_tcd_daemon_list(pc, tcd);
-	}
-}
-
-void cfs_trace_debug_print(void)
-{
-	struct page_collection pc;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	pc.pc_want_daemon_pages = 1;
-	collect_pages(&pc);
-	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-		char *p, *file, *fn;
-		struct page *page;
-
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		page = tage->page;
-		p = page_address(page);
-		while (p < ((char *)page_address(page) + tage->used)) {
-			struct ptldebug_header *hdr;
-			int len;
-
-			hdr = (void *)p;
-			p += sizeof(*hdr);
-			file = p;
-			p += strlen(file) + 1;
-			fn = p;
-			p += strlen(fn) + 1;
-			len = hdr->ph_len - (int)(p - (char *)hdr);
-
-			cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
-
-			p += len;
-		}
-
-		list_del(&tage->linkage);
-		cfs_tage_free(tage);
-	}
-}
-
-int cfs_tracefile_dump_all_pages(char *filename)
-{
-	struct page_collection pc;
-	struct file *filp;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	char *buf;
-	mm_segment_t __oldfs;
-	int rc;
-
-	cfs_tracefile_write_lock();
-
-	filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
-			 0600);
-	if (IS_ERR(filp)) {
-		rc = PTR_ERR(filp);
-		filp = NULL;
-		pr_err("LustreError: can't open %s for dump: rc %d\n",
-		       filename, rc);
-		goto out;
-	}
-
-	pc.pc_want_daemon_pages = 1;
-	collect_pages(&pc);
-	if (list_empty(&pc.pc_pages)) {
-		rc = 0;
-		goto close;
-	}
-	__oldfs = get_fs();
-	set_fs(get_ds());
-
-	/* ok, for now, just write the pages.  in the future we'll be building
-	 * iobufs with the pages and calling generic_direct_IO
-	 */
-	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		buf = kmap(tage->page);
-		rc = kernel_write(filp, buf, tage->used, &filp->f_pos);
-		kunmap(tage->page);
-
-		if (rc != (int)tage->used) {
-			pr_warn("wanted to write %u but wrote %d\n", tage->used,
-				rc);
-			put_pages_back(&pc);
-			__LASSERT(list_empty(&pc.pc_pages));
-			break;
-		}
-		list_del(&tage->linkage);
-		cfs_tage_free(tage);
-	}
-	set_fs(__oldfs);
-	rc = vfs_fsync(filp, 1);
-	if (rc)
-		pr_err("sync returns %d\n", rc);
-close:
-	filp_close(filp, NULL);
-out:
-	cfs_tracefile_write_unlock();
-	return rc;
-}
-
-void cfs_trace_flush_pages(void)
-{
-	struct page_collection pc;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	pc.pc_want_daemon_pages = 1;
-	collect_pages(&pc);
-	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		list_del(&tage->linkage);
-		cfs_tage_free(tage);
-	}
-}
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-			    const char __user *usr_buffer, int usr_buffer_nob)
-{
-	int nob;
-
-	if (usr_buffer_nob > knl_buffer_nob)
-		return -EOVERFLOW;
-
-	if (copy_from_user((void *)knl_buffer,
-			   usr_buffer, usr_buffer_nob))
-		return -EFAULT;
-
-	nob = strnlen(knl_buffer, usr_buffer_nob);
-	while (--nob >= 0)		      /* strip trailing whitespace */
-		if (!isspace(knl_buffer[nob]))
-			break;
-
-	if (nob < 0)			    /* empty string */
-		return -EINVAL;
-
-	if (nob == knl_buffer_nob)	      /* no space to terminate */
-		return -EOVERFLOW;
-
-	knl_buffer[nob + 1] = 0;		/* terminate */
-	return 0;
-}
-EXPORT_SYMBOL(cfs_trace_copyin_string);
-
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-			     const char *knl_buffer, char *append)
-{
-	/*
-	 * NB if 'append' != NULL, it's a single character to append to the
-	 * copied out string - usually "\n" or "" (i.e. a terminating zero byte)
-	 */
-	int nob = strlen(knl_buffer);
-
-	if (nob > usr_buffer_nob)
-		nob = usr_buffer_nob;
-
-	if (copy_to_user(usr_buffer, knl_buffer, nob))
-		return -EFAULT;
-
-	if (append && nob < usr_buffer_nob) {
-		if (copy_to_user(usr_buffer + nob, append, 1))
-			return -EFAULT;
-
-		nob++;
-	}
-
-	return nob;
-}
-EXPORT_SYMBOL(cfs_trace_copyout_string);
-
-int cfs_trace_allocate_string_buffer(char **str, int nob)
-{
-	if (nob > 2 * PAGE_SIZE)	    /* string must be "sensible" */
-		return -EINVAL;
-
-	*str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
-	if (!*str)
-		return -ENOMEM;
-
-	return 0;
-}
-
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
-{
-	char *str;
-	int rc;
-
-	rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
-	if (rc)
-		return rc;
-
-	rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
-				     usr_str, usr_str_nob);
-	if (rc)
-		goto out;
-
-	if (str[0] != '/') {
-		rc = -EINVAL;
-		goto out;
-	}
-	rc = cfs_tracefile_dump_all_pages(str);
-out:
-	kfree(str);
-	return rc;
-}
-
-int cfs_trace_daemon_command(char *str)
-{
-	int rc = 0;
-
-	cfs_tracefile_write_lock();
-
-	if (!strcmp(str, "stop")) {
-		cfs_tracefile_write_unlock();
-		cfs_trace_stop_thread();
-		cfs_tracefile_write_lock();
-		memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
-
-	} else if (!strncmp(str, "size=", 5)) {
-		unsigned long tmp;
-
-		rc = kstrtoul(str + 5, 10, &tmp);
-		if (!rc) {
-			if (tmp < 10 || tmp > 20480)
-				cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-			else
-				cfs_tracefile_size = tmp << 20;
-		}
-	} else if (strlen(str) >= sizeof(cfs_tracefile)) {
-		rc = -ENAMETOOLONG;
-	} else if (str[0] != '/') {
-		rc = -EINVAL;
-	} else {
-		strcpy(cfs_tracefile, str);
-
-		pr_info("debug daemon will attempt to start writing to %s (%lukB max)\n",
-			cfs_tracefile,
-			(long)(cfs_tracefile_size >> 10));
-
-		cfs_trace_start_thread();
-	}
-
-	cfs_tracefile_write_unlock();
-	return rc;
-}
-
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
-{
-	char *str;
-	int rc;
-
-	rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
-	if (rc)
-		return rc;
-
-	rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
-				     usr_str, usr_str_nob);
-	if (!rc)
-		rc = cfs_trace_daemon_command(str);
-
-	kfree(str);
-	return rc;
-}
-
-int cfs_trace_set_debug_mb(int mb)
-{
-	int i;
-	int j;
-	int pages;
-	int limit = cfs_trace_max_debug_mb();
-	struct cfs_trace_cpu_data *tcd;
-
-	if (mb < num_possible_cpus()) {
-		pr_warn("%d MB is too small for debug buffer size, setting it to %d MB.\n",
-			mb, num_possible_cpus());
-		mb = num_possible_cpus();
-	}
-
-	if (mb > limit) {
-		pr_warn("%d MB is too large for debug buffer size, setting it to %d MB.\n",
-			mb, limit);
-		mb = limit;
-	}
-
-	mb /= num_possible_cpus();
-	pages = mb << (20 - PAGE_SHIFT);
-
-	cfs_tracefile_write_lock();
-
-	cfs_tcd_for_each(tcd, i, j)
-		tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
-
-	cfs_tracefile_write_unlock();
-
-	return 0;
-}
-
-int cfs_trace_get_debug_mb(void)
-{
-	int i;
-	int j;
-	struct cfs_trace_cpu_data *tcd;
-	int total_pages = 0;
-
-	cfs_tracefile_read_lock();
-
-	cfs_tcd_for_each(tcd, i, j)
-		total_pages += tcd->tcd_max_pages;
-
-	cfs_tracefile_read_unlock();
-
-	return (total_pages >> (20 - PAGE_SHIFT)) + 1;
-}
-
-static int tracefiled(void *arg)
-{
-	struct page_collection pc;
-	struct tracefiled_ctl *tctl = arg;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	struct file *filp;
-	char *buf;
-	int last_loop = 0;
-	int rc;
-
-	/* We are started late enough that we pick up init's fs context. */
-	/* Historically this behaved strangely under UML; never fully understood. */
-
-	complete(&tctl->tctl_start);
-
-	while (1) {
-		wait_queue_entry_t __wait;
-
-		pc.pc_want_daemon_pages = 0;
-		collect_pages(&pc);
-		if (list_empty(&pc.pc_pages))
-			goto end_loop;
-
-		filp = NULL;
-		cfs_tracefile_read_lock();
-		if (cfs_tracefile[0]) {
-			filp = filp_open(cfs_tracefile,
-					 O_CREAT | O_RDWR | O_LARGEFILE,
-					 0600);
-			if (IS_ERR(filp)) {
-				rc = PTR_ERR(filp);
-				filp = NULL;
-				pr_warn("couldn't open %s: %d\n", cfs_tracefile,
-					rc);
-			}
-		}
-		cfs_tracefile_read_unlock();
-		if (!filp) {
-			put_pages_on_daemon_list(&pc);
-			__LASSERT(list_empty(&pc.pc_pages));
-			goto end_loop;
-		}
-
-		list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-			static loff_t f_pos;
-
-			__LASSERT_TAGE_INVARIANT(tage);
-
-			if (f_pos >= (off_t)cfs_tracefile_size)
-				f_pos = 0;
-			else if (f_pos > i_size_read(file_inode(filp)))
-				f_pos = i_size_read(file_inode(filp));
-
-			buf = kmap(tage->page);
-			rc = kernel_write(filp, buf, tage->used, &f_pos);
-			kunmap(tage->page);
-
-			if (rc != (int)tage->used) {
-				pr_warn("wanted to write %u but wrote %d\n",
-					tage->used, rc);
-				put_pages_back(&pc);
-				__LASSERT(list_empty(&pc.pc_pages));
-				break;
-			}
-		}
-
-		filp_close(filp, NULL);
-		put_pages_on_daemon_list(&pc);
-		if (!list_empty(&pc.pc_pages)) {
-			int i;
-
-			pr_alert("trace pages aren't empty\n");
-			pr_err("total cpus(%d): ", num_possible_cpus());
-			for (i = 0; i < num_possible_cpus(); i++)
-				if (cpu_online(i))
-					pr_cont("%d(on) ", i);
-				else
-					pr_cont("%d(off) ", i);
-			pr_cont("\n");
-
-			i = 0;
-			list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
-						 linkage)
-				pr_err("page %d belongs to cpu %d\n",
-				       ++i, tage->cpu);
-			pr_err("There are %d pages unwritten\n", i);
-		}
-		__LASSERT(list_empty(&pc.pc_pages));
-end_loop:
-		if (atomic_read(&tctl->tctl_shutdown)) {
-			if (!last_loop) {
-				last_loop = 1;
-				continue;
-			} else {
-				break;
-			}
-		}
-		init_waitqueue_entry(&__wait, current);
-		add_wait_queue(&tctl->tctl_waitq, &__wait);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ);
-		remove_wait_queue(&tctl->tctl_waitq, &__wait);
-	}
-	complete(&tctl->tctl_stop);
-	return 0;
-}
-
-int cfs_trace_start_thread(void)
-{
-	struct tracefiled_ctl *tctl = &trace_tctl;
-	struct task_struct *task;
-	int rc = 0;
-
-	mutex_lock(&cfs_trace_thread_mutex);
-	if (thread_running)
-		goto out;
-
-	init_completion(&tctl->tctl_start);
-	init_completion(&tctl->tctl_stop);
-	init_waitqueue_head(&tctl->tctl_waitq);
-	atomic_set(&tctl->tctl_shutdown, 0);
-
-	task = kthread_run(tracefiled, tctl, "ktracefiled");
-	if (IS_ERR(task)) {
-		rc = PTR_ERR(task);
-		goto out;
-	}
-
-	wait_for_completion(&tctl->tctl_start);
-	thread_running = 1;
-out:
-	mutex_unlock(&cfs_trace_thread_mutex);
-	return rc;
-}
-
-void cfs_trace_stop_thread(void)
-{
-	struct tracefiled_ctl *tctl = &trace_tctl;
-
-	mutex_lock(&cfs_trace_thread_mutex);
-	if (thread_running) {
-		pr_info("shutting down debug daemon thread...\n");
-		atomic_set(&tctl->tctl_shutdown, 1);
-		wait_for_completion(&tctl->tctl_stop);
-		thread_running = 0;
-	}
-	mutex_unlock(&cfs_trace_thread_mutex);
-}
-
-int cfs_tracefile_init(int max_pages)
-{
-	struct cfs_trace_cpu_data *tcd;
-	int i;
-	int j;
-	int rc;
-	int factor;
-
-	rc = cfs_tracefile_init_arch();
-	if (rc)
-		return rc;
-
-	cfs_tcd_for_each(tcd, i, j) {
-		/* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
-		factor = tcd->tcd_pages_factor;
-		INIT_LIST_HEAD(&tcd->tcd_pages);
-		INIT_LIST_HEAD(&tcd->tcd_stock_pages);
-		INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
-		tcd->tcd_cur_pages = 0;
-		tcd->tcd_cur_stock_pages = 0;
-		tcd->tcd_cur_daemon_pages = 0;
-		tcd->tcd_max_pages = (max_pages * factor) / 100;
-		LASSERT(tcd->tcd_max_pages > 0);
-		tcd->tcd_shutting_down = 0;
-	}
-
-	return 0;
-}
-
-static void trace_cleanup_on_all_cpus(void)
-{
-	struct cfs_trace_cpu_data *tcd;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-			tcd->tcd_shutting_down = 1;
-
-			list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
-						 linkage) {
-				__LASSERT_TAGE_INVARIANT(tage);
-
-				list_del(&tage->linkage);
-				cfs_tage_free(tage);
-			}
-
-			tcd->tcd_cur_pages = 0;
-		}
-	}
-}
-
-static void cfs_trace_cleanup(void)
-{
-	struct page_collection pc;
-
-	INIT_LIST_HEAD(&pc.pc_pages);
-
-	trace_cleanup_on_all_cpus();
-
-	cfs_tracefile_fini_arch();
-}
-
-void cfs_tracefile_exit(void)
-{
-	cfs_trace_stop_thread();
-	cfs_trace_cleanup();
-}
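
The cdls handling in libcfs_debug_vmsg2() above is a small adaptive console rate limiter: while messages keep arriving the delay multiplies by libcfs_console_backoff, after a long quiet period it shrinks by backoff * 4, and it is always clamped between the min and max delays. A self-contained sketch of that policy; the tick values, constants and the one-message-per-tick driver are assumptions for demonstration, not the kernel's tunables:

#include <stdio.h>

#define HZ 100UL			/* assumed ticks per second */

static unsigned long min_delay = 50;		/* 0.5 s  */
static unsigned long max_delay = 600 * HZ;	/* 10 min */
static unsigned long backoff = 2;

struct cdls {
	unsigned long next;	/* earliest tick of the next message */
	unsigned long delay;
	int skipped;
};

/* returns 1 when the message should be suppressed */
static int cdls_limit(struct cdls *c, unsigned long now)
{
	if (c->next && now < c->next) {
		c->skipped++;			/* too soon, drop it */
		return 1;
	}
	if (now > c->next + max_delay + 10 * HZ)
		c->delay /= backoff * 4;	/* long quiet spell: relax */
	else
		c->delay *= backoff;		/* still chatty: back off */
	if (c->delay < min_delay)
		c->delay = min_delay;
	if (c->delay > max_delay)
		c->delay = max_delay;
	c->next = (now + c->delay) | 1;		/* never zero once set */
	return 0;
}

int main(void)
{
	struct cdls c = { 0, 0, 0 };
	unsigned long t;
	int printed = 0;

	for (t = 0; t < 30 * HZ; t++)		/* one message per tick */
		if (!cdls_limit(&c, t))
			printed++;
	printf("printed %d of %lu, skipped %d\n",
	       printed, 30 * HZ, c.skipped);
	return 0;
}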

+ 0 - 274
drivers/staging/lustre/lnet/libcfs/tracefile.h

@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_TRACEFILE_H__
-#define __LIBCFS_TRACEFILE_H__
-
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/cache.h>
-#include <linux/threads.h>
-#include <linux/limits.h>
-#include <linux/smp.h>
-#include <linux/libcfs/libcfs.h>
-
-enum cfs_trace_buf_type {
-	CFS_TCD_TYPE_PROC = 0,
-	CFS_TCD_TYPE_SOFTIRQ,
-	CFS_TCD_TYPE_IRQ,
-	CFS_TCD_TYPE_MAX
-};
-
-/* trace file lock routines */
-
-#define TRACEFILE_NAME_SIZE 1024
-extern char cfs_tracefile[TRACEFILE_NAME_SIZE];
-extern long long cfs_tracefile_size;
-
-/**
- * The path of the debug-log dump upcall script.
- */
-extern char lnet_debug_log_upcall[1024];
-
-void libcfs_run_debug_log_upcall(char *file);
-
-int  cfs_tracefile_init_arch(void);
-void cfs_tracefile_fini_arch(void);
-
-void cfs_tracefile_read_lock(void);
-void cfs_tracefile_read_unlock(void);
-void cfs_tracefile_write_lock(void);
-void cfs_tracefile_write_unlock(void);
-
-int cfs_tracefile_dump_all_pages(char *filename);
-void cfs_trace_debug_print(void);
-void cfs_trace_flush_pages(void);
-int cfs_trace_start_thread(void);
-void cfs_trace_stop_thread(void);
-int cfs_tracefile_init(int max_pages);
-void cfs_tracefile_exit(void);
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-			    const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-			     const char *knl_str, char *append);
-int cfs_trace_allocate_string_buffer(char **str, int nob);
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_daemon_command(char *str);
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_set_debug_mb(int mb);
-int cfs_trace_get_debug_mb(void);
-
-void libcfs_debug_dumplog_internal(void *arg);
-void libcfs_register_panic_notifier(void);
-void libcfs_unregister_panic_notifier(void);
-extern int libcfs_panic_in_progress;
-int cfs_trace_max_debug_mb(void);
-
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-#ifdef LUSTRE_TRACEFILE_PRIVATE
-
-/*
- * Private declarations for tracefile.  TCD_MAX_PAGES, TCD_STOCK_PAGES and
- * CFS_TRACEFILE_SIZE are already defined identically above, so they are
- * not repeated here.
- */
-
-/*
- * Size of the buffer used for formatting console messages when we cannot
- * get a page from the system
- */
-#define CFS_TRACE_CONSOLE_BUFFER_SIZE   1024
-
-union cfs_trace_data_union {
-	struct cfs_trace_cpu_data {
-		/*
-		 * Even though this structure is meant to be per-CPU, locking
-		 * is needed because in some places the data may be accessed
-		 * from other CPUs. This lock is directly used in trace_get_tcd
-		 * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and
-		 * tcd_for_each_type_lock
-		 */
-		spinlock_t		tcd_lock;
-		unsigned long		tcd_lock_flags;
-
-		/*
-		 * pages with trace records not yet processed by tracefiled.
-		 */
-		struct list_head	tcd_pages;
-		/* number of pages on ->tcd_pages */
-		unsigned long		tcd_cur_pages;
-
-		/*
-		 * pages with trace records already processed by
-		 * tracefiled. These pages are kept in memory, so that some
-		 * portion of log can be written in the event of LBUG. This
-		 * list is maintained in LRU order.
-		 *
-		 * Pages are moved to ->tcd_daemon_pages by tracefiled()
-		 * (put_pages_on_daemon_list()). LRU pages from this list are
-		 * discarded when list grows too large.
-		 */
-		struct list_head	tcd_daemon_pages;
-		/* number of pages on ->tcd_daemon_pages */
-		unsigned long		tcd_cur_daemon_pages;
-
-		/*
-		 * Maximal number of pages allowed on ->tcd_pages and
-		 * ->tcd_daemon_pages each.
-		 * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
-		 * implementation.
-		 */
-		unsigned long		tcd_max_pages;
-
-		/*
-		 * preallocated pages to write trace records into. Pages from
-		 * ->tcd_stock_pages are moved to ->tcd_pages by
-		 * portals_debug_msg().
-		 *
-		 * This list is necessary, because on some platforms it's
-		 * impossible to perform efficient atomic page allocation in a
-		 * non-blockable context.
-		 *
-		 * Such platforms fill ->tcd_stock_pages "on occasion", when
-		 * tracing code is entered in blockable context.
-		 *
-		 * trace_get_tage_try() tries to get a page from
-		 * ->tcd_stock_pages first and resorts to atomic page
-		 * allocation only if this queue is empty. ->tcd_stock_pages
-		 * is replenished when tracing code is entered in blocking
-		 * context (darwin-tracefile.c:trace_get_tcd()). We try to
-		 * maintain TCD_STOCK_PAGES pages in this
-		 * queue. Atomic allocation is only required if more than
-		 * TCD_STOCK_PAGES pages' worth of trace records are emitted
-		 * in non-blocking contexts, which is quite unlikely.
-		 */
-		struct list_head	tcd_stock_pages;
-		/* number of pages on ->tcd_stock_pages */
-		unsigned long		tcd_cur_stock_pages;
-
-		unsigned short		tcd_shutting_down;
-		unsigned short		tcd_cpu;
-		unsigned short		tcd_type;
-		/* The factors to share debug memory. */
-		unsigned short		tcd_pages_factor;
-	} tcd;
-	char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
-};
-
-#define TCD_MAX_TYPES      8
-extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS];
-
-#define cfs_tcd_for_each(tcd, i, j)				       \
-	for (i = 0; cfs_trace_data[i]; i++)				\
-		for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd);	\
-		     j < num_possible_cpus();				 \
-		     j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
-
-#define cfs_tcd_for_each_type_lock(tcd, i, cpu)			   \
-	for (i = 0; cfs_trace_data[i] &&				\
-	     (tcd = &(*cfs_trace_data[i])[cpu].tcd) &&			\
-	     cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
-
-void cfs_set_ptldebug_header(struct ptldebug_header *header,
-			     struct libcfs_debug_msg_data *m,
-			     unsigned long stack);
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
-			  const char *buf, int len, const char *file,
-			  const char *fn);
-
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-
-extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void);
-
-static inline char *
-cfs_trace_get_console_buffer(void)
-{
-	unsigned int i = get_cpu();
-	unsigned int j = cfs_trace_buf_idx_get();
-
-	return cfs_trace_console_buffers[i][j];
-}
-
-static inline struct cfs_trace_cpu_data *
-cfs_trace_get_tcd(void)
-{
-	struct cfs_trace_cpu_data *tcd =
-		&(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
-
-	cfs_trace_lock_tcd(tcd, 0);
-
-	return tcd;
-}
-
-static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
-{
-	cfs_trace_unlock_tcd(tcd, 0);
-
-	put_cpu();
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
-			   struct list_head *stock);
-
-void cfs_trace_assertion_failed(const char *str,
-				struct libcfs_debug_msg_data *m);
-
-/* ASSERTION that is safe to use within the debug system */
-#define __LASSERT(cond)						 \
-do {								    \
-	if (unlikely(!(cond))) {					\
-		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL);     \
-		cfs_trace_assertion_failed("ASSERTION("#cond") failed", \
-					   &msgdata);		   \
-	}							       \
-} while (0)
-
-#define __LASSERT_TAGE_INVARIANT(tage)				  \
-do {								    \
-	__LASSERT(tage);					\
-	__LASSERT(tage->page);				  \
-	__LASSERT(tage->used <= PAGE_SIZE);			 \
-	__LASSERT(page_count(tage->page) > 0);		      \
-} while (0)
-
-#endif	/* LUSTRE_TRACEFILE_PRIVATE */
-
-#endif /* __LIBCFS_TRACEFILE_H__ */
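
union cfs_trace_data_union above uses a standard kernel idiom: each per-CPU element is padded out to a cache-line multiple via L1_CACHE_ALIGN() so that adjacent CPUs' data never share (and therefore never bounce) a cache line. A userspace illustration of just the padding arithmetic, with an assumed 64-byte line; note the kernel additionally marks the array __cacheline_aligned so the first element itself starts on a line boundary:

#include <stdio.h>

#define L1_CACHE_BYTES 64	/* assumed line size */
#define L1_CACHE_ALIGN(x) \
	(((x) + (L1_CACHE_BYTES - 1)) & ~(unsigned long)(L1_CACHE_BYTES - 1))

struct cpu_data {		/* stand-in for struct cfs_trace_cpu_data */
	unsigned long lock_flags;
	unsigned long cur_pages;
	unsigned short type;
};

/* the union is exactly as large as the padding, so array elements
 * never straddle or share a cache-line boundary
 */
union padded_cpu_data {
	struct cpu_data d;
	char __pad[L1_CACHE_ALIGN(sizeof(struct cpu_data))];
};

int main(void)
{
	printf("raw %zu bytes, padded to %zu bytes\n",
	       sizeof(struct cpu_data), sizeof(union padded_cpu_data));
	return 0;
}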

+ 0 - 10
drivers/staging/lustre/lnet/lnet/Makefile

@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += lnet.o
-
-lnet-y := api-ni.o config.o nidstrings.o net_fault.o		\
-	  lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o	\
-	  lib-socket.o lib-move.o module.o lo.o			\
-	  router.o router_proc.o acceptor.o peer.o

+ 0 - 501
drivers/staging/lustre/lnet/lnet/acceptor.c

@@ -1,501 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/completion.h>
-#include <net/sock.h>
-#include <linux/lnet/lib-lnet.h>
-
-static int   accept_port    = 988;
-static int   accept_backlog = 127;
-static int   accept_timeout = 5;
-
-static struct {
-	int			pta_shutdown;
-	struct socket		*pta_sock;
-	struct completion	pta_signal;
-} lnet_acceptor_state = {
-	.pta_shutdown = 1
-};
-
-int
-lnet_acceptor_port(void)
-{
-	return accept_port;
-}
-EXPORT_SYMBOL(lnet_acceptor_port);
-
-static inline int
-lnet_accept_magic(__u32 magic, __u32 constant)
-{
-	return (magic == constant ||
-		magic == __swab32(constant));
-}
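/*
 * Editor's sketch (not in the original file): the swabbed comparison above
 * lets an opposite-endian peer be recognised -- its constant arrives
 * byte-reversed -- while the caller remembers whether the rest of the
 * request will need swabbing, e.g.:
 */
static inline int example_need_swab(__u32 magic)
{
	if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC))
		return -EPROTO;	/* not an acceptor connection request */
	/* non-zero means the peer has the opposite byte order */
	return magic != LNET_PROTO_ACCEPTOR_MAGIC;
}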
-
-static char *accept = "secure";
-
-module_param(accept, charp, 0444);
-MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)");
-module_param(accept_port, int, 0444);
-MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)");
-module_param(accept_backlog, int, 0444);
-MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog");
-module_param(accept_timeout, int, 0644);
-MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)");
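/*
 * Editor's note (not in the original file): with the permissions above, all
 * four knobs appear under /sys/module/lnet/parameters/, and only
 * accept_timeout (0644) is writable at runtime.  A hypothetical load-time
 * configuration in /etc/modprobe.d/lnet.conf would look like:
 *
 *	options lnet accept=all accept_port=988 accept_timeout=10
 */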
-
-static char *accept_type;
-
-static int
-lnet_acceptor_get_tunables(void)
-{
-	/*
-	 * The userland acceptor uses 'accept_type' instead of 'accept', due to
-	 * a conflict with 'accept(2)', but the kernel acceptor still uses
-	 * 'accept' for compatibility. Hence the trick.
-	 */
-	accept_type = accept;
-	return 0;
-}
-
-int
-lnet_acceptor_timeout(void)
-{
-	return accept_timeout;
-}
-EXPORT_SYMBOL(lnet_acceptor_timeout);
-
-void
-lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
-			   __u32 peer_ip, int peer_port)
-{
-	switch (rc) {
-	/* "normal" errors */
-	case -ECONNREFUSED:
-		CNETERR("Connection to %s at host %pI4h on port %d was refused: check that Lustre is running on that node.\n",
-			libcfs_nid2str(peer_nid),
-			&peer_ip, peer_port);
-		break;
-	case -EHOSTUNREACH:
-	case -ENETUNREACH:
-		CNETERR("Connection to %s at host %pI4h was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n",
-			libcfs_nid2str(peer_nid), &peer_ip);
-		break;
-	case -ETIMEDOUT:
-		CNETERR("Connection to %s at host %pI4h on port %d took too long: that node may be hung or experiencing high load.\n",
-			libcfs_nid2str(peer_nid),
-			&peer_ip, peer_port);
-		break;
-	case -ECONNRESET:
-		LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h on port %d was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n",
-				   libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port,
-				   libcfs_nid2str(peer_nid));
-		break;
-	case -EPROTO:
-		LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at host %pI4h on port %d: is it running a compatible version of Lustre?\n",
-				   libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port);
-		break;
-	case -EADDRINUSE:
-		LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to connect to %s at host %pI4h on port %d\n",
-				   libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port);
-		break;
-	default:
-		LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s at host %pI4h on port %d\n",
-				   rc, libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port);
-		break;
-	}
-}
-EXPORT_SYMBOL(lnet_connect_console_error);
-
-int
-lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-	     __u32 local_ip, __u32 peer_ip, int peer_port)
-{
-	struct lnet_acceptor_connreq cr;
-	struct socket *sock;
-	int rc;
-	int port;
-	int fatal;
-
-	BUILD_BUG_ON(sizeof(cr) > 16);	    /* must not be too big for the stack */
-
-	for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
-	     port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
-	     --port) {
-		/* Iterate through reserved ports. */
-
-		rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip,
-				       peer_port);
-		if (rc) {
-			if (fatal)
-				goto failed;
-			continue;
-		}
-
-		BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
-
-		cr.acr_magic   = LNET_PROTO_ACCEPTOR_MAGIC;
-		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-		cr.acr_nid     = peer_nid;
-
-		if (the_lnet.ln_testprotocompat) {
-			/* single-shot proto check */
-			lnet_net_lock(LNET_LOCK_EX);
-			if (the_lnet.ln_testprotocompat & 4) {
-				cr.acr_version++;
-				the_lnet.ln_testprotocompat &= ~4;
-			}
-			if (the_lnet.ln_testprotocompat & 8) {
-				cr.acr_magic = LNET_PROTO_MAGIC;
-				the_lnet.ln_testprotocompat &= ~8;
-			}
-			lnet_net_unlock(LNET_LOCK_EX);
-		}
-
-		rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
-		if (rc)
-			goto failed_sock;
-
-		*sockp = sock;
-		return 0;
-	}
-
-	rc = -EADDRINUSE;
-	goto failed;
-
- failed_sock:
-	sock_release(sock);
- failed:
-	lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
-	return rc;
-}
-EXPORT_SYMBOL(lnet_connect);
-
-static int
-lnet_accept(struct socket *sock, __u32 magic)
-{
-	struct lnet_acceptor_connreq cr;
-	__u32 peer_ip;
-	int peer_port;
-	int rc;
-	int flip;
-	struct lnet_ni *ni;
-	char *str;
-
-	LASSERT(sizeof(cr) <= 16);	     /* not too big for the stack */
-
-	rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
-	LASSERT(!rc);		      /* we succeeded before */
-
-	if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
-		if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
-			/*
-			 * Future version compatibility!
-			 * When LNET unifies protocols over all LNDs, the first
-			 * thing sent will be a version query. I send back
-			 * LNET_PROTO_ACCEPTOR_MAGIC to tell the peer I'm "old".
-			 */
-			memset(&cr, 0, sizeof(cr));
-			cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
-			cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-			rc = lnet_sock_write(sock, &cr, sizeof(cr),
-					     accept_timeout);
-
-			if (rc)
-				CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n",
-				       &peer_ip, rc);
-			return -EPROTO;
-		}
-
-		if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC))
-			str = "'old' socknal/tcpnal";
-		else
-			str = "unrecognised";
-
-		LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h magic %08x: %s acceptor protocol\n",
-				   &peer_ip, magic, str);
-		return -EPROTO;
-	}
-
-	flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
-
-	rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version),
-			    accept_timeout);
-	if (rc) {
-		CERROR("Error %d reading connection request version from %pI4h\n",
-		       rc, &peer_ip);
-		return -EIO;
-	}
-
-	if (flip)
-		__swab32s(&cr.acr_version);
-
-	if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
-		/*
-		 * Future version compatibility!
-		 * An acceptor-specific protocol rev will first send a version
-		 * query.  I send back my current version to tell the peer
-		 * I'm "old".
-		 */
-		int peer_version = cr.acr_version;
-
-		memset(&cr, 0, sizeof(cr));
-		cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
-		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-
-		rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
-		if (rc)
-			CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n",
-			       peer_version, &peer_ip, rc);
-		return -EPROTO;
-	}
-
-	rc = lnet_sock_read(sock, &cr.acr_nid,
-			    sizeof(cr) -
-			    offsetof(struct lnet_acceptor_connreq, acr_nid),
-			    accept_timeout);
-	if (rc) {
-		CERROR("Error %d reading connection request from %pI4h\n",
-		       rc, &peer_ip);
-		return -EIO;
-	}
-
-	if (flip)
-		__swab64s(&cr.acr_nid);
-
-	ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
-	if (!ni ||	       /* no matching net */
-	    ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
-		if (ni)
-			lnet_ni_decref(ni);
-		LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n",
-				   &peer_ip, libcfs_nid2str(cr.acr_nid));
-		return -EPERM;
-	}
-
-	if (!ni->ni_lnd->lnd_accept) {
-		/* This catches a request for the loopback LND */
-		lnet_ni_decref(ni);
-		LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI does not accept IP connections\n",
-				   &peer_ip, libcfs_nid2str(cr.acr_nid));
-		return -EPERM;
-	}
-
-	CDEBUG(D_NET, "Accept %s from %pI4h\n",
-	       libcfs_nid2str(cr.acr_nid), &peer_ip);
-
-	rc = ni->ni_lnd->lnd_accept(ni, sock);
-
-	lnet_ni_decref(ni);
-	return rc;
-}
-
-static int
-lnet_acceptor(void *arg)
-{
-	struct socket *newsock;
-	int rc;
-	__u32 magic;
-	__u32 peer_ip;
-	int peer_port;
-	int secure = (int)((long)arg);
-
-	LASSERT(!lnet_acceptor_state.pta_sock);
-
-	rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port,
-			      accept_backlog);
-	if (rc) {
-		if (rc == -EADDRINUSE)
-			LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n",
-					   accept_port);
-		else
-			LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n",
-					   accept_port, rc);
-
-		lnet_acceptor_state.pta_sock = NULL;
-	} else {
-		LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
-	}
-
-	/* set init status and unblock parent */
-	lnet_acceptor_state.pta_shutdown = rc;
-	complete(&lnet_acceptor_state.pta_signal);
-
-	if (rc)
-		return rc;
-
-	while (!lnet_acceptor_state.pta_shutdown) {
-		rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
-		if (rc) {
-			if (rc != -EAGAIN) {
-				CWARN("Accept error %d: pausing...\n", rc);
-				set_current_state(TASK_UNINTERRUPTIBLE);
-				schedule_timeout(HZ);
-			}
-			continue;
-		}
-
-		/* the LNet acceptor thread may have been woken to shut down */
-		if (lnet_acceptor_state.pta_shutdown) {
-			sock_release(newsock);
-			break;
-		}
-
-		rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
-		if (rc) {
-			CERROR("Can't determine new connection's address\n");
-			goto failed;
-		}
-
-		if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
-			CERROR("Refusing connection from %pI4h: insecure port %d\n",
-			       &peer_ip, peer_port);
-			goto failed;
-		}
-
-		rc = lnet_sock_read(newsock, &magic, sizeof(magic),
-				    accept_timeout);
-		if (rc) {
-			CERROR("Error %d reading connection request from %pI4h\n",
-			       rc, &peer_ip);
-			goto failed;
-		}
-
-		rc = lnet_accept(newsock, magic);
-		if (rc)
-			goto failed;
-
-		continue;
-
-failed:
-		sock_release(newsock);
-	}
-
-	sock_release(lnet_acceptor_state.pta_sock);
-	lnet_acceptor_state.pta_sock = NULL;
-
-	CDEBUG(D_NET, "Acceptor stopping\n");
-
-	/* unblock lnet_acceptor_stop() */
-	complete(&lnet_acceptor_state.pta_signal);
-	return 0;
-}
-
-static inline int
-accept2secure(const char *acc, long *sec)
-{
-	if (!strcmp(acc, "secure")) {
-		*sec = 1;
-		return 1;
-	} else if (!strcmp(acc, "all")) {
-		*sec = 0;
-		return 1;
-	} else if (!strcmp(acc, "none")) {
-		return 0;
-	}
-
-	LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
-			   acc);
-	return -EINVAL;
-}
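/*
 * Editor's sketch (not in the original file): accept2secure() is tri-state --
 * negative means reject the string, 0 means no acceptor is wanted ("none"),
 * and 1 means start it, with *sec saying whether only privileged source
 * ports are allowed.  A hypothetical caller:
 */
static int example_parse_accept(const char *str)
{
	long secure;
	int rc = accept2secure(str, &secure);

	if (rc <= 0)		/* error, or acceptor disabled */
		return rc;
	/* rc == 1: run the acceptor; secure == 1 restricts source ports */
	return 0;
}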
-
-int
-lnet_acceptor_start(void)
-{
-	struct task_struct *task;
-	int rc;
-	long rc2;
-	long secure;
-
-	/* if acceptor is already running return immediately */
-	if (!lnet_acceptor_state.pta_shutdown)
-		return 0;
-
-	LASSERT(!lnet_acceptor_state.pta_sock);
-
-	rc = lnet_acceptor_get_tunables();
-	if (rc)
-		return rc;
-
-	init_completion(&lnet_acceptor_state.pta_signal);
-	rc = accept2secure(accept_type, &secure);
-	if (rc <= 0)
-		return rc;
-
-	if (!lnet_count_acceptor_nis())  /* not required */
-		return 0;
-
-	task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
-			   "acceptor_%03ld", secure);
-	if (IS_ERR(task)) {
-		rc2 = PTR_ERR(task);
-		CERROR("Can't start acceptor thread: %ld\n", rc2);
-
-		return -ESRCH;
-	}
-
-	/* wait for acceptor to startup */
-	wait_for_completion(&lnet_acceptor_state.pta_signal);
-
-	if (!lnet_acceptor_state.pta_shutdown) {
-		/* started OK */
-		LASSERT(lnet_acceptor_state.pta_sock);
-		return 0;
-	}
-
-	LASSERT(!lnet_acceptor_state.pta_sock);
-
-	return -ENETDOWN;
-}
-
-void
-lnet_acceptor_stop(void)
-{
-	struct sock *sk;
-
-	if (lnet_acceptor_state.pta_shutdown) /* not running */
-		return;
-
-	lnet_acceptor_state.pta_shutdown = 1;
-
-	sk = lnet_acceptor_state.pta_sock->sk;
-
-	/* wake up any sleepers using a safe method */
-	sk->sk_state_change(sk);
-
-	/* block until acceptor signals exit */
-	wait_for_completion(&lnet_acceptor_state.pta_signal);
-}
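(Editor's aside, not part of the original file: taken together, lnet_connect()
and lnet_accept() above implement a 16-byte handshake.  A condensed sketch of
the exchange, using only names visible in the code above; the connreq struct
itself lives in headers removed elsewhere in this commit:

	/* client (lnet_connect)            acceptor (lnet_accept)
	 *
	 * bind a privileged port     -->   reject if secure && port too high
	 * write the 16-byte connreq  -->   read acr_magic; note byte order
	 *   { acr_magic,                   read acr_version; NAK a mismatch
	 *     acr_version,                 read acr_nid; find a matching NI
	 *     acr_nid }                    ni->ni_lnd->lnd_accept(ni, sock)
	 */

A magic or version mismatch is answered with the acceptor's own magic/version
pair, so a newer peer can discover it is talking to an "old" node instead of
just timing out.)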

+ 0 - 2307
drivers/staging/lustre/lnet/lnet/api-ni.c

@@ -1,2307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/log2.h>
-#include <linux/ktime.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-#define D_LNI D_CONSOLE
-
-struct lnet the_lnet;		/* THE state of the network */
-EXPORT_SYMBOL(the_lnet);
-
-static char *ip2nets = "";
-module_param(ip2nets, charp, 0444);
-MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
-
-static char *networks = "";
-module_param(networks, charp, 0444);
-MODULE_PARM_DESC(networks, "local networks");
-
-static char *routes = "";
-module_param(routes, charp, 0444);
-MODULE_PARM_DESC(routes, "routes to non-local networks");
-
-static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
-module_param(rnet_htable_size, int, 0444);
-MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
-		     struct lnet_process_id __user *ids, int n_ids);
-
-static char *
-lnet_get_routes(void)
-{
-	return routes;
-}
-
-static char *
-lnet_get_networks(void)
-{
-	char *nets;
-	int rc;
-
-	if (*networks && *ip2nets) {
-		LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n");
-		return NULL;
-	}
-
-	if (*ip2nets) {
-		rc = lnet_parse_ip2nets(&nets, ip2nets);
-		return !rc ? nets : NULL;
-	}
-
-	if (*networks)
-		return networks;
-
-	return "tcp";
-}
-
-static void
-lnet_init_locks(void)
-{
-	spin_lock_init(&the_lnet.ln_eq_wait_lock);
-	init_waitqueue_head(&the_lnet.ln_eq_waitq);
-	init_waitqueue_head(&the_lnet.ln_rc_waitq);
-	mutex_init(&the_lnet.ln_lnd_mutex);
-	mutex_init(&the_lnet.ln_api_mutex);
-}
-
-static int
-lnet_create_remote_nets_table(void)
-{
-	int i;
-	struct list_head *hash;
-
-	LASSERT(!the_lnet.ln_remote_nets_hash);
-	LASSERT(the_lnet.ln_remote_nets_hbits > 0);
-	hash = kvmalloc_array(LNET_REMOTE_NETS_HASH_SIZE, sizeof(*hash),
-			      GFP_KERNEL);
-	if (!hash) {
-		CERROR("Failed to create remote nets hash table\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&hash[i]);
-	the_lnet.ln_remote_nets_hash = hash;
-	return 0;
-}
-
-static void
-lnet_destroy_remote_nets_table(void)
-{
-	int i;
-
-	if (!the_lnet.ln_remote_nets_hash)
-		return;
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
-		LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
-
-	kvfree(the_lnet.ln_remote_nets_hash);
-	the_lnet.ln_remote_nets_hash = NULL;
-}
-
-static void
-lnet_destroy_locks(void)
-{
-	if (the_lnet.ln_res_lock) {
-		cfs_percpt_lock_free(the_lnet.ln_res_lock);
-		the_lnet.ln_res_lock = NULL;
-	}
-
-	if (the_lnet.ln_net_lock) {
-		cfs_percpt_lock_free(the_lnet.ln_net_lock);
-		the_lnet.ln_net_lock = NULL;
-	}
-}
-
-static int
-lnet_create_locks(void)
-{
-	lnet_init_locks();
-
-	the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
-	if (!the_lnet.ln_res_lock)
-		goto failed;
-
-	the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
-	if (!the_lnet.ln_net_lock)
-		goto failed;
-
-	return 0;
-
- failed:
-	lnet_destroy_locks();
-	return -ENOMEM;
-}
-
-static void lnet_assert_wire_constants(void)
-{
-	/*
-	 * Wire protocol assertions generated by 'wirecheck'
-	 * running on Linux robert.bartonsoftware.com 2.6.8-1.521
-	 * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
-	 * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
-	 */
-
-	/* Constants... */
-	BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
-	BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
-	BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
-	BUILD_BUG_ON(LNET_MSG_ACK != 0);
-	BUILD_BUG_ON(LNET_MSG_PUT != 1);
-	BUILD_BUG_ON(LNET_MSG_GET != 2);
-	BUILD_BUG_ON(LNET_MSG_REPLY != 3);
-	BUILD_BUG_ON(LNET_MSG_HELLO != 4);
-
-	/* Checks for struct lnet_handle_wire */
-	BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) != 0);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_object_cookie) != 8);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
-
-	/* Checks for struct lnet_magicversion */
-	BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
-	BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_minor) != 6);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
-
-	/* Checks for struct lnet_hdr */
-	BUILD_BUG_ON((int)sizeof(struct lnet_hdr) != 72);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_nid) != 0);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_nid) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_nid) != 8);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_nid) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_pid) != 16);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_pid) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_pid) != 20);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_pid) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, type) != 24);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->type) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, payload_length) != 28);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->payload_length) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg) != 40);
-
-	/* Ack */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.match_bits) != 48);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.mlength) != 56);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) != 4);
-
-	/* Put */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.match_bits) != 48);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.hdr_data) != 56);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ptl_index) != 64);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.offset) != 68);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) != 4);
-
-	/* Get */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.return_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.match_bits) != 48);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.ptl_index) != 56);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.src_offset) != 60);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.sink_length) != 64);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) != 4);
-
-	/* Reply */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) != 16);
-
-	/* Hello */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.incarnation) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.type) != 40);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) != 4);
-}
-
-static struct lnet_lnd *
-lnet_find_lnd_by_type(__u32 type)
-{
-	struct lnet_lnd *lnd;
-	struct list_head *tmp;
-
-	/* holding lnd mutex */
-	list_for_each(tmp, &the_lnet.ln_lnds) {
-		lnd = list_entry(tmp, struct lnet_lnd, lnd_list);
-
-		if (lnd->lnd_type == type)
-			return lnd;
-	}
-
-	return NULL;
-}
-
-void
-lnet_register_lnd(struct lnet_lnd *lnd)
-{
-	mutex_lock(&the_lnet.ln_lnd_mutex);
-
-	LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
-	LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type));
-
-	list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
-	lnd->lnd_refcount = 0;
-
-	CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
-
-	mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_register_lnd);
-
-void
-lnet_unregister_lnd(struct lnet_lnd *lnd)
-{
-	mutex_lock(&the_lnet.ln_lnd_mutex);
-
-	LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
-	LASSERT(!lnd->lnd_refcount);
-
-	list_del(&lnd->lnd_list);
-	CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
-
-	mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_unregister_lnd);
-
-void
-lnet_counters_get(struct lnet_counters *counters)
-{
-	struct lnet_counters *ctr;
-	int i;
-
-	memset(counters, 0, sizeof(*counters));
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
-		counters->msgs_max     += ctr->msgs_max;
-		counters->msgs_alloc   += ctr->msgs_alloc;
-		counters->errors       += ctr->errors;
-		counters->send_count   += ctr->send_count;
-		counters->recv_count   += ctr->recv_count;
-		counters->route_count  += ctr->route_count;
-		counters->drop_count   += ctr->drop_count;
-		counters->send_length  += ctr->send_length;
-		counters->recv_length  += ctr->recv_length;
-		counters->route_length += ctr->route_length;
-		counters->drop_length  += ctr->drop_length;
-	}
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-EXPORT_SYMBOL(lnet_counters_get);
-
-void
-lnet_counters_reset(void)
-{
-	struct lnet_counters *counters;
-	int i;
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
-		memset(counters, 0, sizeof(struct lnet_counters));
-
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static char *
-lnet_res_type2str(int type)
-{
-	switch (type) {
-	default:
-		LBUG();
-	case LNET_COOKIE_TYPE_MD:
-		return "MD";
-	case LNET_COOKIE_TYPE_ME:
-		return "ME";
-	case LNET_COOKIE_TYPE_EQ:
-		return "EQ";
-	}
-}
-
-static void
-lnet_res_container_cleanup(struct lnet_res_container *rec)
-{
-	int count = 0;
-
-	if (!rec->rec_type) /* not set yet, it's uninitialized */
-		return;
-
-	while (!list_empty(&rec->rec_active)) {
-		struct list_head *e = rec->rec_active.next;
-
-		list_del_init(e);
-		if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
-			kfree(list_entry(e, struct lnet_eq, eq_list));
-
-		} else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
-			kfree(list_entry(e, struct lnet_libmd, md_list));
-
-		} else { /* NB: Active MEs should be attached on portals */
-			LBUG();
-		}
-		count++;
-	}
-
-	if (count > 0) {
-		/*
-		 * Found live MDs/MEs/EQs; the user really should unlink/free
-		 * all of them before finalizing LNet, but if they didn't,
-		 * we have to recycle the garbage for them
-		 */
-		CERROR("%d active elements on exit of %s container\n",
-		       count, lnet_res_type2str(rec->rec_type));
-	}
-
-	kfree(rec->rec_lh_hash);
-	rec->rec_lh_hash = NULL;
-
-	rec->rec_type = 0; /* mark it as finalized */
-}
-
-static int
-lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
-{
-	int rc = 0;
-	int i;
-
-	LASSERT(!rec->rec_type);
-
-	rec->rec_type = type;
-	INIT_LIST_HEAD(&rec->rec_active);
-	rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
-
-	/* Arbitrary choice of hash table size */
-	rec->rec_lh_hash = kvmalloc_cpt(LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]),
-					GFP_KERNEL, cpt);
-	if (!rec->rec_lh_hash) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	for (i = 0; i < LNET_LH_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
-
-	return 0;
-
-out:
-	CERROR("Failed to setup %s resource container\n",
-	       lnet_res_type2str(type));
-	lnet_res_container_cleanup(rec);
-	return rc;
-}
-
-static void
-lnet_res_containers_destroy(struct lnet_res_container **recs)
-{
-	struct lnet_res_container *rec;
-	int i;
-
-	cfs_percpt_for_each(rec, i, recs)
-		lnet_res_container_cleanup(rec);
-
-	cfs_percpt_free(recs);
-}
-
-static struct lnet_res_container **
-lnet_res_containers_create(int type)
-{
-	struct lnet_res_container **recs;
-	struct lnet_res_container *rec;
-	int rc;
-	int i;
-
-	recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
-	if (!recs) {
-		CERROR("Failed to allocate %s resource containers\n",
-		       lnet_res_type2str(type));
-		return NULL;
-	}
-
-	cfs_percpt_for_each(rec, i, recs) {
-		rc = lnet_res_container_setup(rec, i, type);
-		if (rc) {
-			lnet_res_containers_destroy(recs);
-			return NULL;
-		}
-	}
-
-	return recs;
-}
-
-struct lnet_libhandle *
-lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
-{
-	/* ALWAYS called with lnet_res_lock held */
-	struct list_head *head;
-	struct lnet_libhandle *lh;
-	unsigned int hash;
-
-	if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
-		return NULL;
-
-	hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
-	head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
-
-	list_for_each_entry(lh, head, lh_hash_chain) {
-		if (lh->lh_cookie == cookie)
-			return lh;
-	}
-
-	return NULL;
-}
-
-void
-lnet_res_lh_initialize(struct lnet_res_container *rec,
-		       struct lnet_libhandle *lh)
-{
-	/* ALWAYS called with lnet_res_lock held */
-	unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
-	unsigned int hash;
-
-	lh->lh_cookie = rec->rec_lh_cookie;
-	rec->rec_lh_cookie += 1 << ibits;
-
-	hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
-
-	list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
-}
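/*
 * Editor's sketch (not in the original file) of the cookie layout the two
 * helpers above imply: the low LNET_COOKIE_TYPE_BITS hold the resource type,
 * the next LNET_CPT_BITS hold the CPT, and everything above is a counter
 * bumped by (1 << ibits) on each use:
 *
 *	63              ibits                                 0
 *	+---------------+---------------+---------------------+
 *	| counter       | cpt           | type (MD/ME/EQ)     |
 *	+---------------+---------------+---------------------+
 *
 * lnet_res_lh_lookup() first rejects cookies of the wrong type via
 * LNET_COOKIE_MASK, then hashes the counter bits into rec_lh_hash.
 */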
-
-static int lnet_unprepare(void);
-
-static int
-lnet_prepare(lnet_pid_t requested_pid)
-{
-	/* Prepare to bring up the network */
-	struct lnet_res_container **recs;
-	int rc = 0;
-
-	if (requested_pid == LNET_PID_ANY) {
-		/* Don't instantiate LNET just for me */
-		return -ENETDOWN;
-	}
-
-	LASSERT(!the_lnet.ln_refcount);
-
-	the_lnet.ln_routing = 0;
-
-	LASSERT(!(requested_pid & LNET_PID_USERFLAG));
-	the_lnet.ln_pid = requested_pid;
-
-	INIT_LIST_HEAD(&the_lnet.ln_test_peers);
-	INIT_LIST_HEAD(&the_lnet.ln_nis);
-	INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
-	INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
-	INIT_LIST_HEAD(&the_lnet.ln_routers);
-	INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
-	INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
-
-	rc = lnet_create_remote_nets_table();
-	if (rc)
-		goto failed;
-	/*
-	 * NB the interface cookie in wire handles guards against delayed
-	 * replies and ACKs appearing valid after reboot.
-	 */
-	the_lnet.ln_interface_cookie = ktime_get_ns();
-
-	the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
-						sizeof(struct lnet_counters));
-	if (!the_lnet.ln_counters) {
-		CERROR("Failed to allocate counters for LNet\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	rc = lnet_peer_tables_create();
-	if (rc)
-		goto failed;
-
-	rc = lnet_msg_containers_create();
-	if (rc)
-		goto failed;
-
-	rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
-				      LNET_COOKIE_TYPE_EQ);
-	if (rc)
-		goto failed;
-
-	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
-	if (!recs) {
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	the_lnet.ln_me_containers = recs;
-
-	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
-	if (!recs) {
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	the_lnet.ln_md_containers = recs;
-
-	rc = lnet_portals_create();
-	if (rc) {
-		CERROR("Failed to create portals for LNet: %d\n", rc);
-		goto failed;
-	}
-
-	return 0;
-
- failed:
-	lnet_unprepare();
-	return rc;
-}
-
-static int
-lnet_unprepare(void)
-{
-	/*
-	 * NB no LNET_LOCK since this is the last reference.  All LND instances
-	 * have shut down already, so it is safe to unlink and free all
-	 * descriptors, even those that appear committed to a network op (eg MD
-	 * with non-zero pending count)
-	 */
-	lnet_fail_nid(LNET_NID_ANY, 0);
-
-	LASSERT(!the_lnet.ln_refcount);
-	LASSERT(list_empty(&the_lnet.ln_test_peers));
-	LASSERT(list_empty(&the_lnet.ln_nis));
-	LASSERT(list_empty(&the_lnet.ln_nis_cpt));
-	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
-	lnet_portals_destroy();
-
-	if (the_lnet.ln_md_containers) {
-		lnet_res_containers_destroy(the_lnet.ln_md_containers);
-		the_lnet.ln_md_containers = NULL;
-	}
-
-	if (the_lnet.ln_me_containers) {
-		lnet_res_containers_destroy(the_lnet.ln_me_containers);
-		the_lnet.ln_me_containers = NULL;
-	}
-
-	lnet_res_container_cleanup(&the_lnet.ln_eq_container);
-
-	lnet_msg_containers_destroy();
-	lnet_peer_tables_destroy();
-	lnet_rtrpools_free(0);
-
-	if (the_lnet.ln_counters) {
-		cfs_percpt_free(the_lnet.ln_counters);
-		the_lnet.ln_counters = NULL;
-	}
-	lnet_destroy_remote_nets_table();
-
-	return 0;
-}
-
-struct lnet_ni  *
-lnet_net2ni_locked(__u32 net, int cpt)
-{
-	struct list_head *tmp;
-	struct lnet_ni *ni;
-
-	LASSERT(cpt != LNET_LOCK_EX);
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (LNET_NIDNET(ni->ni_nid) == net) {
-			lnet_ni_addref_locked(ni, cpt);
-			return ni;
-		}
-	}
-
-	return NULL;
-}
-
-struct lnet_ni *
-lnet_net2ni(__u32 net)
-{
-	struct lnet_ni *ni;
-
-	lnet_net_lock(0);
-	ni = lnet_net2ni_locked(net, 0);
-	lnet_net_unlock(0);
-
-	return ni;
-}
-EXPORT_SYMBOL(lnet_net2ni);
-
-static unsigned int
-lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
-{
-	__u64 key = nid;
-	unsigned int val;
-
-	LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
-
-	if (number == 1)
-		return 0;
-
-	val = hash_long(key, LNET_CPT_BITS);
-	/* NB: LNET_CPT_NUMBER doesn't have to be a power of 2 */
-	if (val < number)
-		return val;
-
-	return (unsigned int)(key + val + (val >> 1)) % number;
-}
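/*
 * Editor's worked example (not in the original file): hash_long() yields a
 * value in [0, 2^LNET_CPT_BITS), but the CPT count need not be a power of
 * two.  Values already below `number' are used as-is; larger ones are folded
 * back with a modulo that mixes the key in again.  E.g. with number == 3 and
 * val == 6, the result is (key + 6 + 3) % 3, so different NIDs that hash to
 * the same val can still land on different CPTs.
 */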
-
-int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
-{
-	struct lnet_ni *ni;
-
-	/* must be called with lnet_net_lock held */
-	if (LNET_CPT_NUMBER == 1)
-		return 0; /* the only one */
-
-	/* taking lnet_net_lock(any) would be OK */
-	if (!list_empty(&the_lnet.ln_nis_cpt)) {
-		list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
-			if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
-				continue;
-
-			LASSERT(ni->ni_cpts);
-			return ni->ni_cpts[lnet_nid_cpt_hash
-					   (nid, ni->ni_ncpts)];
-		}
-	}
-
-	return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-}
-
-int
-lnet_cpt_of_nid(lnet_nid_t nid)
-{
-	int cpt;
-	int cpt2;
-
-	if (LNET_CPT_NUMBER == 1)
-		return 0; /* the only one */
-
-	if (list_empty(&the_lnet.ln_nis_cpt))
-		return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
-	cpt = lnet_net_lock_current();
-	cpt2 = lnet_cpt_of_nid_locked(nid);
-	lnet_net_unlock(cpt);
-
-	return cpt2;
-}
-EXPORT_SYMBOL(lnet_cpt_of_nid);
-
-int
-lnet_islocalnet(__u32 net)
-{
-	struct lnet_ni *ni;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-
-	ni = lnet_net2ni_locked(net, cpt);
-	if (ni)
-		lnet_ni_decref_locked(ni, cpt);
-
-	lnet_net_unlock(cpt);
-
-	return !!ni;
-}
-
-struct lnet_ni  *
-lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
-{
-	struct lnet_ni *ni;
-	struct list_head *tmp;
-
-	LASSERT(cpt != LNET_LOCK_EX);
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (ni->ni_nid == nid) {
-			lnet_ni_addref_locked(ni, cpt);
-			return ni;
-		}
-	}
-
-	return NULL;
-}
-
-int
-lnet_islocalnid(lnet_nid_t nid)
-{
-	struct lnet_ni *ni;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-	ni = lnet_nid2ni_locked(nid, cpt);
-	if (ni)
-		lnet_ni_decref_locked(ni, cpt);
-	lnet_net_unlock(cpt);
-
-	return !!ni;
-}
-
-int
-lnet_count_acceptor_nis(void)
-{
-	/* Return the # of NIs that need the acceptor. */
-	int count = 0;
-	struct list_head *tmp;
-	struct lnet_ni *ni;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (ni->ni_lnd->lnd_accept)
-			count++;
-	}
-
-	lnet_net_unlock(cpt);
-
-	return count;
-}
-
-static struct lnet_ping_info *
-lnet_ping_info_create(int num_ni)
-{
-	struct lnet_ping_info *ping_info;
-	unsigned int infosz;
-
-	infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
-	ping_info = kvzalloc(infosz, GFP_KERNEL);
-	if (!ping_info) {
-		CERROR("Can't allocate ping info[%d]\n", num_ni);
-		return NULL;
-	}
-
-	ping_info->pi_nnis = num_ni;
-	ping_info->pi_pid = the_lnet.ln_pid;
-	ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
-	ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
-
-	return ping_info;
-}
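/*
 * Editor's note (not in the original file): pi_ni[] is a flexible array
 * member, so offsetof(struct lnet_ping_info, pi_ni[num_ni]) sizes the fixed
 * header plus exactly num_ni status slots in one allocation.  The same
 * expression reappears below as the MD length in lnet_ping_info_setup(),
 * which keeps the exported buffer and the allocation in lockstep.
 */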
-
-static inline int
-lnet_get_ni_count(void)
-{
-	struct lnet_ni *ni;
-	int count = 0;
-
-	lnet_net_lock(0);
-
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
-		count++;
-
-	lnet_net_unlock(0);
-
-	return count;
-}
-
-static inline void
-lnet_ping_info_free(struct lnet_ping_info *pinfo)
-{
-	kvfree(pinfo);
-}
-
-static void
-lnet_ping_info_destroy(void)
-{
-	struct lnet_ni *ni;
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		lnet_ni_lock(ni);
-		ni->ni_status = NULL;
-		lnet_ni_unlock(ni);
-	}
-
-	lnet_ping_info_free(the_lnet.ln_ping_info);
-	the_lnet.ln_ping_info = NULL;
-
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static void
-lnet_ping_event_handler(struct lnet_event *event)
-{
-	struct lnet_ping_info *pinfo = event->md.user_ptr;
-
-	if (event->unlinked)
-		pinfo->pi_features = LNET_PING_FEAT_INVAL;
-}
-
-static int
-lnet_ping_info_setup(struct lnet_ping_info **ppinfo,
-		     struct lnet_handle_md *md_handle,
-		     int ni_count, bool set_eq)
-{
-	struct lnet_process_id id = {LNET_NID_ANY, LNET_PID_ANY};
-	struct lnet_handle_me me_handle;
-	struct lnet_md md = { NULL };
-	int rc, rc2;
-
-	if (set_eq) {
-		rc = LNetEQAlloc(0, lnet_ping_event_handler,
-				 &the_lnet.ln_ping_target_eq);
-		if (rc) {
-			CERROR("Can't allocate ping EQ: %d\n", rc);
-			return rc;
-		}
-	}
-
-	*ppinfo = lnet_ping_info_create(ni_count);
-	if (!*ppinfo) {
-		rc = -ENOMEM;
-		goto failed_0;
-	}
-
-	rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
-			  LNET_PROTO_PING_MATCHBITS, 0,
-			  LNET_UNLINK, LNET_INS_AFTER,
-			  &me_handle);
-	if (rc) {
-		CERROR("Can't create ping ME: %d\n", rc);
-		goto failed_1;
-	}
-
-	/* initialize md content */
-	md.start = *ppinfo;
-	md.length = offsetof(struct lnet_ping_info,
-			     pi_ni[(*ppinfo)->pi_nnis]);
-	md.threshold = LNET_MD_THRESH_INF;
-	md.max_size = 0;
-	md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
-		     LNET_MD_MANAGE_REMOTE;
-	md.user_ptr  = NULL;
-	md.eq_handle = the_lnet.ln_ping_target_eq;
-	md.user_ptr = *ppinfo;
-
-	rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
-	if (rc) {
-		CERROR("Can't attach ping MD: %d\n", rc);
-		goto failed_2;
-	}
-
-	return 0;
-
-failed_2:
-	rc2 = LNetMEUnlink(me_handle);
-	LASSERT(!rc2);
-failed_1:
-	lnet_ping_info_free(*ppinfo);
-	*ppinfo = NULL;
-failed_0:
-	if (set_eq)
-		LNetEQFree(the_lnet.ln_ping_target_eq);
-	return rc;
-}
-
-static void
-lnet_ping_md_unlink(struct lnet_ping_info *pinfo,
-		    struct lnet_handle_md *md_handle)
-{
-	LNetMDUnlink(*md_handle);
-	LNetInvalidateMDHandle(md_handle);
-
-	/* NB md could be busy; this just starts the unlink */
-	while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
-		CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
-		set_current_state(TASK_IDLE);
-		schedule_timeout(HZ);
-	}
-}
-
-static void
-lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
-{
-	struct lnet_ni_status *ns;
-	struct lnet_ni *ni;
-	int i = 0;
-
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		LASSERT(i < ping_info->pi_nnis);
-
-		ns = &ping_info->pi_ni[i];
-
-		ns->ns_nid = ni->ni_nid;
-
-		lnet_ni_lock(ni);
-		ns->ns_status = (ni->ni_status) ?
-				 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
-		ni->ni_status = ns;
-		lnet_ni_unlock(ni);
-
-		i++;
-	}
-}
-
-static void
-lnet_ping_target_update(struct lnet_ping_info *pinfo,
-			struct lnet_handle_md md_handle)
-{
-	struct lnet_ping_info *old_pinfo = NULL;
-	struct lnet_handle_md old_md;
-
-	/* switch the NIs to point to the new ping info created */
-	lnet_net_lock(LNET_LOCK_EX);
-
-	if (!the_lnet.ln_routing)
-		pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
-	lnet_ping_info_install_locked(pinfo);
-
-	if (the_lnet.ln_ping_info) {
-		old_pinfo = the_lnet.ln_ping_info;
-		old_md = the_lnet.ln_ping_target_md;
-	}
-	the_lnet.ln_ping_target_md = md_handle;
-	the_lnet.ln_ping_info = pinfo;
-
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	if (old_pinfo) {
-		/* unlink the old ping info */
-		lnet_ping_md_unlink(old_pinfo, &old_md);
-		lnet_ping_info_free(old_pinfo);
-	}
-}
-
-static void
-lnet_ping_target_fini(void)
-{
-	int rc;
-
-	lnet_ping_md_unlink(the_lnet.ln_ping_info,
-			    &the_lnet.ln_ping_target_md);
-
-	rc = LNetEQFree(the_lnet.ln_ping_target_eq);
-	LASSERT(!rc);
-
-	lnet_ping_info_destroy();
-}
-
-static int
-lnet_ni_tq_credits(struct lnet_ni *ni)
-{
-	int credits;
-
-	LASSERT(ni->ni_ncpts >= 1);
-
-	if (ni->ni_ncpts == 1)
-		return ni->ni_maxtxcredits;
-
-	credits = ni->ni_maxtxcredits / ni->ni_ncpts;
-	credits = max(credits, 8 * ni->ni_peertxcredits);
-	credits = min(credits, ni->ni_maxtxcredits);
-
-	return credits;
-}
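/*
 * Editor's worked example (not in the original file): with
 * ni_maxtxcredits == 256, ni_ncpts == 4 and ni_peertxcredits == 8, the even
 * split is 256 / 4 == 64; the lower bound max(64, 8 * 8) == 64 and the upper
 * bound min(64, 256) == 64 leave each tx queue 64 credits.  The max() clause
 * only bites when many CPTs would starve a queue below eight peers' worth
 * of credits.
 */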
-
-static void
-lnet_ni_unlink_locked(struct lnet_ni *ni)
-{
-	if (!list_empty(&ni->ni_cptlist)) {
-		list_del_init(&ni->ni_cptlist);
-		lnet_ni_decref_locked(ni, 0);
-	}
-
-	/* move it to the zombie list so nobody can find it anymore */
-	LASSERT(!list_empty(&ni->ni_list));
-	list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
-	lnet_ni_decref_locked(ni, 0);	/* drop ln_nis' ref */
-}
-
-static void
-lnet_clear_zombies_nis_locked(void)
-{
-	int i;
-	int islo;
-	struct lnet_ni *ni;
-	struct lnet_ni *temp;
-
-	/*
-	 * Now wait for the NIs I just nuked to show up on ln_nis_zombie
-	 * and shut them down in guaranteed thread context
-	 */
-	i = 2;
-	list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) {
-		int *ref;
-		int j;
-
-		list_del_init(&ni->ni_list);
-		cfs_percpt_for_each(ref, j, ni->ni_refs) {
-			if (!*ref)
-				continue;
-			/* still busy, add it back to zombie list */
-			list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
-			break;
-		}
-
-		if (!list_empty(&ni->ni_list)) {
-			lnet_net_unlock(LNET_LOCK_EX);
-			++i;
-			if ((i & (-i)) == i) {	/* log at power-of-2 intervals */
-				CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
-				       libcfs_nid2str(ni->ni_nid));
-			}
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-			lnet_net_lock(LNET_LOCK_EX);
-			continue;
-		}
-
-		ni->ni_lnd->lnd_refcount--;
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		islo = ni->ni_lnd->lnd_type == LOLND;
-
-		LASSERT(!in_interrupt());
-		ni->ni_lnd->lnd_shutdown(ni);
-
-		/*
-		 * can't deref lnd anymore now; it might have unregistered
-		 * itself...
-		 */
-		if (!islo)
-			CDEBUG(D_LNI, "Removed LNI %s\n",
-			       libcfs_nid2str(ni->ni_nid));
-
-		lnet_ni_free(ni);
-		i = 2;
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-}
-
-static void
-lnet_shutdown_lndnis(void)
-{
-	struct lnet_ni *ni;
-	struct lnet_ni *temp;
-	int i;
-
-	/* NB called holding the global mutex */
-
-	/* All quiet on the API front */
-	LASSERT(!the_lnet.ln_shutdown);
-	LASSERT(!the_lnet.ln_refcount);
-	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_shutdown = 1;	/* flag shutdown */
-
-	/* Unlink NIs from the global table */
-	list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) {
-		lnet_ni_unlink_locked(ni);
-	}
-
-	/* Drop the cached loopback NI. */
-	if (the_lnet.ln_loni) {
-		lnet_ni_decref_locked(the_lnet.ln_loni, 0);
-		the_lnet.ln_loni = NULL;
-	}
-
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	/*
-	 * Clear lazy portals and drop delayed messages which hold refs
-	 * on their lnet_msg::msg_rxpeer
-	 */
-	for (i = 0; i < the_lnet.ln_nportals; i++)
-		LNetClearLazyPortal(i);
-
-	/*
-	 * Clear the peer table and wait for all peers to go (they hold refs on
-	 * their NIs)
-	 */
-	lnet_peer_tables_cleanup(NULL);
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	lnet_clear_zombies_nis_locked();
-	the_lnet.ln_shutdown = 0;
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/* shut down the NI and release the refcount */
-static void
-lnet_shutdown_lndni(struct lnet_ni *ni)
-{
-	int i;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	lnet_ni_unlink_locked(ni);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	/* clear messages for this NI on the lazy portal */
-	for (i = 0; i < the_lnet.ln_nportals; i++)
-		lnet_clear_lazy_portal(ni, i, "Shutting down NI");
-
-	/* Do peer table cleanup for this ni */
-	lnet_peer_tables_cleanup(ni);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	lnet_clear_zombies_nis_locked();
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
-{
-	struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
-	int rc = -EINVAL;
-	int lnd_type;
-	struct lnet_lnd *lnd;
-	struct lnet_tx_queue *tq;
-	int i;
-	u32 seed;
-
-	lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-
-	LASSERT(libcfs_isknown_lnd(lnd_type));
-
-	if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
-	    lnd_type == IIBLND || lnd_type == VIBLND) {
-		CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
-		goto failed0;
-	}
-
-	/* Make sure this new NI is unique. */
-	lnet_net_lock(LNET_LOCK_EX);
-	rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
-	lnet_net_unlock(LNET_LOCK_EX);
-	if (!rc) {
-		if (lnd_type == LOLND) {
-			lnet_ni_free(ni);
-			return 0;
-		}
-
-		CERROR("Net %s is not unique\n",
-		       libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
-		rc = -EEXIST;
-		goto failed0;
-	}
-
-	mutex_lock(&the_lnet.ln_lnd_mutex);
-	lnd = lnet_find_lnd_by_type(lnd_type);
-
-	if (!lnd) {
-		mutex_unlock(&the_lnet.ln_lnd_mutex);
-		rc = request_module("%s", libcfs_lnd2modname(lnd_type));
-		mutex_lock(&the_lnet.ln_lnd_mutex);
-
-		lnd = lnet_find_lnd_by_type(lnd_type);
-		if (!lnd) {
-			mutex_unlock(&the_lnet.ln_lnd_mutex);
-			CERROR("Can't load LND %s, module %s, rc=%d\n",
-			       libcfs_lnd2str(lnd_type),
-			       libcfs_lnd2modname(lnd_type), rc);
-			rc = -EINVAL;
-			goto failed0;
-		}
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	lnd->lnd_refcount++;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	ni->ni_lnd = lnd;
-
-	if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
-		lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
-
-	if (lnd_tunables) {
-		ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
-					      GFP_NOFS);
-		if (!ni->ni_lnd_tunables) {
-			mutex_unlock(&the_lnet.ln_lnd_mutex);
-			rc = -ENOMEM;
-			goto failed0;
-		}
-		memcpy(ni->ni_lnd_tunables, lnd_tunables,
-		       sizeof(*ni->ni_lnd_tunables));
-	}
-
-	/*
-	 * If given some LND tunable parameters, parse those now to
-	 * override the values in the NI structure.
-	 */
-	if (conf) {
-		if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
-			ni->ni_peerrtrcredits =
-				conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
-		if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
-			ni->ni_peertimeout =
-				conf->cfg_config_u.cfg_net.net_peer_timeout;
-		if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
-			ni->ni_peertxcredits =
-				conf->cfg_config_u.cfg_net.net_peer_tx_credits;
-		if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
-			ni->ni_maxtxcredits =
-				conf->cfg_config_u.cfg_net.net_max_tx_credits;
-	}
-
-	rc = lnd->lnd_startup(ni);
-
-	mutex_unlock(&the_lnet.ln_lnd_mutex);
-
-	if (rc) {
-		LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
-				   rc, libcfs_lnd2str(lnd->lnd_type));
-		lnet_net_lock(LNET_LOCK_EX);
-		lnd->lnd_refcount--;
-		lnet_net_unlock(LNET_LOCK_EX);
-		goto failed0;
-	}
-
-	LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	/* refcount for ln_nis */
-	lnet_ni_addref_locked(ni, 0);
-	list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
-	if (ni->ni_cpts) {
-		lnet_ni_addref_locked(ni, 0);
-		list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
-	}
-
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	if (lnd->lnd_type == LOLND) {
-		lnet_ni_addref(ni);
-		LASSERT(!the_lnet.ln_loni);
-		the_lnet.ln_loni = ni;
-		return 0;
-	}
-
-	if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) {
-		LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
-				   libcfs_lnd2str(lnd->lnd_type),
-				   !ni->ni_peertxcredits ?
-				   "" : "per-peer ");
-		/*
-		 * shut down the NI, since if we get here it must already
-		 * have been started
-		 */
-		lnet_shutdown_lndni(ni);
-		return -EINVAL;
-	}
-
-	cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
-		tq->tq_credits_min =
-		tq->tq_credits_max =
-		tq->tq_credits = lnet_ni_tq_credits(ni);
-	}
-
-	/* Nodes with small feet have little entropy. The NID for this
-	 * node gives the most entropy in the low bits.
-	 */
-	seed = LNET_NIDADDR(ni->ni_nid);
-	add_device_randomness(&seed, sizeof(seed));
-
-	CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
-	       libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
-	       lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
-	       ni->ni_peerrtrcredits, ni->ni_peertimeout);
-
-	return 0;
-failed0:
-	lnet_ni_free(ni);
-	return rc;
-}
-
-static int
-lnet_startup_lndnis(struct list_head *nilist)
-{
-	struct lnet_ni *ni;
-	int rc;
-	int ni_count = 0;
-
-	while (!list_empty(nilist)) {
-		ni = list_entry(nilist->next, struct lnet_ni, ni_list);
-		list_del(&ni->ni_list);
-		rc = lnet_startup_lndni(ni, NULL);
-
-		if (rc < 0)
-			goto failed;
-
-		ni_count++;
-	}
-
-	return ni_count;
-failed:
-	lnet_shutdown_lndnis();
-
-	return rc;
-}
-
-/**
- * Initialize LNet library.
- *
- * Automatically called at module loading time. Caller has to call
- * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
- * latter returned 0. It must be called exactly once.
- *
- * \retval 0 on success
- * \retval -ve on failures.
- */
-int lnet_lib_init(void)
-{
-	int rc;
-
-	lnet_assert_wire_constants();
-
-	memset(&the_lnet, 0, sizeof(the_lnet));
-
-	/* refer to global cfs_cpt_tab for now */
-	the_lnet.ln_cpt_table	= cfs_cpt_tab;
-	the_lnet.ln_cpt_number	= cfs_cpt_number(cfs_cpt_tab);
-
-	LASSERT(the_lnet.ln_cpt_number > 0);
-	if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
-		/* we are at risk of exhausting the lh_cookie space */
-		CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change the CPT-table setting and retry\n",
-		       the_lnet.ln_cpt_number, LNET_CPT_MAX);
-		return -E2BIG;
-	}
-
-	while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
-		the_lnet.ln_cpt_bits++;
-
-	rc = lnet_create_locks();
-	if (rc) {
-		CERROR("Can't create LNet global locks: %d\n", rc);
-		return rc;
-	}
-
-	the_lnet.ln_refcount = 0;
-	LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
-	INIT_LIST_HEAD(&the_lnet.ln_lnds);
-	INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
-	INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
-
-	/*
-	 * The hash table size is the number of bits it takes to express
-	 * rnet_htable_size, minus 1 (better to underestimate than
-	 * overestimate, so we don't waste memory).
-	 */
-	if (rnet_htable_size <= 0)
-		rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
-	else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
-		rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
-	the_lnet.ln_remote_nets_hbits = max_t(int, 1,
-					   order_base_2(rnet_htable_size) - 1);
-
-	/*
-	 * All LNDs apart from the LOLND are in separate modules.  They
-	 * register themselves when their module loads, and unregister
-	 * themselves when their module is unloaded.
-	 */
-	lnet_register_lnd(&the_lolnd);
-	return 0;
-}
-
-/**
- * Finalize LNet library.
- *
- * \pre lnet_lib_init() called with success.
- * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
- */
-void lnet_lib_exit(void)
-{
-	LASSERT(!the_lnet.ln_refcount);
-
-	while (!list_empty(&the_lnet.ln_lnds))
-		lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
-					       struct lnet_lnd, lnd_list));
-	lnet_destroy_locks();
-}
-
-/**
- * Set LNet PID and start LNet interfaces, routing, and forwarding.
- *
- * Users must call this function at least once before any other functions.
- * For each successful call there must be a corresponding call to
- * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
- * ignored.
- *
- * The PID used by LNet may be different from the one requested.
- * See LNetGetId().
- *
- * \param requested_pid PID requested by the caller.
- *
- * \return >= 0 on success, and < 0 error code on failures.
- */
-int
-LNetNIInit(lnet_pid_t requested_pid)
-{
-	int im_a_router = 0;
-	int rc;
-	int ni_count;
-	struct lnet_ping_info *pinfo;
-	struct lnet_handle_md md_handle;
-	struct list_head net_head;
-
-	INIT_LIST_HEAD(&net_head);
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-
-	CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
-
-	if (the_lnet.ln_refcount > 0) {
-		rc = the_lnet.ln_refcount++;
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-	}
-
-	rc = lnet_prepare(requested_pid);
-	if (rc) {
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-	}
-
-	/* Add in the loopback network */
-	if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) {
-		rc = -ENOMEM;
-		goto err_empty_list;
-	}
-
-	/*
-	 * If LNet is being initialized via DLC it is possible
-	 * that the user requests not to load module parameters (ones which
-	 * are supported by DLC) on initialization.  Therefore, make sure not
-	 * to load networks, routes and forwarding from module parameters
-	 * in this case. On cleanup after a failure, only clean up the
-	 * routes if they have been loaded.
-	 */
-	if (!the_lnet.ln_nis_from_mod_params) {
-		rc = lnet_parse_networks(&net_head, lnet_get_networks());
-		if (rc < 0)
-			goto err_empty_list;
-	}
-
-	ni_count = lnet_startup_lndnis(&net_head);
-	if (ni_count < 0) {
-		rc = ni_count;
-		goto err_empty_list;
-	}
-
-	if (!the_lnet.ln_nis_from_mod_params) {
-		rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
-		if (rc)
-			goto err_shutdown_lndnis;
-
-		rc = lnet_check_routes();
-		if (rc)
-			goto err_destroy_routes;
-
-		rc = lnet_rtrpools_alloc(im_a_router);
-		if (rc)
-			goto err_destroy_routes;
-	}
-
-	rc = lnet_acceptor_start();
-	if (rc)
-		goto err_destroy_routes;
-
-	the_lnet.ln_refcount = 1;
-	/* Now I may use my own API functions... */
-
-	rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
-	if (rc)
-		goto err_acceptor_stop;
-
-	lnet_ping_target_update(pinfo, md_handle);
-
-	rc = lnet_router_checker_start();
-	if (rc)
-		goto err_stop_ping;
-
-	lnet_fault_init();
-	lnet_router_debugfs_init();
-
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	return 0;
-
-err_stop_ping:
-	lnet_ping_target_fini();
-err_acceptor_stop:
-	the_lnet.ln_refcount = 0;
-	lnet_acceptor_stop();
-err_destroy_routes:
-	if (!the_lnet.ln_nis_from_mod_params)
-		lnet_destroy_routes();
-err_shutdown_lndnis:
-	lnet_shutdown_lndnis();
-err_empty_list:
-	lnet_unprepare();
-	LASSERT(rc < 0);
-	mutex_unlock(&the_lnet.ln_api_mutex);
-	while (!list_empty(&net_head)) {
-		struct lnet_ni *ni;
-
-		ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-		list_del_init(&ni->ni_list);
-		lnet_ni_free(ni);
-	}
-	return rc;
-}
-EXPORT_SYMBOL(LNetNIInit);
-
-/**
- * Stop LNet interfaces, routing, and forwarding.
- *
- * Users must call this function once for each successful call to LNetNIInit().
- * Once the LNetNIFini() operation has been started, the results of pending
- * API operations are undefined.
- *
- * \return always 0 for current implementation.
- */
-int
-LNetNIFini(void)
-{
-	mutex_lock(&the_lnet.ln_api_mutex);
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (the_lnet.ln_refcount != 1) {
-		the_lnet.ln_refcount--;
-	} else {
-		LASSERT(!the_lnet.ln_niinit_self);
-
-		lnet_fault_fini();
-		lnet_router_debugfs_fini();
-		lnet_router_checker_stop();
-		lnet_ping_target_fini();
-
-		/* Teardown fns that use my own API functions BEFORE here */
-		the_lnet.ln_refcount = 0;
-
-		lnet_acceptor_stop();
-		lnet_destroy_routes();
-		lnet_shutdown_lndnis();
-		lnet_unprepare();
-	}
-
-	mutex_unlock(&the_lnet.ln_api_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(LNetNIFini);
-
-/**
- * Grabs the NI data from the ni structure and fills in the out
- * parameters.
- *
- * \param[in]  ni	network interface structure
- * \param[out] config	NI configuration
- */
-static void
-lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
-{
-	struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
-	struct lnet_ioctl_net_config *net_config;
-	size_t min_size, tunable_size = 0;
-	int i;
-
-	if (!ni || !config)
-		return;
-
-	net_config = (struct lnet_ioctl_net_config *)config->cfg_bulk;
-	if (!net_config)
-		return;
-
-	BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
-		     ARRAY_SIZE(net_config->ni_interfaces));
-
-	for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
-		if (!ni->ni_interfaces[i])
-			break;
-
-		strncpy(net_config->ni_interfaces[i],
-			ni->ni_interfaces[i],
-			sizeof(net_config->ni_interfaces[i]));
-	}
-
-	config->cfg_nid = ni->ni_nid;
-	config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
-	config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
-	config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
-	config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
-
-	net_config->ni_status = ni->ni_status->ns_status;
-
-	if (ni->ni_cpts) {
-		int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
-
-		for (i = 0; i < num_cpts; i++)
-			net_config->ni_cpts[i] = ni->ni_cpts[i];
-
-		config->cfg_ncpts = num_cpts;
-	}
-
-	/*
-	 * See if userland tools sent in a newer and larger version
-	 * of struct lnet_tunables than what the kernel uses.
-	 */
-	min_size = sizeof(*config) + sizeof(*net_config);
-
-	if (config->cfg_hdr.ioc_len > min_size)
-		tunable_size = config->cfg_hdr.ioc_len - min_size;
-
-	/* Don't copy too much data to user space */
-	min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
-	lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
-
-	if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
-		memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
-		config->cfg_config_u.cfg_net.net_interface_count = 1;
-
-		/* Tell userland that the kernel side has less data */
-		if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
-			min_size = tunable_size - sizeof(*ni->ni_lnd_tunables);
-			config->cfg_hdr.ioc_len -= min_size;
-		}
-	}
-}
-
-static int
-lnet_get_net_config(struct lnet_ioctl_config_data *config)
-{
-	struct lnet_ni *ni;
-	struct list_head *tmp;
-	int idx = config->cfg_count;
-	int cpt, i = 0;
-	int rc = -ENOENT;
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		if (i++ != idx)
-			continue;
-
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-		lnet_ni_lock(ni);
-		lnet_fill_ni_info(ni, config);
-		lnet_ni_unlock(ni);
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-
-int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
-{
-	char *nets = conf->cfg_config_u.cfg_net.net_intf;
-	struct lnet_ping_info *pinfo;
-	struct lnet_handle_md md_handle;
-	struct lnet_ni *ni;
-	struct list_head net_head;
-	struct lnet_remotenet *rnet;
-	int rc;
-
-	INIT_LIST_HEAD(&net_head);
-
-	/* Create a ni structure for the network string */
-	rc = lnet_parse_networks(&net_head, nets);
-	if (rc <= 0)
-		return !rc ? -EINVAL : rc;
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-
-	if (rc > 1) {
-		rc = -EINVAL; /* only add one interface per call */
-		goto failed0;
-	}
-
-	ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
-	lnet_net_unlock(LNET_LOCK_EX);
-	/*
-	 * make sure that the net added doesn't invalidate the current
-	 * configuration LNet is keeping
-	 */
-	if (rnet) {
-		CERROR("Adding net %s will invalidate routing configuration\n",
-		       nets);
-		rc = -EUSERS;
-		goto failed0;
-	}
-
-	rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
-				  false);
-	if (rc)
-		goto failed0;
-
-	list_del_init(&ni->ni_list);
-
-	rc = lnet_startup_lndni(ni, conf);
-	if (rc)
-		goto failed1;
-
-	if (ni->ni_lnd->lnd_accept) {
-		rc = lnet_acceptor_start();
-		if (rc < 0) {
-			/* shut down the ni that we just started */
-			CERROR("Failed to start up acceptor thread\n");
-			lnet_shutdown_lndni(ni);
-			goto failed1;
-		}
-	}
-
-	lnet_ping_target_update(pinfo, md_handle);
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	return 0;
-
-failed1:
-	lnet_ping_md_unlink(pinfo, &md_handle);
-	lnet_ping_info_free(pinfo);
-failed0:
-	mutex_unlock(&the_lnet.ln_api_mutex);
-	while (!list_empty(&net_head)) {
-		ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-		list_del_init(&ni->ni_list);
-		lnet_ni_free(ni);
-	}
-	return rc;
-}
-
-int
-lnet_dyn_del_ni(__u32 net)
-{
-	struct lnet_ni *ni;
-	struct lnet_ping_info *pinfo;
-	struct lnet_handle_md md_handle;
-	int rc;
-
-	/* don't allow userspace to shut down the LOLND */
-	if (LNET_NETTYP(net) == LOLND)
-		return -EINVAL;
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-	/* create and link a new ping info, before removing the old one */
-	rc = lnet_ping_info_setup(&pinfo, &md_handle,
-				  lnet_get_ni_count() - 1, false);
-	if (rc)
-		goto out;
-
-	ni = lnet_net2ni(net);
-	if (!ni) {
-		rc = -EINVAL;
-		goto failed;
-	}
-
-	/* decrement the reference counter taken by lnet_net2ni() */
-	lnet_ni_decref_locked(ni, 0);
-
-	lnet_shutdown_lndni(ni);
-
-	if (!lnet_count_acceptor_nis())
-		lnet_acceptor_stop();
-
-	lnet_ping_target_update(pinfo, md_handle);
-	goto out;
-failed:
-	lnet_ping_md_unlink(pinfo, &md_handle);
-	lnet_ping_info_free(pinfo);
-out:
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	return rc;
-}
-
-/**
- * LNet ioctl handler.
- */
-int
-LNetCtl(unsigned int cmd, void *arg)
-{
-	struct libcfs_ioctl_data *data = arg;
-	struct lnet_ioctl_config_data *config;
-	struct lnet_process_id id = {0};
-	struct lnet_ni *ni;
-	int rc;
-	unsigned long secs_passed;
-
-	BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX <
-		     sizeof(struct lnet_ioctl_net_config) +
-		     sizeof(struct lnet_ioctl_config_data));
-
-	switch (cmd) {
-	case IOC_LIBCFS_GET_NI:
-		rc = LNetGetId(data->ioc_count, &id);
-		data->ioc_nid = id.nid;
-		return rc;
-
-	case IOC_LIBCFS_FAIL_NID:
-		return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-
-	case IOC_LIBCFS_ADD_ROUTE:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		rc = lnet_add_route(config->cfg_net,
-				    config->cfg_config_u.cfg_route.rtr_hop,
-				    config->cfg_nid,
-				    config->cfg_config_u.cfg_route.rtr_priority);
-		if (!rc) {
-			rc = lnet_check_routes();
-			if (rc)
-				lnet_del_route(config->cfg_net,
-					       config->cfg_nid);
-		}
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-
-	case IOC_LIBCFS_DEL_ROUTE:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		rc = lnet_del_route(config->cfg_net, config->cfg_nid);
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-
-	case IOC_LIBCFS_GET_ROUTE:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		return lnet_get_route(config->cfg_count,
-				      &config->cfg_net,
-				      &config->cfg_config_u.cfg_route.rtr_hop,
-				      &config->cfg_nid,
-				      &config->cfg_config_u.cfg_route.rtr_flags,
-				      &config->cfg_config_u.cfg_route.rtr_priority);
-
-	case IOC_LIBCFS_GET_NET: {
-		size_t total = sizeof(*config) +
-			       sizeof(struct lnet_ioctl_net_config);
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < total)
-			return -EINVAL;
-
-		return lnet_get_net_config(config);
-	}
-
-	case IOC_LIBCFS_GET_LNET_STATS: {
-		struct lnet_ioctl_lnet_stats *lnet_stats = arg;
-
-		if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
-			return -EINVAL;
-
-		lnet_counters_get(&lnet_stats->st_cntrs);
-		return 0;
-	}
-
-	case IOC_LIBCFS_CONFIG_RTR:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		if (config->cfg_config_u.cfg_buffers.buf_enable) {
-			rc = lnet_rtrpools_enable();
-			mutex_unlock(&the_lnet.ln_api_mutex);
-			return rc;
-		}
-		lnet_rtrpools_disable();
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return 0;
-
-	case IOC_LIBCFS_ADD_BUF:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny,
-					  config->cfg_config_u.cfg_buffers.buf_small,
-					  config->cfg_config_u.cfg_buffers.buf_large);
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-
-	case IOC_LIBCFS_GET_BUF: {
-		struct lnet_ioctl_pool_cfg *pool_cfg;
-		size_t total = sizeof(*config) + sizeof(*pool_cfg);
-
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < total)
-			return -EINVAL;
-
-		pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
-		return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
-	}
-
-	case IOC_LIBCFS_GET_PEER_INFO: {
-		struct lnet_ioctl_peer *peer_info = arg;
-
-		if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
-			return -EINVAL;
-
-		return lnet_get_peer_info(peer_info->pr_count,
-			&peer_info->pr_nid,
-			peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
-	}
-
-	case IOC_LIBCFS_NOTIFY_ROUTER:
-		secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]);
-		secs_passed *= msecs_to_jiffies(MSEC_PER_SEC);
-
-		return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
-				   jiffies - secs_passed);
-
-	case IOC_LIBCFS_LNET_DIST:
-		rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
-		if (rc < 0 && rc != -EHOSTUNREACH)
-			return rc;
-
-		data->ioc_u32[0] = rc;
-		return 0;
-
-	case IOC_LIBCFS_TESTPROTOCOMPAT:
-		lnet_net_lock(LNET_LOCK_EX);
-		the_lnet.ln_testprotocompat = data->ioc_flags;
-		lnet_net_unlock(LNET_LOCK_EX);
-		return 0;
-
-	case IOC_LIBCFS_LNET_FAULT:
-		return lnet_fault_ctl(data->ioc_flags, data);
-
-	case IOC_LIBCFS_PING:
-		id.nid = data->ioc_nid;
-		id.pid = data->ioc_u32[0];
-		rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
-			       data->ioc_pbuf1,
-			       data->ioc_plen1 / sizeof(struct lnet_process_id));
-		if (rc < 0)
-			return rc;
-		data->ioc_count = rc;
-		return 0;
-
-	default:
-		ni = lnet_net2ni(data->ioc_net);
-		if (!ni)
-			return -EINVAL;
-
-		if (!ni->ni_lnd->lnd_ctl)
-			rc = -EINVAL;
-		else
-			rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
-
-		lnet_ni_decref(ni);
-		return rc;
-	}
-	/* not reached */
-}
-EXPORT_SYMBOL(LNetCtl);
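
As an illustration of the dispatch above (a sketch, not part of the patch):
for IOC_LIBCFS_GET_NI the handler simply forwards to LNetGetId(), so a
kernel caller could fetch the first local NID like this, assuming the
libcfs ioctl structures from this tree:

/* Sketch: look up the NID of interface 0 through the ioctl path. */
static void example_print_first_nid(void)
{
	struct libcfs_ioctl_data data = { .ioc_count = 0 };

	if (!LNetCtl(IOC_LIBCFS_GET_NI, &data))
		pr_info("NID[0] = %s\n", libcfs_nid2str(data.ioc_nid));
}
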
-
-void LNetDebugPeer(struct lnet_process_id id)
-{
-	lnet_debug_peer(id.nid);
-}
-EXPORT_SYMBOL(LNetDebugPeer);
-
-/**
- * Retrieve the lnet_process_id ID of the LNet interface at \a index. Note
- * that all interfaces share the same PID, as requested by LNetNIInit().
- *
- * \param index Index of the interface to look up.
- * \param id On successful return, this location will hold the
- * lnet_process_id ID of the interface.
- *
- * \retval 0 If an interface exists at \a index.
- * \retval -ENOENT If no interface has been found.
- */
-int
-LNetGetId(unsigned int index, struct lnet_process_id *id)
-{
-	struct lnet_ni *ni;
-	struct list_head *tmp;
-	int cpt;
-	int rc = -ENOENT;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		if (index--)
-			continue;
-
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		id->nid = ni->ni_nid;
-		id->pid = the_lnet.ln_pid;
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-EXPORT_SYMBOL(LNetGetId);
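
A short usage sketch (illustrative only): callers enumerate the local
interfaces by increasing the index until -ENOENT comes back:

/* Sketch: list every local interface ID. */
static void example_list_ids(void)
{
	struct lnet_process_id id;
	unsigned int i;

	for (i = 0; !LNetGetId(i, &id); i++)
		pr_info("interface %u: %s\n", i, libcfs_id2str(id));
}
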
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
-		     struct lnet_process_id __user *ids, int n_ids)
-{
-	struct lnet_handle_eq eqh;
-	struct lnet_handle_md mdh;
-	struct lnet_event event;
-	struct lnet_md md = { NULL };
-	int which;
-	int unlinked = 0;
-	int replied = 0;
-	const int a_long_time = 60000; /* ms */
-	int infosz;
-	struct lnet_ping_info *info;
-	struct lnet_process_id tmpid;
-	int i;
-	int nob;
-	int rc;
-	int rc2;
-
-	infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
-
-	if (n_ids <= 0 ||
-	    id.nid == LNET_NID_ANY ||
-	    timeout_ms > 500000 ||	      /* arbitrary limit! */
-	    n_ids > 20)			 /* arbitrary limit! */
-		return -EINVAL;
-
-	if (id.pid == LNET_PID_ANY)
-		id.pid = LNET_PID_LUSTRE;
-
-	info = kzalloc(infosz, GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	/* NB 2 events max (including any unlink event) */
-	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
-	if (rc) {
-		CERROR("Can't allocate EQ: %d\n", rc);
-		goto out_0;
-	}
-
-	/* initialize md content */
-	md.start     = info;
-	md.length    = infosz;
-	md.threshold = 2; /*GET/REPLY*/
-	md.max_size  = 0;
-	md.options   = LNET_MD_TRUNCATE;
-	md.user_ptr  = NULL;
-	md.eq_handle = eqh;
-
-	rc = LNetMDBind(md, LNET_UNLINK, &mdh);
-	if (rc) {
-		CERROR("Can't bind MD: %d\n", rc);
-		goto out_1;
-	}
-
-	rc = LNetGet(LNET_NID_ANY, mdh, id,
-		     LNET_RESERVED_PORTAL,
-		     LNET_PROTO_PING_MATCHBITS, 0);
-
-	if (rc) {
-		/* Don't CERROR; this could be deliberate! */
-
-		rc2 = LNetMDUnlink(mdh);
-		LASSERT(!rc2);
-
-		/* NB must wait for the UNLINK event below... */
-		unlinked = 1;
-		timeout_ms = a_long_time;
-	}
-
-	do {
-		/* MUST block for unlink to complete */
-
-		rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked,
-				 &event, &which);
-
-		CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
-		       (rc2 <= 0) ? -1 : event.type,
-		       (rc2 <= 0) ? -1 : event.status,
-		       (rc2 > 0 && event.unlinked) ? " unlinked" : "");
-
-		LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
-
-		if (rc2 <= 0 || event.status) {
-			/* timeout or error */
-			if (!replied && !rc)
-				rc = (rc2 < 0) ? rc2 :
-				     !rc2 ? -ETIMEDOUT :
-				     event.status;
-
-			if (!unlinked) {
-				/* Ensure completion in finite time... */
-				LNetMDUnlink(mdh);
-				/* No assertion (racing with network) */
-				unlinked = 1;
-				timeout_ms = a_long_time;
-			} else if (!rc2) {
-				/* timed out waiting for unlink */
-				CWARN("ping %s: late network completion\n",
-				      libcfs_id2str(id));
-			}
-		} else if (event.type == LNET_EVENT_REPLY) {
-			replied = 1;
-			rc = event.mlength;
-		}
-
-	} while (rc2 <= 0 || !event.unlinked);
-
-	if (!replied) {
-		if (rc >= 0)
-			CWARN("%s: Unexpected rc >= 0 but no reply!\n",
-			      libcfs_id2str(id));
-		rc = -EIO;
-		goto out_1;
-	}
-
-	nob = rc;
-	LASSERT(nob >= 0 && nob <= infosz);
-
-	rc = -EPROTO;			   /* if I can't parse... */
-
-	if (nob < 8) {
-		/* can't check magic/version */
-		CERROR("%s: ping info too short %d\n",
-		       libcfs_id2str(id), nob);
-		goto out_1;
-	}
-
-	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
-		lnet_swap_pinginfo(info);
-	} else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
-		CERROR("%s: Unexpected magic %08x\n",
-		       libcfs_id2str(id), info->pi_magic);
-		goto out_1;
-	}
-
-	if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) {
-		CERROR("%s: ping w/o NI status: 0x%x\n",
-		       libcfs_id2str(id), info->pi_features);
-		goto out_1;
-	}
-
-	if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
-		CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
-		       nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
-		goto out_1;
-	}
-
-	if (info->pi_nnis < n_ids)
-		n_ids = info->pi_nnis;
-
-	if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
-		CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
-		       nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
-		goto out_1;
-	}
-
-	rc = -EFAULT;			   /* If I SEGV... */
-
-	memset(&tmpid, 0, sizeof(tmpid));
-	for (i = 0; i < n_ids; i++) {
-		tmpid.pid = info->pi_pid;
-		tmpid.nid = info->pi_ni[i].ns_nid;
-		if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
-			goto out_1;
-	}
-	rc = info->pi_nnis;
-
- out_1:
-	rc2 = LNetEQFree(eqh);
-	if (rc2)
-		CERROR("rc2 %d\n", rc2);
-	LASSERT(!rc2);
-
- out_0:
-	kfree(info);
-	return rc;
-}
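
The reply parsing in lnet_ping() above leans on flexible-array sizing: the
valid length of a reply carrying n entries is offsetof(..., pi_ni[n]), not
a sizeof(). A self-contained sketch of the idiom, using a simplified,
hypothetical stand-in for struct lnet_ping_info:

#include <stddef.h>

/* Simplified stand-in for struct lnet_ping_info. */
struct example_ping_info {
	unsigned int pi_magic;
	unsigned int pi_nnis;
	struct { unsigned long long ns_nid; int ns_status; } pi_ni[];
};

/* Size of a reply holding n_ids entries, as lnet_ping() computes it. */
static inline size_t example_ping_info_size(int n_ids)
{
	return offsetof(struct example_ping_info, pi_ni[n_ids]);
}
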

+ 0 - 1235
drivers/staging/lustre/lnet/lnet/config.c

@@ -1,1235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <linux/ctype.h>
-#include <linux/lnet/lib-lnet.h>
-
-struct lnet_text_buf {	    /* tmp struct for parsing routes */
-	struct list_head ltb_list;	/* stash on lists */
-	int ltb_size;	/* allocated size */
-	char ltb_text[0];     /* text buffer */
-};
-
-static int lnet_tbnob;			/* track text buf allocation */
-#define LNET_MAX_TEXTBUF_NOB     (64 << 10)	/* bound allocation */
-#define LNET_SINGLE_TEXTBUF_NOB  (4 << 10)
-
-static void
-lnet_syntax(char *name, char *str, int offset, int width)
-{
-	static char dots[LNET_SINGLE_TEXTBUF_NOB];
-	static char dashes[LNET_SINGLE_TEXTBUF_NOB];
-
-	memset(dots, '.', sizeof(dots));
-	dots[sizeof(dots) - 1] = 0;
-	memset(dashes, '-', sizeof(dashes));
-	dashes[sizeof(dashes) - 1] = 0;
-
-	LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
-	LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
-			   (int)strlen(name), dots, offset, dots,
-			    (width < 1) ? 0 : width - 1, dashes);
-}
-
-static int
-lnet_issep(char c)
-{
-	switch (c) {
-	case '\n':
-	case '\r':
-	case ';':
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
-{
-	struct list_head *tmp;
-	struct lnet_ni *ni;
-
-	list_for_each(tmp, nilist) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (LNET_NIDNET(ni->ni_nid) == net)
-			return 0;
-	}
-
-	return 1;
-}
-
-void
-lnet_ni_free(struct lnet_ni *ni)
-{
-	int i;
-
-	if (ni->ni_refs)
-		cfs_percpt_free(ni->ni_refs);
-
-	if (ni->ni_tx_queues)
-		cfs_percpt_free(ni->ni_tx_queues);
-
-	if (ni->ni_cpts)
-		cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
-
-	kfree(ni->ni_lnd_tunables);
-
-	for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++)
-		kfree(ni->ni_interfaces[i]);
-
-	/* release reference to net namespace */
-	if (ni->ni_net_ns)
-		put_net(ni->ni_net_ns);
-
-	kfree(ni);
-}
-
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
-{
-	struct lnet_tx_queue *tq;
-	struct lnet_ni *ni;
-	int rc;
-	int i;
-
-	if (!lnet_net_unique(net, nilist)) {
-		LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
-				   libcfs_net2str(net));
-		return NULL;
-	}
-
-	ni = kzalloc(sizeof(*ni), GFP_NOFS);
-	if (!ni) {
-		CERROR("Out of memory creating network %s\n",
-		       libcfs_net2str(net));
-		return NULL;
-	}
-
-	spin_lock_init(&ni->ni_lock);
-	INIT_LIST_HEAD(&ni->ni_cptlist);
-	ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
-				       sizeof(*ni->ni_refs[0]));
-	if (!ni->ni_refs)
-		goto failed;
-
-	ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(),
-					    sizeof(*ni->ni_tx_queues[0]));
-	if (!ni->ni_tx_queues)
-		goto failed;
-
-	cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
-		INIT_LIST_HEAD(&tq->tq_delayed);
-
-	if (!el) {
-		ni->ni_cpts  = NULL;
-		ni->ni_ncpts = LNET_CPT_NUMBER;
-	} else {
-		rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
-		if (rc <= 0) {
-			CERROR("Failed to set CPTs for NI %s: %d\n",
-			       libcfs_net2str(net), rc);
-			goto failed;
-		}
-
-		LASSERT(rc <= LNET_CPT_NUMBER);
-		if (rc == LNET_CPT_NUMBER) {
-			cfs_expr_list_values_free(ni->ni_cpts, LNET_CPT_NUMBER);
-			ni->ni_cpts = NULL;
-		}
-
-		ni->ni_ncpts = rc;
-	}
-
-	/* LND will fill in the address part of the NID */
-	ni->ni_nid = LNET_MKNID(net, 0);
-
-	/* Store net namespace in which current ni is being created */
-	if (current->nsproxy->net_ns)
-		ni->ni_net_ns = get_net(current->nsproxy->net_ns);
-	else
-		ni->ni_net_ns = NULL;
-
-	ni->ni_last_alive = ktime_get_real_seconds();
-	list_add_tail(&ni->ni_list, nilist);
-	return ni;
- failed:
-	lnet_ni_free(ni);
-	return NULL;
-}
-
-int
-lnet_parse_networks(struct list_head *nilist, char *networks)
-{
-	struct cfs_expr_list *el = NULL;
-	char *tokens;
-	char *str;
-	char *tmp;
-	struct lnet_ni *ni;
-	__u32 net;
-	int nnets = 0;
-	struct list_head *temp_node;
-
-	if (!networks) {
-		CERROR("networks string is undefined\n");
-		return -EINVAL;
-	}
-
-	if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
-		/* _WAY_ conservative */
-		LCONSOLE_ERROR_MSG(0x112,
-				   "Can't parse networks: string too long\n");
-		return -EINVAL;
-	}
-
-	tokens = kstrdup(networks, GFP_KERNEL);
-	if (!tokens) {
-		CERROR("Can't allocate net tokens\n");
-		return -ENOMEM;
-	}
-
-	tmp = tokens;
-	str = tokens;
-
-	while (str && *str) {
-		char *comma = strchr(str, ',');
-		char *bracket = strchr(str, '(');
-		char *square = strchr(str, '[');
-		char *iface;
-		int niface;
-		int rc;
-
-		/*
-		 * NB we don't check interface conflicts here; it's the LND's
-		 * responsibility (if it cares at all)
-		 */
-		if (square && (!comma || square < comma)) {
-			/*
-			 * e.g. o2ib0(ib0)[1,2]: the numbers between the
-			 * square brackets are the CPTs this NI should be
-			 * bound to
-			 */
-			if (bracket && bracket > square) {
-				tmp = square;
-				goto failed_syntax;
-			}
-
-			tmp = strchr(square, ']');
-			if (!tmp) {
-				tmp = square;
-				goto failed_syntax;
-			}
-
-			rc = cfs_expr_list_parse(square, tmp - square + 1,
-						 0, LNET_CPT_NUMBER - 1, &el);
-			if (rc) {
-				tmp = square;
-				goto failed_syntax;
-			}
-
-			while (square <= tmp)
-				*square++ = ' ';
-		}
-
-		if (!bracket || (comma && comma < bracket)) {
-			/* no interface list specified */
-
-			if (comma)
-				*comma++ = 0;
-			net = libcfs_str2net(strim(str));
-
-			if (net == LNET_NIDNET(LNET_NID_ANY)) {
-				LCONSOLE_ERROR_MSG(0x113,
-						   "Unrecognised network type\n");
-				tmp = str;
-				goto failed_syntax;
-			}
-
-			if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
-			    !lnet_ni_alloc(net, el, nilist))
-				goto failed;
-
-			if (el) {
-				cfs_expr_list_free(el);
-				el = NULL;
-			}
-
-			str = comma;
-			continue;
-		}
-
-		*bracket = 0;
-		net = libcfs_str2net(strim(str));
-		if (net == LNET_NIDNET(LNET_NID_ANY)) {
-			tmp = str;
-			goto failed_syntax;
-		}
-
-		ni = lnet_ni_alloc(net, el, nilist);
-		if (!ni)
-			goto failed;
-
-		if (el) {
-			cfs_expr_list_free(el);
-			el = NULL;
-		}
-
-		niface = 0;
-		iface = bracket + 1;
-
-		bracket = strchr(iface, ')');
-		if (!bracket) {
-			tmp = iface;
-			goto failed_syntax;
-		}
-
-		*bracket = 0;
-		do {
-			comma = strchr(iface, ',');
-			if (comma)
-				*comma++ = 0;
-
-			iface = strim(iface);
-			if (!*iface) {
-				tmp = iface;
-				goto failed_syntax;
-			}
-
-			if (niface == LNET_MAX_INTERFACES) {
-				LCONSOLE_ERROR_MSG(0x115,
-						   "Too many interfaces for net %s\n",
-						   libcfs_net2str(net));
-				goto failed;
-			}
-
-			/*
-			 * Allocate a separate piece of memory and copy
-			 * the string into it, so we don't have a
-			 * dependency on the tokens string.  This way we
-			 * can free the tokens at the end of the function.
-			 * The newly allocated ni_interfaces[] entries are
-			 * freed when the NI is freed.
-			 */
-			ni->ni_interfaces[niface] = kstrdup(iface, GFP_KERNEL);
-			if (!ni->ni_interfaces[niface]) {
-				CERROR("Can't allocate net interface name\n");
-				goto failed;
-			}
-			niface++;
-			iface = comma;
-		} while (iface);
-
-		str = bracket + 1;
-		comma = strchr(bracket + 1, ',');
-		if (comma) {
-			*comma = 0;
-			str = strim(str);
-			if (*str) {
-				tmp = str;
-				goto failed_syntax;
-			}
-			str = comma + 1;
-			continue;
-		}
-
-		str = strim(str);
-		if (*str) {
-			tmp = str;
-			goto failed_syntax;
-		}
-	}
-
-	list_for_each(temp_node, nilist)
-		nnets++;
-
-	kfree(tokens);
-	return nnets;
-
- failed_syntax:
-	lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
- failed:
-	while (!list_empty(nilist)) {
-		ni = list_entry(nilist->next, struct lnet_ni, ni_list);
-
-		list_del(&ni->ni_list);
-		lnet_ni_free(ni);
-	}
-
-	if (el)
-		cfs_expr_list_free(el);
-
-	kfree(tokens);
-
-	return -EINVAL;
-}
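
For reference, a sketch of the module-parameter strings this parser is
written to accept. The o2ib0(ib0)[1,2] form comes from the comment above;
the rest are inferred from the parsing logic and should be read as
illustrative:

/* Sketch: example "networks" strings for lnet_parse_networks(). */
static const char * const example_networks[] = {
	"tcp0",				/* one net, default interfaces */
	"tcp0(eth0,eth1)",		/* explicit interface list */
	"o2ib0(ib0)[1,2]",		/* bind this NI to CPTs 1 and 2 */
	"tcp0(eth0),o2ib0(ib0)",	/* several nets, comma-separated */
};
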
-
-static struct lnet_text_buf *
-lnet_new_text_buf(int str_len)
-{
-	struct lnet_text_buf *ltb;
-	int nob;
-
-	/* NB allocate space for the terminating 0 */
-	nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]);
-	if (nob > LNET_SINGLE_TEXTBUF_NOB) {
-		/* _way_ conservative for "route net gateway..." */
-		CERROR("text buffer too big\n");
-		return NULL;
-	}
-
-	if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
-		CERROR("Too many text buffers\n");
-		return NULL;
-	}
-
-	ltb = kzalloc(nob, GFP_KERNEL);
-	if (!ltb)
-		return NULL;
-
-	ltb->ltb_size = nob;
-	ltb->ltb_text[0] = 0;
-	lnet_tbnob += nob;
-	return ltb;
-}
-
-static void
-lnet_free_text_buf(struct lnet_text_buf *ltb)
-{
-	lnet_tbnob -= ltb->ltb_size;
-	kfree(ltb);
-}
-
-static void
-lnet_free_text_bufs(struct list_head *tbs)
-{
-	struct lnet_text_buf *ltb;
-
-	while (!list_empty(tbs)) {
-		ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
-		list_del(&ltb->ltb_list);
-		lnet_free_text_buf(ltb);
-	}
-}
-
-static int
-lnet_str2tbs_sep(struct list_head *tbs, char *str)
-{
-	struct list_head pending;
-	char *sep;
-	int nob;
-	int i;
-	struct lnet_text_buf *ltb;
-
-	INIT_LIST_HEAD(&pending);
-
-	/* Split 'str' into separate commands */
-	for (;;) {
-		/* skip leading whitespace */
-		while (isspace(*str))
-			str++;
-
-		/* scan for separator or comment */
-		for (sep = str; *sep; sep++)
-			if (lnet_issep(*sep) || *sep == '#')
-				break;
-
-		nob = (int)(sep - str);
-		if (nob > 0) {
-			ltb = lnet_new_text_buf(nob);
-			if (!ltb) {
-				lnet_free_text_bufs(&pending);
-				return -ENOMEM;
-			}
-
-			for (i = 0; i < nob; i++)
-				if (isspace(str[i]))
-					ltb->ltb_text[i] = ' ';
-				else
-					ltb->ltb_text[i] = str[i];
-
-			ltb->ltb_text[nob] = 0;
-
-			list_add_tail(&ltb->ltb_list, &pending);
-		}
-
-		if (*sep == '#') {
-			/* scan for separator */
-			do {
-				sep++;
-			} while (*sep && !lnet_issep(*sep));
-		}
-
-		if (!*sep)
-			break;
-
-		str = sep + 1;
-	}
-
-	list_splice(&pending, tbs->prev);
-	return 0;
-}
-
-static int
-lnet_expand1tb(struct list_head *list,
-	       char *str, char *sep1, char *sep2,
-	       char *item, int itemlen)
-{
-	int len1 = (int)(sep1 - str);
-	int len2 = strlen(sep2 + 1);
-	struct lnet_text_buf *ltb;
-
-	LASSERT(*sep1 == '[');
-	LASSERT(*sep2 == ']');
-
-	ltb = lnet_new_text_buf(len1 + itemlen + len2);
-	if (!ltb)
-		return -ENOMEM;
-
-	memcpy(ltb->ltb_text, str, len1);
-	memcpy(&ltb->ltb_text[len1], item, itemlen);
-	memcpy(&ltb->ltb_text[len1 + itemlen], sep2 + 1, len2);
-	ltb->ltb_text[len1 + itemlen + len2] = 0;
-
-	list_add_tail(&ltb->ltb_list, list);
-	return 0;
-}
-
-static int
-lnet_str2tbs_expand(struct list_head *tbs, char *str)
-{
-	char num[16];
-	struct list_head pending;
-	char *sep;
-	char *sep2;
-	char *parsed;
-	char *enditem;
-	int lo;
-	int hi;
-	int stride;
-	int i;
-	int nob;
-	int scanned;
-
-	INIT_LIST_HEAD(&pending);
-
-	sep = strchr(str, '[');
-	if (!sep)			/* nothing to expand */
-		return 0;
-
-	sep2 = strchr(sep, ']');
-	if (!sep2)
-		goto failed;
-
-	for (parsed = sep; parsed < sep2; parsed = enditem) {
-		enditem = ++parsed;
-		while (enditem < sep2 && *enditem != ',')
-			enditem++;
-
-		if (enditem == parsed)		/* no empty items */
-			goto failed;
-
-		if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi,
-			   &stride, &scanned) < 3) {
-			if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
-				/* simple string enumeration */
-				if (lnet_expand1tb(&pending, str, sep, sep2,
-						   parsed,
-						   (int)(enditem - parsed))) {
-					goto failed;
-				}
-				continue;
-			}
-
-			stride = 1;
-		}
-
-		/* range expansion */
-
-		if (enditem != parsed + scanned) /* no trailing junk */
-			goto failed;
-
-		if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
-		    (hi - lo) % stride)
-			goto failed;
-
-		for (i = lo; i <= hi; i += stride) {
-			snprintf(num, sizeof(num), "%d", i);
-			nob = strlen(num);
-			if (nob + 1 == sizeof(num))
-				goto failed;
-
-			if (lnet_expand1tb(&pending, str, sep, sep2,
-					   num, nob))
-				goto failed;
-		}
-	}
-
-	list_splice(&pending, tbs->prev);
-	return 1;
-
- failed:
-	lnet_free_text_bufs(&pending);
-	return -EINVAL;
-}
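
A worked example of the bracket expansion above, inferred from the two
sscanf() patterns ("lo-hi/stride" and "lo-hi") and the plain-enumeration
fallback:

/* Sketch: expansions lnet_str2tbs_expand() performs (illustrative). */
static const char * const example_expansions[] = {
	/* "tcp[0-2]"   -> */ "tcp0", "tcp1", "tcp2",
	/* "tcp[0-4/2]" -> */ "tcp0", "tcp2", "tcp4",
	/* "tcp[a,b]"   -> */ "tcpa", "tcpb",	/* simple enumeration */
};
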
-
-static int
-lnet_parse_hops(char *str, unsigned int *hops)
-{
-	int len = strlen(str);
-	int nob = len;
-
-	return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
-		nob == len &&
-		*hops > 0 && *hops < 256);
-}
-
-#define LNET_PRIORITY_SEPARATOR (':')
-
-static int
-lnet_parse_priority(char *str, unsigned int *priority, char **token)
-{
-	int nob;
-	char *sep;
-	int len;
-
-	sep = strchr(str, LNET_PRIORITY_SEPARATOR);
-	if (!sep) {
-		*priority = 0;
-		return 0;
-	}
-	len = strlen(sep + 1);
-
-	if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) {
-		/*
-		 * Update the caller's token pointer so it treats the found
-		 * priority as the token to report in the error message.
-		 */
-		*token += sep - str + 1;
-		return -EINVAL;
-	}
-
-	CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob);
-
-	/*
-	 * Change priority separator to \0 to be able to parse NID
-	 */
-	*sep = '\0';
-	return 0;
-}
-
-static int
-lnet_parse_route(char *str, int *im_a_router)
-{
-	/* static scratch buffer OK (single threaded) */
-	static char cmd[LNET_SINGLE_TEXTBUF_NOB];
-
-	struct list_head nets;
-	struct list_head gateways;
-	struct list_head *tmp1;
-	struct list_head *tmp2;
-	__u32 net;
-	lnet_nid_t nid;
-	struct lnet_text_buf *ltb;
-	int rc;
-	char *sep;
-	char *token = str;
-	int ntokens = 0;
-	int myrc = -1;
-	__u32 hops;
-	int got_hops = 0;
-	unsigned int priority = 0;
-
-	INIT_LIST_HEAD(&gateways);
-	INIT_LIST_HEAD(&nets);
-
-	/* save a copy of the string for error messages */
-	strncpy(cmd, str, sizeof(cmd));
-	cmd[sizeof(cmd) - 1] = '\0';
-
-	sep = str;
-	for (;;) {
-		/* scan for token start */
-		while (isspace(*sep))
-			sep++;
-		if (!*sep) {
-			if (ntokens < (got_hops ? 3 : 2))
-				goto token_error;
-			break;
-		}
-
-		ntokens++;
-		token = sep++;
-
-		/* scan for token end */
-		while (*sep && !isspace(*sep))
-			sep++;
-		if (*sep)
-			*sep++ = 0;
-
-		if (ntokens == 1) {
-			tmp2 = &nets;		/* expanding nets */
-		} else if (ntokens == 2 &&
-			   lnet_parse_hops(token, &hops)) {
-			got_hops = 1;	   /* got a hop count */
-			continue;
-		} else {
-			tmp2 = &gateways;	/* expanding gateways */
-		}
-
-		ltb = lnet_new_text_buf(strlen(token));
-		if (!ltb)
-			goto out;
-
-		strcpy(ltb->ltb_text, token);
-		tmp1 = &ltb->ltb_list;
-		list_add_tail(tmp1, tmp2);
-
-		while (tmp1 != tmp2) {
-			ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
-
-			rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
-			if (rc < 0)
-				goto token_error;
-
-			tmp1 = tmp1->next;
-
-			if (rc > 0) {		/* expanded! */
-				list_del(&ltb->ltb_list);
-				lnet_free_text_buf(ltb);
-				continue;
-			}
-
-			if (ntokens == 1) {
-				net = libcfs_str2net(ltb->ltb_text);
-				if (net == LNET_NIDNET(LNET_NID_ANY) ||
-				    LNET_NETTYP(net) == LOLND)
-					goto token_error;
-			} else {
-				rc = lnet_parse_priority(ltb->ltb_text,
-							 &priority, &token);
-				if (rc < 0)
-					goto token_error;
-
-				nid = libcfs_str2nid(ltb->ltb_text);
-				if (nid == LNET_NID_ANY ||
-				    LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
-					goto token_error;
-			}
-		}
-	}
-
-	/*
-	 * If no hop count was given, flag the value as unset;
-	 * hops is an optional parameter.
-	 */
-	if (!got_hops)
-		hops = LNET_UNDEFINED_HOPS;
-
-	LASSERT(!list_empty(&nets));
-	LASSERT(!list_empty(&gateways));
-
-	list_for_each(tmp1, &nets) {
-		ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
-		net = libcfs_str2net(ltb->ltb_text);
-		LASSERT(net != LNET_NIDNET(LNET_NID_ANY));
-
-		list_for_each(tmp2, &gateways) {
-			ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list);
-			nid = libcfs_str2nid(ltb->ltb_text);
-			LASSERT(nid != LNET_NID_ANY);
-
-			if (lnet_islocalnid(nid)) {
-				*im_a_router = 1;
-				continue;
-			}
-
-			rc = lnet_add_route(net, hops, nid, priority);
-			if (rc && rc != -EEXIST && rc != -EHOSTUNREACH) {
-				CERROR("Can't create route to %s via %s\n",
-				       libcfs_net2str(net),
-				       libcfs_nid2str(nid));
-				goto out;
-			}
-		}
-	}
-
-	myrc = 0;
-	goto out;
-
- token_error:
-	lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
- out:
-	lnet_free_text_bufs(&nets);
-	lnet_free_text_bufs(&gateways);
-	return myrc;
-}
-
-static int
-lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
-{
-	struct lnet_text_buf *ltb;
-
-	while (!list_empty(tbs)) {
-		ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
-		if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
-			lnet_free_text_bufs(tbs);
-			return -EINVAL;
-		}
-
-		list_del(&ltb->ltb_list);
-		lnet_free_text_buf(ltb);
-	}
-
-	return 0;
-}
-
-int
-lnet_parse_routes(char *routes, int *im_a_router)
-{
-	struct list_head tbs;
-	int rc = 0;
-
-	*im_a_router = 0;
-
-	INIT_LIST_HEAD(&tbs);
-
-	if (lnet_str2tbs_sep(&tbs, routes) < 0) {
-		CERROR("Error parsing routes\n");
-		rc = -EINVAL;
-	} else {
-		rc = lnet_parse_route_tbs(&tbs, im_a_router);
-	}
-
-	LASSERT(!lnet_tbnob);
-	return rc;
-}
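
Putting the pieces together, a sketch of "routes" strings this parser is
written to accept: each entry is "<net> [hops] <gateway NID>[:<priority>]",
entries are separated by ';' or newlines, the optional hop count must be
1-255 (see lnet_parse_hops()), and bracket expansion works as above. The
concrete addresses here are made up:

/* Sketch: example "routes" strings for lnet_parse_routes(). */
static const char * const example_routes[] = {
	"o2ib0 192.168.0.1@tcp0",	/* hops omitted -> left unset */
	"o2ib0 2 192.168.0.[1-3]@tcp0",	/* hop count plus expansion */
	"tcp0 10.10.0.1@o2ib0:1",	/* ":1" is the route priority */
	"tcp1 10.10.0.1@o2ib0; tcp2 10.10.0.2@o2ib0",
};
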
-
-static int
-lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
-{
-	LIST_HEAD(list);
-	int rc;
-	int i;
-
-	rc = cfs_ip_addr_parse(token, len, &list);
-	if (rc)
-		return rc;
-
-	for (rc = i = 0; !rc && i < nip; i++)
-		rc = cfs_ip_addr_match(ipaddrs[i], &list);
-
-	cfs_expr_list_free_list(&list);
-
-	return rc;
-}
-
-static int
-lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
-{
-	static char tokens[LNET_SINGLE_TEXTBUF_NOB];
-
-	int matched = 0;
-	int ntokens = 0;
-	int len;
-	char *net = NULL;
-	char *sep;
-	char *token;
-	int rc;
-
-	LASSERT(strlen(net_entry) < sizeof(tokens));
-
-	/* work on a copy of the string */
-	strcpy(tokens, net_entry);
-	sep = tokens;
-	for (;;) {
-		/* scan for token start */
-		while (isspace(*sep))
-			sep++;
-		if (!*sep)
-			break;
-
-		token = sep++;
-
-		/* scan for token end */
-		while (*sep && !isspace(*sep))
-			sep++;
-		if (*sep)
-			*sep++ = 0;
-
-		if (!ntokens++) {
-			net = token;
-			continue;
-		}
-
-		len = strlen(token);
-
-		rc = lnet_match_network_token(token, len, ipaddrs, nip);
-		if (rc < 0) {
-			lnet_syntax("ip2nets", net_entry,
-				    (int)(token - tokens), len);
-			return rc;
-		}
-
-		if (rc)
-			matched |= 1;
-	}
-
-	if (!matched)
-		return 0;
-
-	strcpy(net_entry, net);		 /* replace with matched net */
-	return 1;
-}
-
-static __u32
-lnet_netspec2net(char *netspec)
-{
-	char *bracket = strchr(netspec, '(');
-	__u32 net;
-
-	if (bracket)
-		*bracket = 0;
-
-	net = libcfs_str2net(netspec);
-
-	if (bracket)
-		*bracket = '(';
-
-	return net;
-}
-
-static int
-lnet_splitnets(char *source, struct list_head *nets)
-{
-	int offset = 0;
-	int offset2;
-	int len;
-	struct lnet_text_buf *tb;
-	struct lnet_text_buf *tb2;
-	struct list_head *t;
-	char *sep;
-	char *bracket;
-	__u32 net;
-
-	LASSERT(!list_empty(nets));
-	LASSERT(nets->next == nets->prev);     /* single entry */
-
-	tb = list_entry(nets->next, struct lnet_text_buf, ltb_list);
-
-	for (;;) {
-		sep = strchr(tb->ltb_text, ',');
-		bracket = strchr(tb->ltb_text, '(');
-
-		if (sep && bracket && bracket < sep) {
-			/* netspec lists interfaces... */
-
-			offset2 = offset + (int)(bracket - tb->ltb_text);
-			len = strlen(bracket);
-
-			bracket = strchr(bracket + 1, ')');
-
-			if (!bracket ||
-			    !(bracket[1] == ',' || !bracket[1])) {
-				lnet_syntax("ip2nets", source, offset2, len);
-				return -EINVAL;
-			}
-
-			sep = !bracket[1] ? NULL : bracket + 1;
-		}
-
-		if (sep)
-			*sep++ = 0;
-
-		net = lnet_netspec2net(tb->ltb_text);
-		if (net == LNET_NIDNET(LNET_NID_ANY)) {
-			lnet_syntax("ip2nets", source, offset,
-				    strlen(tb->ltb_text));
-			return -EINVAL;
-		}
-
-		list_for_each(t, nets) {
-			tb2 = list_entry(t, struct lnet_text_buf, ltb_list);
-
-			if (tb2 == tb)
-				continue;
-
-			if (net == lnet_netspec2net(tb2->ltb_text)) {
-				/* duplicate network */
-				lnet_syntax("ip2nets", source, offset,
-					    strlen(tb->ltb_text));
-				return -EINVAL;
-			}
-		}
-
-		if (!sep)
-			return 0;
-
-		offset += (int)(sep - tb->ltb_text);
-		len = strlen(sep);
-		tb2 = lnet_new_text_buf(len);
-		if (!tb2)
-			return -ENOMEM;
-
-		strncpy(tb2->ltb_text, sep, len);
-		tb2->ltb_text[len] = '\0';
-		list_add_tail(&tb2->ltb_list, nets);
-
-		tb = tb2;
-	}
-}
-
-static int
-lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
-{
-	static char networks[LNET_SINGLE_TEXTBUF_NOB];
-	static char source[LNET_SINGLE_TEXTBUF_NOB];
-
-	struct list_head raw_entries;
-	struct list_head matched_nets;
-	struct list_head current_nets;
-	struct list_head *t;
-	struct list_head *t2;
-	struct lnet_text_buf *tb;
-	struct lnet_text_buf *temp;
-	struct lnet_text_buf *tb2;
-	__u32 net1;
-	__u32 net2;
-	int len;
-	int count;
-	int dup;
-	int rc;
-
-	INIT_LIST_HEAD(&raw_entries);
-	if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
-		CERROR("Error parsing ip2nets\n");
-		LASSERT(!lnet_tbnob);
-		return -EINVAL;
-	}
-
-	INIT_LIST_HEAD(&matched_nets);
-	INIT_LIST_HEAD(&current_nets);
-	networks[0] = 0;
-	count = 0;
-	len = 0;
-	rc = 0;
-
-	list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) {
-		strncpy(source, tb->ltb_text, sizeof(source));
-		source[sizeof(source) - 1] = '\0';
-
-		/* on a match, replace ltb_text with the matched network(s) */
-		rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
-		if (rc < 0)
-			break;
-
-		list_del(&tb->ltb_list);
-
-		if (!rc) {		  /* no match */
-			lnet_free_text_buf(tb);
-			continue;
-		}
-
-		/* split into separate networks */
-		INIT_LIST_HEAD(&current_nets);
-		list_add(&tb->ltb_list, &current_nets);
-		rc = lnet_splitnets(source, &current_nets);
-		if (rc < 0)
-			break;
-
-		dup = 0;
-		list_for_each(t, &current_nets) {
-			tb = list_entry(t, struct lnet_text_buf, ltb_list);
-			net1 = lnet_netspec2net(tb->ltb_text);
-			LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY));
-
-			list_for_each(t2, &matched_nets) {
-				tb2 = list_entry(t2, struct lnet_text_buf,
-						 ltb_list);
-				net2 = lnet_netspec2net(tb2->ltb_text);
-				LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY));
-
-				if (net1 == net2) {
-					dup = 1;
-					break;
-				}
-			}
-
-			if (dup)
-				break;
-		}
-
-		if (dup) {
-			lnet_free_text_bufs(&current_nets);
-			continue;
-		}
-
-		list_for_each_safe(t, t2, &current_nets) {
-			tb = list_entry(t, struct lnet_text_buf, ltb_list);
-
-			list_del(&tb->ltb_list);
-			list_add_tail(&tb->ltb_list, &matched_nets);
-
-			len += snprintf(networks + len, sizeof(networks) - len,
-					"%s%s", !len ? "" : ",",
-					tb->ltb_text);
-
-			if (len >= sizeof(networks)) {
-				CERROR("Too many matched networks\n");
-				rc = -E2BIG;
-				goto out;
-			}
-		}
-
-		count++;
-	}
-
- out:
-	lnet_free_text_bufs(&raw_entries);
-	lnet_free_text_bufs(&matched_nets);
-	lnet_free_text_bufs(&current_nets);
-	LASSERT(!lnet_tbnob);
-
-	if (rc < 0)
-		return rc;
-
-	*networksp = networks;
-	return count;
-}
-
-static int
-lnet_ipaddr_enumerate(__u32 **ipaddrsp)
-{
-	int up;
-	__u32 netmask;
-	__u32 *ipaddrs;
-	__u32 *ipaddrs2;
-	int nip;
-	char **ifnames;
-	int nif = lnet_ipif_enumerate(&ifnames);
-	int i;
-	int rc;
-
-	if (nif <= 0)
-		return nif;
-
-	ipaddrs = kcalloc(nif, sizeof(*ipaddrs), GFP_KERNEL);
-	if (!ipaddrs) {
-		CERROR("Can't allocate ipaddrs[%d]\n", nif);
-		lnet_ipif_free_enumeration(ifnames, nif);
-		return -ENOMEM;
-	}
-
-	for (i = nip = 0; i < nif; i++) {
-		if (!strcmp(ifnames[i], "lo"))
-			continue;
-
-		rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask);
-		if (rc) {
-			CWARN("Can't query interface %s: %d\n",
-			      ifnames[i], rc);
-			continue;
-		}
-
-		if (!up) {
-			CWARN("Ignoring interface %s: it's down\n",
-			      ifnames[i]);
-			continue;
-		}
-
-		nip++;
-	}
-
-	lnet_ipif_free_enumeration(ifnames, nif);
-
-	if (nip == nif) {
-		*ipaddrsp = ipaddrs;
-	} else {
-		if (nip > 0) {
-			ipaddrs2 = kcalloc(nip, sizeof(*ipaddrs2),
-					   GFP_KERNEL);
-			if (!ipaddrs2) {
-				CERROR("Can't allocate ipaddrs[%d]\n", nip);
-				nip = -ENOMEM;
-			} else {
-				memcpy(ipaddrs2, ipaddrs,
-				       nip * sizeof(*ipaddrs));
-				*ipaddrsp = ipaddrs2;
-				rc = nip;
-			}
-		}
-		kfree(ipaddrs);
-	}
-	return nip;
-}
-
-int
-lnet_parse_ip2nets(char **networksp, char *ip2nets)
-{
-	__u32 *ipaddrs = NULL;
-	int nip = lnet_ipaddr_enumerate(&ipaddrs);
-	int rc;
-
-	if (nip < 0) {
-		LCONSOLE_ERROR_MSG(0x117,
-				   "Error %d enumerating local IP interfaces for ip2nets to match\n",
-				   nip);
-		return nip;
-	}
-
-	if (!nip) {
-		LCONSOLE_ERROR_MSG(0x118,
-				   "No local IP interfaces for ip2nets to match\n");
-		return -ENOENT;
-	}
-
-	rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
-	kfree(ipaddrs);
-
-	if (rc < 0) {
-		LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
-		return rc;
-	}
-
-	if (!rc) {
-		LCONSOLE_ERROR_MSG(0x11a,
-				   "ip2nets does not match any local IP interfaces\n");
-		return -ENOENT;
-	}
-
-	return 0;
-}
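
A sketch of an "ip2nets" value for the matcher above: each entry pairs a
net spec with IP-address patterns handed to cfs_ip_addr_parse(), and every
entry whose patterns match a local interface address contributes its net
spec to the generated "networks" string. The pattern syntax shown is
illustrative and the addresses are made up:

/* Sketch: an example "ip2nets" string for lnet_parse_ip2nets(). */
static const char *example_ip2nets =
	"tcp(eth0) 192.168.0.[2-10]; "	/* tcp via eth0 on these hosts */
	"o2ib      10.10.*.*";		/* o2ib on any 10.10.x.x host */
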

+ 0 - 426
drivers/staging/lustre/lnet/lnet/lib-eq.c

@@ -1,426 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-eq.c
- *
- * Library level Event queue management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create an event queue that has room for \a count number of events.
- *
- * The event queue is circular and older events will be overwritten by new
- * ones if they are not removed in time by the user using the functions
- * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
- * determine the appropriate size of the event queue to prevent this loss
- * of events. Note that when EQ handler is specified in \a callback, no
- * event loss can happen, since the handler is run for each event deposited
- * into the EQ.
- *
- * \param count The number of events to be stored in the event queue. It
- * will be rounded up to the next power of two.
- * \param callback A handler function that runs when an event is deposited
- * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
- * indicate that no event handler is desired.
- * \param handle On successful return, this location will hold a handle for
- * the newly created EQ.
- *
- * \retval 0       On success.
- * \retval -EINVAL If a parameter is not valid.
- * \retval -ENOMEM If memory for the EQ can't be allocated.
- *
- * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
- */
-int
-LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
-	    struct lnet_handle_eq *handle)
-{
-	struct lnet_eq *eq;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	/*
-	 * We need count to be a power of 2, so that when eq_{enq,deq}_seq
-	 * overflow they don't skip entries and the queue keeps the same
-	 * apparent capacity at all times
-	 */
-	if (count)
-		count = roundup_pow_of_two(count);
-
-	if (callback != LNET_EQ_HANDLER_NONE && count)
-		CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count);
-
-	/*
-	 * count can be 0 if only a callback is needed; then we can
-	 * eliminate the overhead of enqueuing events
-	 */
-	if (!count && callback == LNET_EQ_HANDLER_NONE)
-		return -EINVAL;
-
-	eq = kzalloc(sizeof(*eq), GFP_NOFS);
-	if (!eq)
-		return -ENOMEM;
-
-	if (count) {
-		eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event),
-					       GFP_KERNEL | __GFP_ZERO);
-		if (!eq->eq_events)
-			goto failed;
-		/*
-		 * NB allocator has set all event sequence numbers to 0,
-		 * so all of them should be earlier than eq_deq_seq
-		 */
-	}
-
-	eq->eq_deq_seq = 1;
-	eq->eq_enq_seq = 1;
-	eq->eq_size = count;
-	eq->eq_callback = callback;
-
-	eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
-				       sizeof(*eq->eq_refs[0]));
-	if (!eq->eq_refs)
-		goto failed;
-
-	/* MUST hold the exclusive lnet_res_lock */
-	lnet_res_lock(LNET_LOCK_EX);
-	/*
-	 * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
-	 * both EQ lookup and poll event with only lnet_eq_wait_lock
-	 */
-	lnet_eq_wait_lock();
-
-	lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
-	list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);
-
-	lnet_eq_wait_unlock();
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	lnet_eq2handle(handle, eq);
-	return 0;
-
-failed:
-	kvfree(eq->eq_events);
-
-	if (eq->eq_refs)
-		cfs_percpt_free(eq->eq_refs);
-
-	kfree(eq);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(LNetEQAlloc);
-
-/**
- * Release the resources associated with an event queue if it's idle;
- * otherwise do nothing and it's up to the user to try again.
- *
- * \param eqh A handle for the event queue to be released.
- *
- * \retval 0 If the EQ is not in use and freed.
- * \retval -ENOENT If \a eqh does not point to a valid EQ.
- * \retval -EBUSY  If the EQ is still in use by some MDs.
- */
-int
-LNetEQFree(struct lnet_handle_eq eqh)
-{
-	struct lnet_eq *eq;
-	struct lnet_event *events = NULL;
-	int **refs = NULL;
-	int *ref;
-	int rc = 0;
-	int size = 0;
-	int i;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	lnet_res_lock(LNET_LOCK_EX);
-	/*
-	 * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
-	 * both EQ lookup and poll event with only lnet_eq_wait_lock
-	 */
-	lnet_eq_wait_lock();
-
-	eq = lnet_handle2eq(&eqh);
-	if (!eq) {
-		rc = -ENOENT;
-		goto out;
-	}
-
-	cfs_percpt_for_each(ref, i, eq->eq_refs) {
-		LASSERT(*ref >= 0);
-		if (!*ref)
-			continue;
-
-		CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n",
-		       i, *ref);
-		rc = -EBUSY;
-		goto out;
-	}
-
-	/* stash for free after lock dropped */
-	events = eq->eq_events;
-	size = eq->eq_size;
-	refs = eq->eq_refs;
-
-	lnet_res_lh_invalidate(&eq->eq_lh);
-	list_del(&eq->eq_list);
-	kfree(eq);
- out:
-	lnet_eq_wait_unlock();
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	kvfree(events);
-	if (refs)
-		cfs_percpt_free(refs);
-
-	return rc;
-}
-EXPORT_SYMBOL(LNetEQFree);
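
A small round-trip sketch mirroring the EQ usage in lnet_ping() earlier in
this patch (illustrative, not part of the deleted code):

/* Sketch: allocate a polled EQ, then release it. */
static int example_eq_roundtrip(void)
{
	struct lnet_handle_eq eqh;
	int rc;

	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);  /* 2 events */
	if (rc)
		return rc;
	/* ... bind MDs to eqh and poll with LNetEQPoll() ... */
	return LNetEQFree(eqh);	/* -EBUSY while MDs still reference it */
}
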
-
-void
-lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
-	/* MUST be called with the resource lock held, but w/o lnet_eq_wait_lock */
-	int index;
-
-	if (!eq->eq_size) {
-		LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
-		eq->eq_callback(ev);
-		return;
-	}
-
-	lnet_eq_wait_lock();
-	ev->sequence = eq->eq_enq_seq++;
-
-	LASSERT(is_power_of_2(eq->eq_size));
-	index = ev->sequence & (eq->eq_size - 1);
-
-	eq->eq_events[index] = *ev;
-
-	if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
-		eq->eq_callback(ev);
-
-	/* Wake anyone waiting in LNetEQPoll() */
-	if (waitqueue_active(&the_lnet.ln_eq_waitq))
-		wake_up_all(&the_lnet.ln_eq_waitq);
-	lnet_eq_wait_unlock();
-}
-
-static int
-lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
-	int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
-	struct lnet_event *new_event = &eq->eq_events[new_index];
-	int rc;
-
-	/* must be called with lnet_eq_wait_lock held */
-	if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
-		return 0;
-
-	/* We've got a new event... */
-	*ev = *new_event;
-
-	CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
-	       new_event, eq->eq_deq_seq, eq->eq_size);
-
-	/* ...but did it overwrite an event we've not seen yet? */
-	if (eq->eq_deq_seq == new_event->sequence) {
-		rc = 1;
-	} else {
-		/*
-		 * don't complain with CERROR: some EQs are sized small
-		 * anyway; if it's important, the caller should complain
-		 */
-		CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
-		       eq->eq_deq_seq, new_event->sequence);
-		rc = -EOVERFLOW;
-	}
-
-	eq->eq_deq_seq = new_event->sequence + 1;
-	return rc;
-}
-
-/**
- * A nonblocking function that can be used to get the next event in an EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. The event is removed from the queue.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 0	  No pending event in the EQ.
- * \retval 1	  Indicates success.
- * \retval -ENOENT    If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-/**
- * Block the calling process until there is an event in the EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. This function returns the next event
- * in the EQ and removes it from the EQ.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 1	  Indicates success.
- * \retval -ENOENT    If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-static int
-lnet_eq_wait_locked(int *timeout_ms, long state)
-__must_hold(&the_lnet.ln_eq_wait_lock)
-{
-	int tms = *timeout_ms;
-	int wait;
-	wait_queue_entry_t wl;
-	unsigned long now;
-
-	if (!tms)
-		return -ENXIO; /* don't want to wait and no new event */
-
-	init_waitqueue_entry(&wl, current);
-	set_current_state(state);
-	add_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
-	lnet_eq_wait_unlock();
-
-	if (tms < 0) {
-		schedule();
-	} else {
-		now = jiffies;
-		schedule_timeout(msecs_to_jiffies(tms));
-		tms -= jiffies_to_msecs(jiffies - now);
-		if (tms < 0) /* no more wait but may have new event */
-			tms = 0;
-	}
-
-	wait = tms; /* might need to call here again */
-	*timeout_ms = tms;
-
-	lnet_eq_wait_lock();
-	remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
-	return wait;
-}
-
-/**
- * Block the calling process until there's an event from a set of EQs or
- * timeout happens.
- *
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully, in which case the corresponding event
- * is consumed.
- *
- * LNetEQPoll() provides a timeout to allow applications to poll, block for a
- * fixed period, or block indefinitely.
- *
- * \param eventqs,neq An array of EQ handles, and size of the array.
- * \param timeout_ms Time in milliseconds to wait for an event to occur on
- * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
- * infinite timeout.
- * \param interruptible If true, use TASK_INTERRUPTIBLE; otherwise TASK_NOLOAD.
- * \param event,which On successful return (1 or -EOVERFLOW), \a event will
- * hold the next event in the EQs, and \a which will contain the index of the
- * EQ from which the event was taken.
- *
- * \retval 0	  No pending event in the EQs after timeout.
- * \retval 1	  Indicates success.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ indicated by \a which has been dropped due to limited space in the EQ.
- * \retval -ENOENT    If there's an invalid handle in \a eventqs.
- */
-int
-LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms,
-	   int interruptible,
-	   struct lnet_event *event, int *which)
-{
-	int wait = 1;
-	int rc;
-	int i;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (neq < 1)
-		return -ENOENT;
-
-	lnet_eq_wait_lock();
-
-	for (;;) {
-		for (i = 0; i < neq; i++) {
-			struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]);
-
-			if (!eq) {
-				lnet_eq_wait_unlock();
-				return -ENOENT;
-			}
-
-			rc = lnet_eq_dequeue_event(eq, event);
-			if (rc) {
-				lnet_eq_wait_unlock();
-				*which = i;
-				return rc;
-			}
-		}
-
-		if (!wait)
-			break;
-
-		/*
-		 * return value of lnet_eq_wait_locked:
-		 * -1 : did not wait and no new event can have arrived
-		 *  1 : slept waiting for a new event
-		 *  0 : wait time exhausted, but a new event may have
-		 *      arrived, so dequeue again
-		 */
-		wait = lnet_eq_wait_locked(&timeout_ms,
-					   interruptible ? TASK_INTERRUPTIBLE
-					   : TASK_NOLOAD);
-		if (wait < 0) /* no new event */
-			break;
-	}
-
-	lnet_eq_wait_unlock();
-	return 0;
-}
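
A sketch of a polling loop over one EQ, following the return-value contract
documented above (0 on timeout, 1 on success, -EOVERFLOW on success with
dropped events, -ENOENT on a bad handle):

/* Sketch: drain one EQ, blocking up to 100 ms per call. */
static void example_drain_eq(struct lnet_handle_eq eqh)
{
	struct lnet_event ev;
	int which;
	int rc;

	for (;;) {
		rc = LNetEQPoll(&eqh, 1, 100, 0, &ev, &which);
		if (!rc)
			break;		/* timed out: queue is empty */
		if (rc == -ENOENT)
			break;		/* invalid handle */
		if (rc == -EOVERFLOW)
			CWARN("events were dropped before this one\n");
		/* rc is 1 or -EOVERFLOW: ev holds a valid event */
	}
}
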

+ 0 - 463
drivers/staging/lustre/lnet/lnet/lib-md.c

@@ -1,463 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-md.c
- *
- * Memory Descriptor management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_unlink(struct lnet_libmd *md)
-{
-	if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) {
-		/* first unlink attempt... */
-		struct lnet_me *me = md->md_me;
-
-		md->md_flags |= LNET_MD_FLAG_ZOMBIE;
-
-		/*
-		 * Disassociate from ME (if any),
-		 * and unlink it if it was created
-		 * with LNET_UNLINK
-		 */
-		if (me) {
-			/* detach MD from portal */
-			lnet_ptl_detach_md(me, md);
-			if (me->me_unlink == LNET_UNLINK)
-				lnet_me_unlink(me);
-		}
-
-		/* ensure all future handle lookups fail */
-		lnet_res_lh_invalidate(&md->md_lh);
-	}
-
-	if (md->md_refcount) {
-		CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
-		return;
-	}
-
-	CDEBUG(D_NET, "Unlinking md %p\n", md);
-
-	if (md->md_eq) {
-		int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
-
-		LASSERT(*md->md_eq->eq_refs[cpt] > 0);
-		(*md->md_eq->eq_refs[cpt])--;
-	}
-
-	LASSERT(!list_empty(&md->md_list));
-	list_del_init(&md->md_list);
-	kfree(md);
-}
-
-static int
-lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink)
-{
-	int i;
-	unsigned int niov;
-	int total_length = 0;
-
-	lmd->md_me = NULL;
-	lmd->md_start = umd->start;
-	lmd->md_offset = 0;
-	lmd->md_max_size = umd->max_size;
-	lmd->md_options = umd->options;
-	lmd->md_user_ptr = umd->user_ptr;
-	lmd->md_eq = NULL;
-	lmd->md_threshold = umd->threshold;
-	lmd->md_refcount = 0;
-	lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
-
-	if (umd->options & LNET_MD_IOVEC) {
-		if (umd->options & LNET_MD_KIOV) /* Can't specify both */
-			return -EINVAL;
-
-		niov = umd->length;
-		lmd->md_niov = umd->length;
-		memcpy(lmd->md_iov.iov, umd->start,
-		       niov * sizeof(lmd->md_iov.iov[0]));
-
-		for (i = 0; i < (int)niov; i++) {
-			/* We take the base address on trust */
-			/* invalid length */
-			if (lmd->md_iov.iov[i].iov_len <= 0)
-				return -EINVAL;
-
-			total_length += lmd->md_iov.iov[i].iov_len;
-		}
-
-		lmd->md_length = total_length;
-
-		if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */
-		    (umd->max_size < 0 ||
-		     umd->max_size > total_length)) /* illegal max_size */
-			return -EINVAL;
-
-	} else if (umd->options & LNET_MD_KIOV) {
-		niov = umd->length;
-		lmd->md_niov = umd->length;
-		memcpy(lmd->md_iov.kiov, umd->start,
-		       niov * sizeof(lmd->md_iov.kiov[0]));
-
-		for (i = 0; i < (int)niov; i++) {
-			/* We take the page pointer on trust */
-			if (lmd->md_iov.kiov[i].bv_offset +
-			    lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
-				return -EINVAL; /* invalid length */
-
-			total_length += lmd->md_iov.kiov[i].bv_len;
-		}
-
-		lmd->md_length = total_length;
-
-		if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
-		    (umd->max_size < 0 ||
-		     umd->max_size > total_length)) /* illegal max_size */
-			return -EINVAL;
-	} else {   /* contiguous */
-		lmd->md_length = umd->length;
-		niov = 1;
-		lmd->md_niov = 1;
-		lmd->md_iov.iov[0].iov_base = umd->start;
-		lmd->md_iov.iov[0].iov_len = umd->length;
-
-		if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
-		    (umd->max_size < 0 ||
-		     umd->max_size > (int)umd->length)) /* illegal max_size */
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-/* must be called with resource lock held */
-static int
-lnet_md_link(struct lnet_libmd *md, struct lnet_handle_eq eq_handle, int cpt)
-{
-	struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
-
-	/*
-	 * NB: we are passed an allocated but inactive MD.
-	 * If we return success, the caller may lnet_md_unlink() it;
-	 * otherwise the caller may only kfree() it.
-	 */
-	/*
-	 * This implementation doesn't know how to create START events or
-	 * disable END events.  Best to LASSERT our caller is compliant so
-	 * we find out quickly...
-	 */
-	/*
-	 * TODO: re-evaluate what should be here in light of
-	 * the removal of the start and end events
-	 * (maybe we shouldn't even allow LNET_EQ_NONE!)
-	 * LASSERT(!eq);
-	 */
-	if (!LNetEQHandleIsInvalid(eq_handle)) {
-		md->md_eq = lnet_handle2eq(&eq_handle);
-
-		if (!md->md_eq)
-			return -ENOENT;
-
-		(*md->md_eq->eq_refs[cpt])++;
-	}
-
-	lnet_res_lh_initialize(container, &md->md_lh);
-
-	LASSERT(list_empty(&md->md_list));
-	list_add(&md->md_list, &container->rec_active);
-
-	return 0;
-}
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd)
-{
-	/* NB this doesn't copy out all the iov entries so when a
-	 * discontiguous MD is copied out, the target gets to know the
-	 * original iov pointer (in start) and the number of entries it had
-	 * and that's all.
-	 */
-	umd->start = lmd->md_start;
-	umd->length = !(lmd->md_options &
-		      (LNET_MD_IOVEC | LNET_MD_KIOV)) ?
-		      lmd->md_length : lmd->md_niov;
-	umd->threshold = lmd->md_threshold;
-	umd->max_size = lmd->md_max_size;
-	umd->options = lmd->md_options;
-	umd->user_ptr = lmd->md_user_ptr;
-	lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
-}
-
-static int
-lnet_md_validate(struct lnet_md *umd)
-{
-	if (!umd->start && umd->length) {
-		CERROR("MD start pointer cannot be NULL with length %u\n",
-		       umd->length);
-		return -EINVAL;
-	}
-
-	if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) &&
-	    umd->length > LNET_MAX_IOV) {
-		CERROR("Invalid option: too many fragments %u, %d max\n",
-		       umd->length, LNET_MAX_IOV);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * Create a memory descriptor and attach it to a ME
- *
- * \param meh A handle for a ME to associate the new MD with.
- * \param umd Provides initial values for the user-visible parts of a MD.
- * Other than its use for initialization, there is no linkage between this
- * structure and the MD maintained by the LNet.
- * \param unlink A flag to indicate whether the MD is automatically unlinked
- * when it becomes inactive, either because the operation threshold drops to
- * zero or because the available memory becomes less than \a umd.max_size.
- * (Note that the check for unlinking a MD only occurs after the completion
- * of a successful operation on the MD.) The value LNET_UNLINK enables auto
- * unlinking; the value LNET_RETAIN disables it.
- * \param handle On successful return, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink().
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
- * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
- * calling LNetInvalidateHandle() on it.
- * \retval -EBUSY  If the ME pointed to by \a meh is already associated with
- * a MD.
- */
-int
-LNetMDAttach(struct lnet_handle_me meh, struct lnet_md umd,
-	     enum lnet_unlink unlink, struct lnet_handle_md *handle)
-{
-	LIST_HEAD(matches);
-	LIST_HEAD(drops);
-	struct lnet_me *me;
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (lnet_md_validate(&umd))
-		return -EINVAL;
-
-	if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
-		CERROR("Invalid option: no MD_OP set\n");
-		return -EINVAL;
-	}
-
-	md = lnet_md_alloc(&umd);
-	if (!md)
-		return -ENOMEM;
-
-	rc = lnet_md_build(md, &umd, unlink);
-	if (rc)
-		goto out_free;
-
-	cpt = lnet_cpt_of_cookie(meh.cookie);
-
-	lnet_res_lock(cpt);
-
-	me = lnet_handle2me(&meh);
-	if (!me)
-		rc = -ENOENT;
-	else if (me->me_md)
-		rc = -EBUSY;
-	else
-		rc = lnet_md_link(md, umd.eq_handle, cpt);
-
-	if (rc)
-		goto out_unlock;
-
-	/*
-	 * attach this MD to portal of ME and check if it matches any
-	 * blocked msgs on this portal
-	 */
-	lnet_ptl_attach_md(me, md, &matches, &drops);
-
-	lnet_md2handle(handle, md);
-
-	lnet_res_unlock(cpt);
-
-	lnet_drop_delayed_msg_list(&drops, "Bad match");
-	lnet_recv_delayed_msg_list(&matches);
-
-	return 0;
-
-out_unlock:
-	lnet_res_unlock(cpt);
-out_free:
-	kfree(md);
-	return rc;
-}
-EXPORT_SYMBOL(LNetMDAttach);
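
A minimal usage sketch for LNetMDAttach(), assuming an event queue handle eqh
and a match entry handle meh were obtained earlier (e.g. via LNetEQAlloc() and
LNetMEAttach()); the buffer and function names are illustrative, not from this
file:

	static char recv_buf[4096];

	static int attach_recv_md(struct lnet_handle_eq eqh,
				  struct lnet_handle_me meh,
				  struct lnet_handle_md *mdh)
	{
		struct lnet_md umd = {
			.start     = recv_buf,
			.length    = sizeof(recv_buf),
			.threshold = LNET_MD_THRESH_INF,
			.options   = LNET_MD_OP_PUT,	/* accept incoming PUTs */
			.user_ptr  = NULL,
			.eq_handle = eqh,
		};

		/* LNET_RETAIN: stay linked until an explicit LNetMDUnlink() */
		return LNetMDAttach(meh, umd, LNET_RETAIN, mdh);
	}
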
-
-/**
- * Create a "free floating" memory descriptor - a MD that is not associated
- * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
- *
- * \param umd,unlink See the discussion for LNetMDAttach().
- * \param handle On successful return, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
- * and LNetGet() operations.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
- * it's OK to supply a NULL \a umd.eq_handle by calling
- * LNetInvalidateHandle() on it.
- */
-int
-LNetMDBind(struct lnet_md umd, enum lnet_unlink unlink,
-	   struct lnet_handle_md *handle)
-{
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (lnet_md_validate(&umd))
-		return -EINVAL;
-
-	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
-		CERROR("Invalid option: GET|PUT illegal on active MDs\n");
-		return -EINVAL;
-	}
-
-	md = lnet_md_alloc(&umd);
-	if (!md)
-		return -ENOMEM;
-
-	rc = lnet_md_build(md, &umd, unlink);
-	if (rc)
-		goto out_free;
-
-	cpt = lnet_res_lock_current();
-
-	rc = lnet_md_link(md, umd.eq_handle, cpt);
-	if (rc)
-		goto out_unlock;
-
-	lnet_md2handle(handle, md);
-
-	lnet_res_unlock(cpt);
-	return 0;
-
-out_unlock:
-	lnet_res_unlock(cpt);
-out_free:
-	kfree(md);
-
-	return rc;
-}
-EXPORT_SYMBOL(LNetMDBind);
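
A sketch of the free-floating case, with eqh and the payload parameters
assumed; note the mirror-image option check above, which rejects
LNET_MD_OP_GET/LNET_MD_OP_PUT on MDs meant for active LNetPut()/LNetGet()
use:

	static int bind_send_md(struct lnet_handle_eq eqh, void *payload,
				unsigned int len, struct lnet_handle_md *mdh)
	{
		struct lnet_md umd = {
			.start     = payload,
			.length    = len,
			.threshold = 2,	/* e.g. expect a SEND plus an ACK event */
			.options   = 0,	/* GET|PUT are rejected by LNetMDBind() */
			.eq_handle = eqh,
		};

		return LNetMDBind(umd, LNET_UNLINK, mdh);
	}
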
-
-/**
- * Unlink the memory descriptor from any ME it may be linked to and release
- * the internal resources associated with it. As a result, active messages
- * associated with the MD may get aborted.
- *
- * This function does not free the memory region associated with the MD;
- * i.e., the memory the user allocated for this MD. If the ME associated with
- * this MD is not NULL and was created with auto unlink enabled, the ME is
- * unlinked as well (see LNetMEAttach()).
- *
- * Explicitly unlinking a MD via this function call has the same behavior as
- * automatic unlinking, except that no dedicated LNET_EVENT_UNLINK event is
- * generated in the automatic case.
- *
- * An unlinked event can be reported in two ways:
- * - If there are no pending operations on the MD, it's unlinked immediately
- *   and an LNET_EVENT_UNLINK event is logged before this function returns.
- * - Otherwise, the MD is only marked for deletion when this function
- *   returns, and the unlinked event will be piggybacked on the event of
- *   the completion of the last operation by setting the unlinked field of
- *   the event. No dedicated LNET_EVENT_UNLINK event is generated.
- *
- * Note that in both cases the unlinked field of the event is always set; no
- * more events will happen on the MD after such an event is logged.
- *
- * \param mdh A handle for the MD to be unlinked.
- *
- * \retval 0       On success.
- * \retval -ENOENT If \a mdh does not point to a valid MD object.
- */
-int
-LNetMDUnlink(struct lnet_handle_md mdh)
-{
-	struct lnet_event ev;
-	struct lnet_libmd *md;
-	int cpt;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_cpt_of_cookie(mdh.cookie);
-	lnet_res_lock(cpt);
-
-	md = lnet_handle2md(&mdh);
-	if (!md) {
-		lnet_res_unlock(cpt);
-		return -ENOENT;
-	}
-
-	md->md_flags |= LNET_MD_FLAG_ABORTED;
-	/*
-	 * If the MD is busy, lnet_md_unlink just marks it for deletion, and
-	 * when the LND is done, the completion event flags that the MD was
-	 * unlinked.  Otherwise, we enqueue an event now...
-	 */
-	if (md->md_eq && !md->md_refcount) {
-		lnet_build_unlink_event(md, &ev);
-		lnet_eq_enqueue_event(md->md_eq, &ev);
-	}
-
-	lnet_md_unlink(md);
-
-	lnet_res_unlock(cpt);
-	return 0;
-}
-EXPORT_SYMBOL(LNetMDUnlink);
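
Because the unlink notification may be piggybacked on a completion event
rather than delivered as a dedicated LNET_EVENT_UNLINK (see the comment
above), an event consumer has to check the unlinked field on every event.
A hedged sketch of such a handler:

	static void handle_event(struct lnet_event *ev)
	{
		/* ... process ev->type (PUT/GET/ACK/REPLY/SEND) here ... */

		if (ev->type == LNET_EVENT_UNLINK || ev->unlinked) {
			/* the MD handle is now invalid: drop state keyed on it */
		}
	}
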

+ 0 - 274
drivers/staging/lustre/lnet/lnet/lib-me.c

@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-me.c
- *
- * Match Entry management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create and attach a match entry to the match list of \a portal. The new
- * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
- * can be used to attach a MD to an empty ME.
- *
- * \param portal The portal table index where the ME should be attached.
- * \param match_id Specifies the match criteria for the process ID of
- * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
- * used to wildcard either of the identifiers in the lnet_process_id
- * structure.
- * \param match_bits,ignore_bits Specify the match criteria to apply
- * to the match bits in the incoming request. The ignore bits are used
- * to mask out insignificant bits in the incoming match bits. The resulting
- * bits are then compared to the ME's match bits to determine if the
- * incoming request meets the match criteria.
- * \param unlink Indicates whether the ME should be unlinked when the memory
- * descriptor associated with it is unlinked. (Note that the check for
- * unlinking a ME only occurs when the memory descriptor is unlinked.)
- * Valid values are LNET_RETAIN and LNET_UNLINK.
- * \param pos Indicates whether the new ME should be prepended or
- * appended to the match list. Allowed constants: LNET_INS_BEFORE,
- * LNET_INS_AFTER.
- * \param handle On successful return, a handle to the newly created ME
- * object is saved here. This handle can be used later in LNetMEInsert(),
- * LNetMEUnlink(), or LNetMDAttach() functions.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a portal is invalid.
- * \retval -ENOMEM If new ME object cannot be allocated.
- */
-int
-LNetMEAttach(unsigned int portal,
-	     struct lnet_process_id match_id,
-	     __u64 match_bits, __u64 ignore_bits,
-	     enum lnet_unlink unlink, enum lnet_ins_pos pos,
-	     struct lnet_handle_me *handle)
-{
-	struct lnet_match_table *mtable;
-	struct lnet_me *me;
-	struct list_head *head;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if ((int)portal >= the_lnet.ln_nportals)
-		return -EINVAL;
-
-	mtable = lnet_mt_of_attach(portal, match_id,
-				   match_bits, ignore_bits, pos);
-	if (!mtable) /* can't match portal type */
-		return -EPERM;
-
-	me = kzalloc(sizeof(*me), GFP_NOFS);
-	if (!me)
-		return -ENOMEM;
-
-	lnet_res_lock(mtable->mt_cpt);
-
-	me->me_portal = portal;
-	me->me_match_id = match_id;
-	me->me_match_bits = match_bits;
-	me->me_ignore_bits = ignore_bits;
-	me->me_unlink = unlink;
-	me->me_md = NULL;
-
-	lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
-			       &me->me_lh);
-	if (ignore_bits)
-		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
-	else
-		head = lnet_mt_match_head(mtable, match_id, match_bits);
-
-	me->me_pos = head - &mtable->mt_mhash[0];
-	if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
-		list_add_tail(&me->me_list, head);
-	else
-		list_add(&me->me_list, head);
-
-	lnet_me2handle(handle, me);
-
-	lnet_res_unlock(mtable->mt_cpt);
-	return 0;
-}
-EXPORT_SYMBOL(LNetMEAttach);
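
A sketch of a wildcard attach (the portal index and function name are
illustrative): by passing LNET_NID_ANY/LNET_PID_ANY and all-ones ignore
bits, the ME matches any incoming request on the portal:

	static int attach_wildcard_me(unsigned int portal,
				      struct lnet_handle_me *meh)
	{
		struct lnet_process_id any = {
			.nid = LNET_NID_ANY,
			.pid = LNET_PID_ANY,
		};

		return LNetMEAttach(portal, any,
				    0,		/* match_bits */
				    ~0ULL,	/* ignore_bits: ignore everything */
				    LNET_UNLINK, LNET_INS_AFTER, meh);
	}
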
-
-/**
- * Create a match entry and insert it before or after the ME pointed to by
- * \a current_meh. The new ME is empty, i.e. not associated with a memory
- * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
- *
- * This function is identical to LNetMEAttach() except for the position
- * where the new ME is inserted.
- *
- * \param current_meh A handle for a ME. The new ME will be inserted
- * immediately before or immediately after this ME.
- * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
- * for LNetMEAttach().
- *
- * \retval 0       On success.
- * \retval -ENOMEM If new ME object cannot be allocated.
- * \retval -ENOENT If \a current_meh does not point to a valid match entry.
- */
-int
-LNetMEInsert(struct lnet_handle_me current_meh,
-	     struct lnet_process_id match_id,
-	     __u64 match_bits, __u64 ignore_bits,
-	     enum lnet_unlink unlink, enum lnet_ins_pos pos,
-	     struct lnet_handle_me *handle)
-{
-	struct lnet_me *current_me;
-	struct lnet_me *new_me;
-	struct lnet_portal *ptl;
-	int cpt;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (pos == LNET_INS_LOCAL)
-		return -EPERM;
-
-	new_me = kzalloc(sizeof(*new_me), GFP_NOFS);
-	if (!new_me)
-		return -ENOMEM;
-
-	cpt = lnet_cpt_of_cookie(current_meh.cookie);
-
-	lnet_res_lock(cpt);
-
-	current_me = lnet_handle2me(&current_meh);
-	if (!current_me) {
-		kfree(new_me);
-
-		lnet_res_unlock(cpt);
-		return -ENOENT;
-	}
-
-	LASSERT(current_me->me_portal < the_lnet.ln_nportals);
-
-	ptl = the_lnet.ln_portals[current_me->me_portal];
-	if (lnet_ptl_is_unique(ptl)) {
-		/* no sense in inserting on a unique portal */
-		kfree(new_me);
-		lnet_res_unlock(cpt);
-		return -EPERM;
-	}
-
-	new_me->me_pos = current_me->me_pos;
-	new_me->me_portal = current_me->me_portal;
-	new_me->me_match_id = match_id;
-	new_me->me_match_bits = match_bits;
-	new_me->me_ignore_bits = ignore_bits;
-	new_me->me_unlink = unlink;
-	new_me->me_md = NULL;
-
-	lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);
-
-	if (pos == LNET_INS_AFTER)
-		list_add(&new_me->me_list, &current_me->me_list);
-	else
-		list_add_tail(&new_me->me_list, &current_me->me_list);
-
-	lnet_me2handle(handle, new_me);
-
-	lnet_res_unlock(cpt);
-
-	return 0;
-}
-EXPORT_SYMBOL(LNetMEInsert);
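
A sketch of inserting a more specific entry ahead of an existing wildcard
ME so it is consulted first on the match list (handles and bits are
assumptions for illustration):

	static int insert_exact_me(struct lnet_handle_me wildcard_meh,
				   struct lnet_process_id peer, __u64 bits,
				   struct lnet_handle_me *meh)
	{
		return LNetMEInsert(wildcard_meh, peer, bits,
				    0,	/* ignore_bits: require an exact match */
				    LNET_UNLINK, LNET_INS_BEFORE, meh);
	}
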
-
-/**
- * Unlink a match entry from its match list.
- *
- * This operation also releases any resources associated with the ME. If a
- * memory descriptor is attached to the ME, then it will be unlinked as well
- * and an unlink event will be generated. It is an error to use the ME handle
- * after calling LNetMEUnlink().
- *
- * \param meh A handle for the ME to be unlinked.
- *
- * \retval 0       On success.
- * \retval -ENOENT If \a meh does not point to a valid ME.
- * \see LNetMDUnlink() for the discussion on delivering unlink event.
- */
-int
-LNetMEUnlink(struct lnet_handle_me meh)
-{
-	struct lnet_me *me;
-	struct lnet_libmd *md;
-	struct lnet_event ev;
-	int cpt;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_cpt_of_cookie(meh.cookie);
-	lnet_res_lock(cpt);
-
-	me = lnet_handle2me(&meh);
-	if (!me) {
-		lnet_res_unlock(cpt);
-		return -ENOENT;
-	}
-
-	md = me->me_md;
-	if (md) {
-		md->md_flags |= LNET_MD_FLAG_ABORTED;
-		if (md->md_eq && !md->md_refcount) {
-			lnet_build_unlink_event(md, &ev);
-			lnet_eq_enqueue_event(md->md_eq, &ev);
-		}
-	}
-
-	lnet_me_unlink(me);
-
-	lnet_res_unlock(cpt);
-	return 0;
-}
-EXPORT_SYMBOL(LNetMEUnlink);
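
Teardown is then a single call; since unlinking the ME also unlinks any
attached MD (as documented above), the ME handle from the earlier sketches
is all that needs to be retained:

	static void detach_me(struct lnet_handle_me meh)
	{
		/* returns -ENOENT if the handle is already invalid */
		(void)LNetMEUnlink(meh);
	}
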
-
-/* call with lnet_res_lock please */
-void
-lnet_me_unlink(struct lnet_me *me)
-{
-	list_del(&me->me_list);
-
-	if (me->me_md) {
-		struct lnet_libmd *md = me->me_md;
-
-		/* detach MD from portal of this ME */
-		lnet_ptl_detach_md(me, md);
-		lnet_md_unlink(md);
-	}
-
-	lnet_res_lh_invalidate(&me->me_lh);
-	kfree(me);
-}

+ 0 - 2386
drivers/staging/lustre/lnet/lnet/lib-move.c

@@ -1,2386 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-move.c
- *
- * Data movement routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-
-static int local_nid_dist_zero = 1;
-module_param(local_nid_dist_zero, int, 0444);
-MODULE_PARM_DESC(local_nid_dist_zero, "Reserved");
-
-int
-lnet_fail_nid(lnet_nid_t nid, unsigned int threshold)
-{
-	struct lnet_test_peer *tp;
-	struct lnet_test_peer *temp;
-	struct list_head *el;
-	struct list_head *next;
-	struct list_head cull;
-
-	/* NB: use lnet_net_lock(0) to serialize operations on test peers */
-	if (threshold) {
-		/* Adding a new entry */
-		tp = kzalloc(sizeof(*tp), GFP_NOFS);
-		if (!tp)
-			return -ENOMEM;
-
-		tp->tp_nid = nid;
-		tp->tp_threshold = threshold;
-
-		lnet_net_lock(0);
-		list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers);
-		lnet_net_unlock(0);
-		return 0;
-	}
-
-	/* removing entries */
-	INIT_LIST_HEAD(&cull);
-
-	lnet_net_lock(0);
-
-	list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
-		tp = list_entry(el, struct lnet_test_peer, tp_list);
-
-		if (!tp->tp_threshold ||    /* needs culling anyway */
-		    nid == LNET_NID_ANY ||       /* removing all entries */
-		    tp->tp_nid == nid) {	  /* matched this one */
-			list_del(&tp->tp_list);
-			list_add(&tp->tp_list, &cull);
-		}
-	}
-
-	lnet_net_unlock(0);
-
-	list_for_each_entry_safe(tp, temp, &cull, tp_list) {
-		list_del(&tp->tp_list);
-		kfree(tp);
-	}
-	return 0;
-}
-
-static int
-fail_peer(lnet_nid_t nid, int outgoing)
-{
-	struct lnet_test_peer *tp;
-	struct lnet_test_peer *temp;
-	struct list_head *el;
-	struct list_head *next;
-	struct list_head cull;
-	int fail = 0;
-
-	INIT_LIST_HEAD(&cull);
-
-	/* NB: use lnet_net_lock(0) to serialize operations on test peers */
-	lnet_net_lock(0);
-
-	list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
-		tp = list_entry(el, struct lnet_test_peer, tp_list);
-
-		if (!tp->tp_threshold) {
-			/* zombie entry */
-			if (outgoing) {
-				/*
-				 * only cull zombies on outgoing tests,
-				 * since we may be at interrupt priority on
-				 * incoming messages.
-				 */
-				list_del(&tp->tp_list);
-				list_add(&tp->tp_list, &cull);
-			}
-			continue;
-		}
-
-		if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
-		    nid == tp->tp_nid) {	/* fail this peer */
-			fail = 1;
-
-			if (tp->tp_threshold != LNET_MD_THRESH_INF) {
-				tp->tp_threshold--;
-				if (outgoing &&
-				    !tp->tp_threshold) {
-					/* see above */
-					list_del(&tp->tp_list);
-					list_add(&tp->tp_list, &cull);
-				}
-			}
-			break;
-		}
-	}
-
-	lnet_net_unlock(0);
-
-	list_for_each_entry_safe(tp, temp, &cull, tp_list) {
-		list_del(&tp->tp_list);
-
-		kfree(tp);
-	}
-
-	return fail;
-}
-
-unsigned int
-lnet_iov_nob(unsigned int niov, struct kvec *iov)
-{
-	unsigned int nob = 0;
-
-	LASSERT(!niov || iov);
-	while (niov-- > 0)
-		nob += (iov++)->iov_len;
-
-	return nob;
-}
-EXPORT_SYMBOL(lnet_iov_nob);
-
-void
-lnet_copy_iov2iter(struct iov_iter *to,
-		   unsigned int nsiov, const struct kvec *siov,
-		   unsigned int soffset, unsigned int nob)
-{
-	/* NB diov, siov are READ-ONLY */
-	const char *s;
-	size_t left;
-
-	if (!nob)
-		return;
-
-	/* skip complete frags before 'soffset' */
-	LASSERT(nsiov > 0);
-	while (soffset >= siov->iov_len) {
-		soffset -= siov->iov_len;
-		siov++;
-		nsiov--;
-		LASSERT(nsiov > 0);
-	}
-
-	s = (char *)siov->iov_base + soffset;
-	left = siov->iov_len - soffset;
-	do {
-		size_t n, copy = left;
-
-		LASSERT(nsiov > 0);
-
-		if (copy > nob)
-			copy = nob;
-		n = copy_to_iter(s, copy, to);
-		if (n != copy)
-			return;
-		nob -= n;
-
-		siov++;
-		s = (char *)siov->iov_base;
-		left = siov->iov_len;
-		nsiov--;
-	} while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_iov2iter);
-
-void
-lnet_copy_kiov2iter(struct iov_iter *to,
-		    unsigned int nsiov, const struct bio_vec *siov,
-		    unsigned int soffset, unsigned int nob)
-{
-	if (!nob)
-		return;
-
-	LASSERT(!in_interrupt());
-
-	LASSERT(nsiov > 0);
-	while (soffset >= siov->bv_len) {
-		soffset -= siov->bv_len;
-		siov++;
-		nsiov--;
-		LASSERT(nsiov > 0);
-	}
-
-	do {
-		size_t copy = siov->bv_len - soffset, n;
-
-		LASSERT(nsiov > 0);
-
-		if (copy > nob)
-			copy = nob;
-		n = copy_page_to_iter(siov->bv_page,
-				      siov->bv_offset + soffset,
-				      copy, to);
-		if (n != copy)
-			return;
-		nob -= n;
-		siov++;
-		nsiov--;
-		soffset = 0;
-	} while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iter);
-
-int
-lnet_extract_iov(int dst_niov, struct kvec *dst,
-		 int src_niov, const struct kvec *src,
-		 unsigned int offset, unsigned int len)
-{
-	/*
-	 * Initialise 'dst' to the subset of 'src' starting at 'offset',
-	 * for exactly 'len' bytes, and return the number of entries.
-	 * NB not destructive to 'src'
-	 */
-	unsigned int frag_len;
-	unsigned int niov;
-
-	if (!len)			   /* no data => */
-		return 0;		     /* no frags */
-
-	LASSERT(src_niov > 0);
-	while (offset >= src->iov_len) {      /* skip initial frags */
-		offset -= src->iov_len;
-		src_niov--;
-		src++;
-		LASSERT(src_niov > 0);
-	}
-
-	niov = 1;
-	for (;;) {
-		LASSERT(src_niov > 0);
-		LASSERT((int)niov <= dst_niov);
-
-		frag_len = src->iov_len - offset;
-		dst->iov_base = ((char *)src->iov_base) + offset;
-
-		if (len <= frag_len) {
-			dst->iov_len = len;
-			return niov;
-		}
-
-		dst->iov_len = frag_len;
-
-		len -= frag_len;
-		dst++;
-		src++;
-		niov++;
-		src_niov--;
-		offset = 0;
-	}
-}
-EXPORT_SYMBOL(lnet_extract_iov);
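
A worked example of the extraction helper (the arrays are illustrative):
a 100-byte range starting at offset 50 of two 100-byte fragments spans
both fragments, so two destination entries are produced:

	static void extract_demo(void)
	{
		static char a[100], b[100];
		struct kvec src[2] = {
			{ .iov_base = a, .iov_len = sizeof(a) },
			{ .iov_base = b, .iov_len = sizeof(b) },
		};
		struct kvec dst[2];
		int n;

		/* dst[0] = a+50 (50 bytes), dst[1] = b (50 bytes); n == 2 */
		n = lnet_extract_iov(2, dst, 2, src, 50, 100);
		(void)n;
	}
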
-
-unsigned int
-lnet_kiov_nob(unsigned int niov, struct bio_vec *kiov)
-{
-	unsigned int nob = 0;
-
-	LASSERT(!niov || kiov);
-	while (niov-- > 0)
-		nob += (kiov++)->bv_len;
-
-	return nob;
-}
-EXPORT_SYMBOL(lnet_kiov_nob);
-
-int
-lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
-		  int src_niov, const struct bio_vec *src,
-		  unsigned int offset, unsigned int len)
-{
-	/*
-	 * Initialise 'dst' to the subset of 'src' starting at 'offset',
-	 * for exactly 'len' bytes, and return the number of entries.
-	 * NB not destructive to 'src'
-	 */
-	unsigned int frag_len;
-	unsigned int niov;
-
-	if (!len)			   /* no data => */
-		return 0;		     /* no frags */
-
-	LASSERT(src_niov > 0);
-	while (offset >= src->bv_len) {      /* skip initial frags */
-		offset -= src->bv_len;
-		src_niov--;
-		src++;
-		LASSERT(src_niov > 0);
-	}
-
-	niov = 1;
-	for (;;) {
-		LASSERT(src_niov > 0);
-		LASSERT((int)niov <= dst_niov);
-
-		frag_len = src->bv_len - offset;
-		dst->bv_page = src->bv_page;
-		dst->bv_offset = src->bv_offset + offset;
-
-		if (len <= frag_len) {
-			dst->bv_len = len;
-			LASSERT(dst->bv_offset + dst->bv_len
-					<= PAGE_SIZE);
-			return niov;
-		}
-
-		dst->bv_len = frag_len;
-		LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
-
-		len -= frag_len;
-		dst++;
-		src++;
-		niov++;
-		src_niov--;
-		offset = 0;
-	}
-}
-EXPORT_SYMBOL(lnet_extract_kiov);
-
-void
-lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-	     int delayed, unsigned int offset, unsigned int mlen,
-	     unsigned int rlen)
-{
-	unsigned int niov = 0;
-	struct kvec *iov = NULL;
-	struct bio_vec *kiov = NULL;
-	struct iov_iter to;
-	int rc;
-
-	LASSERT(!in_interrupt());
-	LASSERT(!mlen || msg);
-
-	if (msg) {
-		LASSERT(msg->msg_receiving);
-		LASSERT(!msg->msg_sending);
-		LASSERT(rlen == msg->msg_len);
-		LASSERT(mlen <= msg->msg_len);
-		LASSERT(msg->msg_offset == offset);
-		LASSERT(msg->msg_wanted == mlen);
-
-		msg->msg_receiving = 0;
-
-		if (mlen) {
-			niov = msg->msg_niov;
-			iov  = msg->msg_iov;
-			kiov = msg->msg_kiov;
-
-			LASSERT(niov > 0);
-			LASSERT(!iov != !kiov);
-		}
-	}
-
-	if (iov) {
-		iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
-		iov_iter_advance(&to, offset);
-	} else {
-		iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
-		iov_iter_advance(&to, offset);
-	}
-	rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
-	if (rc < 0)
-		lnet_finalize(ni, msg, rc);
-}
-
-static void
-lnet_setpayloadbuffer(struct lnet_msg *msg)
-{
-	struct lnet_libmd *md = msg->msg_md;
-
-	LASSERT(msg->msg_len > 0);
-	LASSERT(!msg->msg_routing);
-	LASSERT(md);
-	LASSERT(!msg->msg_niov);
-	LASSERT(!msg->msg_iov);
-	LASSERT(!msg->msg_kiov);
-
-	msg->msg_niov = md->md_niov;
-	if (md->md_options & LNET_MD_KIOV)
-		msg->msg_kiov = md->md_iov.kiov;
-	else
-		msg->msg_iov = md->md_iov.iov;
-}
-
-void
-lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target,
-	       unsigned int offset, unsigned int len)
-{
-	msg->msg_type = type;
-	msg->msg_target = target;
-	msg->msg_len = len;
-	msg->msg_offset = offset;
-
-	if (len)
-		lnet_setpayloadbuffer(msg);
-
-	memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr));
-	msg->msg_hdr.type	   = cpu_to_le32(type);
-	msg->msg_hdr.dest_nid       = cpu_to_le64(target.nid);
-	msg->msg_hdr.dest_pid       = cpu_to_le32(target.pid);
-	/* src_nid will be set later */
-	msg->msg_hdr.src_pid	= cpu_to_le32(the_lnet.ln_pid);
-	msg->msg_hdr.payload_length = cpu_to_le32(len);
-}
-
-static void
-lnet_ni_send(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	void *priv = msg->msg_private;
-	int rc;
-
-	LASSERT(!in_interrupt());
-	LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
-		(msg->msg_txcredit && msg->msg_peertxcredit));
-
-	rc = ni->ni_lnd->lnd_send(ni, priv, msg);
-	if (rc < 0)
-		lnet_finalize(ni, msg, rc);
-}
-
-static int
-lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	int rc;
-
-	LASSERT(!msg->msg_sending);
-	LASSERT(msg->msg_receiving);
-	LASSERT(!msg->msg_rx_ready_delay);
-	LASSERT(ni->ni_lnd->lnd_eager_recv);
-
-	msg->msg_rx_ready_delay = 1;
-	rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg,
-					&msg->msg_private);
-	if (rc) {
-		CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n",
-		       libcfs_nid2str(msg->msg_rxpeer->lp_nid),
-		       libcfs_id2str(msg->msg_target), rc);
-		LASSERT(rc < 0); /* required by my callers */
-	}
-
-	return rc;
-}
-
-/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */
-static void
-lnet_ni_query_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
-	unsigned long last_alive = 0;
-
-	LASSERT(lnet_peer_aliveness_enabled(lp));
-	LASSERT(ni->ni_lnd->lnd_query);
-
-	lnet_net_unlock(lp->lp_cpt);
-	ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive);
-	lnet_net_lock(lp->lp_cpt);
-
-	lp->lp_last_query = jiffies;
-
-	if (last_alive) /* NI has updated timestamp */
-		lp->lp_last_alive = last_alive;
-}
-
-/* NB: always called with lnet_net_lock held */
-static inline int
-lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
-{
-	int alive;
-	unsigned long deadline;
-
-	LASSERT(lnet_peer_aliveness_enabled(lp));
-
-	/* Trust lnet_notify() if it has more recent aliveness news, but
-	 * ignore the initial assumed death (see lnet_peers_start_down()).
-	 */
-	if (!lp->lp_alive && lp->lp_alive_count > 0 &&
-	    time_after_eq(lp->lp_timestamp, lp->lp_last_alive))
-		return 0;
-
-	deadline = lp->lp_last_alive + lp->lp_ni->ni_peertimeout * HZ;
-	alive = time_after(deadline, now);
-
-	/* Update obsolete lp_alive except for routers assumed to be dead
-	 * initially, because the router checker updates aliveness in that
-	 * case, and moreover lp_last_alive at peer creation is only assumed.
-	 */
-	if (alive && !lp->lp_alive &&
-	    !(lnet_isrouter(lp) && !lp->lp_alive_count))
-		lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-
-	return alive;
-}
-
-/*
- * NB: returns 1 when alive, 0 when dead, negative when error;
- *     may drop the lnet_net_lock
- */
-static int
-lnet_peer_alive_locked(struct lnet_peer *lp)
-{
-	unsigned long now = jiffies;
-
-	if (!lnet_peer_aliveness_enabled(lp))
-		return -ENODEV;
-
-	if (lnet_peer_is_alive(lp, now))
-		return 1;
-
-	/*
-	 * Peer appears dead, but we should avoid frequent NI queries (at
-	 * most once per lnet_queryinterval seconds).
-	 */
-	if (lp->lp_last_query) {
-		static const int lnet_queryinterval = 1;
-
-		unsigned long next_query =
-			   lp->lp_last_query + lnet_queryinterval * HZ;
-
-		if (time_before(now, next_query)) {
-			if (lp->lp_alive)
-				CWARN("Unexpected aliveness of peer %s: %d < %d (%d/%d)\n",
-				      libcfs_nid2str(lp->lp_nid),
-				      (int)now, (int)next_query,
-				      lnet_queryinterval,
-				      lp->lp_ni->ni_peertimeout);
-			return 0;
-		}
-	}
-
-	/* query NI for latest aliveness news */
-	lnet_ni_query_locked(lp->lp_ni, lp);
-
-	if (lnet_peer_is_alive(lp, now))
-		return 1;
-
-	lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
-	return 0;
-}
-
-/**
- * \param msg The message to be sent.
- * \param do_send True if lnet_ni_send() should be called in this function.
- *	  lnet_send() is going to lnet_net_unlock immediately after this, so
- *	  it sets do_send FALSE and I don't do the unlock/send/lock bit.
- *
- * \retval LNET_CREDIT_OK If \a msg sent or OK to send.
- * \retval LNET_CREDIT_WAIT If \a msg blocked for credit.
- * \retval -EHOSTUNREACH If the next hop of the message appears dead.
- * \retval -ECANCELED If the MD of the message has been unlinked.
- */
-static int
-lnet_post_send_locked(struct lnet_msg *msg, int do_send)
-{
-	struct lnet_peer *lp = msg->msg_txpeer;
-	struct lnet_ni *ni = lp->lp_ni;
-	int cpt = msg->msg_tx_cpt;
-	struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
-
-	/* non-lnet_send() callers have checked before */
-	LASSERT(!do_send || msg->msg_tx_delayed);
-	LASSERT(!msg->msg_receiving);
-	LASSERT(msg->msg_tx_committed);
-
-	/* NB 'lp' is always the next hop */
-	if (!(msg->msg_target.pid & LNET_PID_USERFLAG) &&
-	    !lnet_peer_alive_locked(lp)) {
-		the_lnet.ln_counters[cpt]->drop_count++;
-		the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
-		lnet_net_unlock(cpt);
-
-		CNETERR("Dropping message for %s: peer not alive\n",
-			libcfs_id2str(msg->msg_target));
-		if (do_send)
-			lnet_finalize(ni, msg, -EHOSTUNREACH);
-
-		lnet_net_lock(cpt);
-		return -EHOSTUNREACH;
-	}
-
-	if (msg->msg_md &&
-	    (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) {
-		lnet_net_unlock(cpt);
-
-		CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n",
-			libcfs_id2str(msg->msg_target));
-		if (do_send)
-			lnet_finalize(ni, msg, -ECANCELED);
-
-		lnet_net_lock(cpt);
-		return -ECANCELED;
-	}
-
-	if (!msg->msg_peertxcredit) {
-		LASSERT((lp->lp_txcredits < 0) ==
-			!list_empty(&lp->lp_txq));
-
-		msg->msg_peertxcredit = 1;
-		lp->lp_txqnob += msg->msg_len + sizeof(struct lnet_hdr);
-		lp->lp_txcredits--;
-
-		if (lp->lp_txcredits < lp->lp_mintxcredits)
-			lp->lp_mintxcredits = lp->lp_txcredits;
-
-		if (lp->lp_txcredits < 0) {
-			msg->msg_tx_delayed = 1;
-			list_add_tail(&msg->msg_list, &lp->lp_txq);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	if (!msg->msg_txcredit) {
-		LASSERT((tq->tq_credits < 0) ==
-			!list_empty(&tq->tq_delayed));
-
-		msg->msg_txcredit = 1;
-		tq->tq_credits--;
-
-		if (tq->tq_credits < tq->tq_credits_min)
-			tq->tq_credits_min = tq->tq_credits;
-
-		if (tq->tq_credits < 0) {
-			msg->msg_tx_delayed = 1;
-			list_add_tail(&msg->msg_list, &tq->tq_delayed);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	if (do_send) {
-		lnet_net_unlock(cpt);
-		lnet_ni_send(ni, msg);
-		lnet_net_lock(cpt);
-	}
-	return LNET_CREDIT_OK;
-}
-
-static struct lnet_rtrbufpool *
-lnet_msg2bufpool(struct lnet_msg *msg)
-{
-	struct lnet_rtrbufpool *rbp;
-	int cpt;
-
-	LASSERT(msg->msg_rx_committed);
-
-	cpt = msg->msg_rx_cpt;
-	rbp = &the_lnet.ln_rtrpools[cpt][0];
-
-	LASSERT(msg->msg_len <= LNET_MTU);
-	while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) {
-		rbp++;
-		LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
-	}
-
-	return rbp;
-}
-
-static int
-lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
-{
-	/*
-	 * lnet_parse is going to lnet_net_unlock immediately after this, so it
-	 * sets do_recv FALSE and I don't do the unlock/send/lock bit.
-	 * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if
-	 * received or OK to receive
-	 */
-	struct lnet_peer *lp = msg->msg_rxpeer;
-	struct lnet_rtrbufpool *rbp;
-	struct lnet_rtrbuf *rb;
-
-	LASSERT(!msg->msg_iov);
-	LASSERT(!msg->msg_kiov);
-	LASSERT(!msg->msg_niov);
-	LASSERT(msg->msg_routing);
-	LASSERT(msg->msg_receiving);
-	LASSERT(!msg->msg_sending);
-
-	/* non-lnet_parse callers only receive delayed messages */
-	LASSERT(!do_recv || msg->msg_rx_delayed);
-
-	if (!msg->msg_peerrtrcredit) {
-		LASSERT((lp->lp_rtrcredits < 0) ==
-			!list_empty(&lp->lp_rtrq));
-
-		msg->msg_peerrtrcredit = 1;
-		lp->lp_rtrcredits--;
-		if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
-			lp->lp_minrtrcredits = lp->lp_rtrcredits;
-
-		if (lp->lp_rtrcredits < 0) {
-			/* must have checked eager_recv before here */
-			LASSERT(msg->msg_rx_ready_delay);
-			msg->msg_rx_delayed = 1;
-			list_add_tail(&msg->msg_list, &lp->lp_rtrq);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	rbp = lnet_msg2bufpool(msg);
-
-	if (!msg->msg_rtrcredit) {
-		msg->msg_rtrcredit = 1;
-		rbp->rbp_credits--;
-		if (rbp->rbp_credits < rbp->rbp_mincredits)
-			rbp->rbp_mincredits = rbp->rbp_credits;
-
-		if (rbp->rbp_credits < 0) {
-			/* must have checked eager_recv before here */
-			LASSERT(msg->msg_rx_ready_delay);
-			msg->msg_rx_delayed = 1;
-			list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	LASSERT(!list_empty(&rbp->rbp_bufs));
-	rb = list_entry(rbp->rbp_bufs.next, struct lnet_rtrbuf, rb_list);
-	list_del(&rb->rb_list);
-
-	msg->msg_niov = rbp->rbp_npages;
-	msg->msg_kiov = &rb->rb_kiov[0];
-
-	if (do_recv) {
-		int cpt = msg->msg_rx_cpt;
-
-		lnet_net_unlock(cpt);
-		lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
-			     0, msg->msg_len, msg->msg_len);
-		lnet_net_lock(cpt);
-	}
-	return LNET_CREDIT_OK;
-}
-
-void
-lnet_return_tx_credits_locked(struct lnet_msg *msg)
-{
-	struct lnet_peer *txpeer = msg->msg_txpeer;
-	struct lnet_msg *msg2;
-
-	if (msg->msg_txcredit) {
-		struct lnet_ni *ni = txpeer->lp_ni;
-		struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
-
-		/* give back NI txcredits */
-		msg->msg_txcredit = 0;
-
-		LASSERT((tq->tq_credits < 0) ==
-			!list_empty(&tq->tq_delayed));
-
-		tq->tq_credits++;
-		if (tq->tq_credits <= 0) {
-			msg2 = list_entry(tq->tq_delayed.next,
-					  struct lnet_msg, msg_list);
-			list_del(&msg2->msg_list);
-
-			LASSERT(msg2->msg_txpeer->lp_ni == ni);
-			LASSERT(msg2->msg_tx_delayed);
-
-			(void)lnet_post_send_locked(msg2, 1);
-		}
-	}
-
-	if (msg->msg_peertxcredit) {
-		/* give back peer txcredits */
-		msg->msg_peertxcredit = 0;
-
-		LASSERT((txpeer->lp_txcredits < 0) ==
-			!list_empty(&txpeer->lp_txq));
-
-		txpeer->lp_txqnob -= msg->msg_len + sizeof(struct lnet_hdr);
-		LASSERT(txpeer->lp_txqnob >= 0);
-
-		txpeer->lp_txcredits++;
-		if (txpeer->lp_txcredits <= 0) {
-			msg2 = list_entry(txpeer->lp_txq.next,
-					  struct lnet_msg, msg_list);
-			list_del(&msg2->msg_list);
-
-			LASSERT(msg2->msg_txpeer == txpeer);
-			LASSERT(msg2->msg_tx_delayed);
-
-			(void)lnet_post_send_locked(msg2, 1);
-		}
-	}
-
-	if (txpeer) {
-		msg->msg_txpeer = NULL;
-		lnet_peer_decref_locked(txpeer);
-	}
-}
-
-void
-lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp)
-{
-	struct lnet_msg *msg;
-
-	if (list_empty(&rbp->rbp_msgs))
-		return;
-	msg = list_entry(rbp->rbp_msgs.next,
-			 struct lnet_msg, msg_list);
-	list_del(&msg->msg_list);
-
-	(void)lnet_post_routed_recv_locked(msg, 1);
-}
-
-void
-lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
-{
-	struct list_head drop;
-	struct lnet_msg *msg;
-	struct lnet_msg *tmp;
-
-	INIT_LIST_HEAD(&drop);
-
-	list_splice_init(list, &drop);
-
-	lnet_net_unlock(cpt);
-
-	list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
-		lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
-			     0, 0, 0, msg->msg_hdr.payload_length);
-		list_del_init(&msg->msg_list);
-		lnet_finalize(NULL, msg, -ECANCELED);
-	}
-
-	lnet_net_lock(cpt);
-}
-
-void
-lnet_return_rx_credits_locked(struct lnet_msg *msg)
-{
-	struct lnet_peer *rxpeer = msg->msg_rxpeer;
-	struct lnet_msg *msg2;
-
-	if (msg->msg_rtrcredit) {
-		/* give back global router credits */
-		struct lnet_rtrbuf *rb;
-		struct lnet_rtrbufpool *rbp;
-
-		/*
-		 * NB If a msg ever blocks for a buffer in rbp_msgs, it stays
-		 * there until it gets one allocated, or aborts the wait
-		 * itself
-		 */
-		LASSERT(msg->msg_kiov);
-
-		rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
-		rbp = rb->rb_pool;
-
-		msg->msg_kiov = NULL;
-		msg->msg_rtrcredit = 0;
-
-		LASSERT(rbp == lnet_msg2bufpool(msg));
-
-		LASSERT((rbp->rbp_credits > 0) ==
-			!list_empty(&rbp->rbp_bufs));
-
-		/*
-		 * If routing is now turned off, we just drop this buffer and
-		 * don't bother trying to return credits.
-		 */
-		if (!the_lnet.ln_routing) {
-			lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
-			goto routing_off;
-		}
-
-		/*
-		 * It is possible that a user has lowered the desired number of
-		 * buffers in this pool.  Make sure we never put back
-		 * more buffers than the stated number.
-		 */
-		if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) {
-			/* Discard this buffer so we don't have too many. */
-			lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
-			rbp->rbp_nbuffers--;
-		} else {
-			list_add(&rb->rb_list, &rbp->rbp_bufs);
-			rbp->rbp_credits++;
-			if (rbp->rbp_credits <= 0)
-				lnet_schedule_blocked_locked(rbp);
-		}
-	}
-
-routing_off:
-	if (msg->msg_peerrtrcredit) {
-		/* give back peer router credits */
-		msg->msg_peerrtrcredit = 0;
-
-		LASSERT((rxpeer->lp_rtrcredits < 0) ==
-			!list_empty(&rxpeer->lp_rtrq));
-
-		rxpeer->lp_rtrcredits++;
-		/*
-		 * drop all messages which are queued to be routed on that
-		 * peer.
-		 */
-		if (!the_lnet.ln_routing) {
-			lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq,
-						     msg->msg_rx_cpt);
-		} else if (rxpeer->lp_rtrcredits <= 0) {
-			msg2 = list_entry(rxpeer->lp_rtrq.next,
-					  struct lnet_msg, msg_list);
-			list_del(&msg2->msg_list);
-
-			(void)lnet_post_routed_recv_locked(msg2, 1);
-		}
-	}
-	if (rxpeer) {
-		msg->msg_rxpeer = NULL;
-		lnet_peer_decref_locked(rxpeer);
-	}
-}
-
-static int
-lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
-{
-	struct lnet_peer *p1 = r1->lr_gateway;
-	struct lnet_peer *p2 = r2->lr_gateway;
-	int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops;
-	int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r2->lr_hops;
-
-	if (r1->lr_priority < r2->lr_priority)
-		return 1;
-
-	if (r1->lr_priority > r2->lr_priority)
-		return -ERANGE;
-
-	if (r1_hops < r2_hops)
-		return 1;
-
-	if (r1_hops > r2_hops)
-		return -ERANGE;
-
-	if (p1->lp_txqnob < p2->lp_txqnob)
-		return 1;
-
-	if (p1->lp_txqnob > p2->lp_txqnob)
-		return -ERANGE;
-
-	if (p1->lp_txcredits > p2->lp_txcredits)
-		return 1;
-
-	if (p1->lp_txcredits < p2->lp_txcredits)
-		return -ERANGE;
-
-	if (r1->lr_seq - r2->lr_seq <= 0)
-		return 1;
-
-	return -ERANGE;
-}
-
-static struct lnet_peer *
-lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
-		       lnet_nid_t rtr_nid)
-{
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	struct lnet_route *best_route;
-	struct lnet_route *last_route;
-	struct lnet_peer *lp_best;
-	struct lnet_peer *lp;
-	int rc;
-
-	/*
-	 * If @rtr_nid is not LNET_NID_ANY, return the gateway with
-	 * rtr_nid nid, otherwise find the best gateway I can use
-	 */
-	rnet = lnet_find_net_locked(LNET_NIDNET(target));
-	if (!rnet)
-		return NULL;
-
-	lp_best = NULL;
-	best_route = NULL;
-	last_route = NULL;
-	list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
-		lp = route->lr_gateway;
-
-		if (!lnet_is_route_alive(route))
-			continue;
-
-		if (ni && lp->lp_ni != ni)
-			continue;
-
-		if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
-			return lp;
-
-		if (!lp_best) {
-			best_route = route;
-			last_route = route;
-			lp_best = lp;
-			continue;
-		}
-
-		/* no protection on below fields, but it's harmless */
-		if (last_route->lr_seq - route->lr_seq < 0)
-			last_route = route;
-
-		rc = lnet_compare_routes(route, best_route);
-		if (rc < 0)
-			continue;
-
-		best_route = route;
-		lp_best = lp;
-	}
-
-	/*
-	 * Set the sequence number on the best router to the latest sequence
-	 * + 1 so we can round-robin all routers; it's racy and inaccurate,
-	 * but harmless and functional.
-	 */
-	if (best_route)
-		best_route->lr_seq = last_route->lr_seq + 1;
-	return lp_best;
-}
-
-int
-lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
-{
-	lnet_nid_t dst_nid = msg->msg_target.nid;
-	struct lnet_ni *src_ni;
-	struct lnet_ni *local_ni;
-	struct lnet_peer *lp;
-	int cpt;
-	int cpt2;
-	int rc;
-
-	/*
-	 * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
-	 * but we might want to use pre-determined router for ACK/REPLY
-	 * in the future
-	 */
-	/* NB: ni == interface pre-determined (ACK/REPLY) */
-	LASSERT(!msg->msg_txpeer);
-	LASSERT(!msg->msg_sending);
-	LASSERT(!msg->msg_target_is_router);
-	LASSERT(!msg->msg_receiving);
-
-	msg->msg_sending = 1;
-
-	LASSERT(!msg->msg_tx_committed);
-	cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
- again:
-	lnet_net_lock(cpt);
-
-	if (the_lnet.ln_shutdown) {
-		lnet_net_unlock(cpt);
-		return -ESHUTDOWN;
-	}
-
-	if (src_nid == LNET_NID_ANY) {
-		src_ni = NULL;
-	} else {
-		src_ni = lnet_nid2ni_locked(src_nid, cpt);
-		if (!src_ni) {
-			lnet_net_unlock(cpt);
-			LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n",
-				      libcfs_nid2str(dst_nid),
-				      libcfs_nid2str(src_nid));
-			return -EINVAL;
-		}
-		LASSERT(!msg->msg_routing);
-	}
-
-	/* Is this for someone on a local network? */
-	local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
-
-	if (local_ni) {
-		if (!src_ni) {
-			src_ni = local_ni;
-			src_nid = src_ni->ni_nid;
-		} else if (src_ni == local_ni) {
-			lnet_ni_decref_locked(local_ni, cpt);
-		} else {
-			lnet_ni_decref_locked(local_ni, cpt);
-			lnet_ni_decref_locked(src_ni, cpt);
-			lnet_net_unlock(cpt);
-			LCONSOLE_WARN("No route to %s from %s\n",
-				      libcfs_nid2str(dst_nid),
-				      libcfs_nid2str(src_nid));
-			return -EINVAL;
-		}
-
-		LASSERT(src_nid != LNET_NID_ANY);
-		lnet_msg_commit(msg, cpt);
-
-		if (!msg->msg_routing)
-			msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
-
-		if (src_ni == the_lnet.ln_loni) {
-			/* No send credit hassles with LOLND */
-			lnet_net_unlock(cpt);
-			lnet_ni_send(src_ni, msg);
-
-			lnet_net_lock(cpt);
-			lnet_ni_decref_locked(src_ni, cpt);
-			lnet_net_unlock(cpt);
-			return 0;
-		}
-
-		rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
-		/* lp has ref on src_ni; lose mine */
-		lnet_ni_decref_locked(src_ni, cpt);
-		if (rc) {
-			lnet_net_unlock(cpt);
-			LCONSOLE_WARN("Error %d finding peer %s\n", rc,
-				      libcfs_nid2str(dst_nid));
-			/* ENOMEM or shutting down */
-			return rc;
-		}
-		LASSERT(lp->lp_ni == src_ni);
-	} else {
-		/* sending to a remote network */
-		lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
-		if (!lp) {
-			if (src_ni)
-				lnet_ni_decref_locked(src_ni, cpt);
-			lnet_net_unlock(cpt);
-
-			LCONSOLE_WARN("No route to %s via %s (all routers down)\n",
-				      libcfs_id2str(msg->msg_target),
-				      libcfs_nid2str(src_nid));
-			return -EHOSTUNREACH;
-		}
-
-		/*
-		 * rtr_nid is LNET_NID_ANY or the NID of a pre-determined
-		 * router. It's possible that rtr_nid isn't LNET_NID_ANY
-		 * yet lp isn't the pre-determined router; this can happen
-		 * if the routing table changed while we released the lock.
-		 */
-		if (rtr_nid != lp->lp_nid) {
-			cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
-			if (cpt2 != cpt) {
-				if (src_ni)
-					lnet_ni_decref_locked(src_ni, cpt);
-				lnet_net_unlock(cpt);
-
-				rtr_nid = lp->lp_nid;
-				cpt = cpt2;
-				goto again;
-			}
-		}
-
-		CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
-		       libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
-		       lnet_msgtyp2str(msg->msg_type), msg->msg_len);
-
-		if (!src_ni) {
-			src_ni = lp->lp_ni;
-			src_nid = src_ni->ni_nid;
-		} else {
-			LASSERT(src_ni == lp->lp_ni);
-			lnet_ni_decref_locked(src_ni, cpt);
-		}
-
-		lnet_peer_addref_locked(lp);
-
-		LASSERT(src_nid != LNET_NID_ANY);
-		lnet_msg_commit(msg, cpt);
-
-		if (!msg->msg_routing) {
-			/* I'm the source and now I know which NI to send on */
-			msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
-		}
-
-		msg->msg_target_is_router = 1;
-		msg->msg_target.nid = lp->lp_nid;
-		msg->msg_target.pid = LNET_PID_LUSTRE;
-	}
-
-	/* 'lp' is our best choice of peer */
-
-	LASSERT(!msg->msg_peertxcredit);
-	LASSERT(!msg->msg_txcredit);
-	LASSERT(!msg->msg_txpeer);
-
-	msg->msg_txpeer = lp;		   /* msg takes my ref on lp */
-
-	rc = lnet_post_send_locked(msg, 0);
-	lnet_net_unlock(cpt);
-
-	if (rc < 0)
-		return rc;
-
-	if (rc == LNET_CREDIT_OK)
-		lnet_ni_send(src_ni, msg);
-
-	return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
-}
-
-void
-lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob)
-{
-	lnet_net_lock(cpt);
-	the_lnet.ln_counters[cpt]->drop_count++;
-	the_lnet.ln_counters[cpt]->drop_length += nob;
-	lnet_net_unlock(cpt);
-
-	lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
-}
-
-static void
-lnet_recv_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-
-	if (msg->msg_wanted)
-		lnet_setpayloadbuffer(msg);
-
-	lnet_build_msg_event(msg, LNET_EVENT_PUT);
-
-	/*
-	 * Must I ACK?  If so I'll grab the ack_wmd out of the header and put
-	 * it back into the ACK during lnet_finalize()
-	 */
-	msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
-		       !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE);
-
-	lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
-		     msg->msg_offset, msg->msg_wanted, hdr->payload_length);
-}
-
-static int
-lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_match_info info;
-	bool ready_delay;
-	int rc;
-
-	/* Convert put fields to host byte order */
-	le64_to_cpus(&hdr->msg.put.match_bits);
-	le32_to_cpus(&hdr->msg.put.ptl_index);
-	le32_to_cpus(&hdr->msg.put.offset);
-
-	info.mi_id.nid	= hdr->src_nid;
-	info.mi_id.pid	= hdr->src_pid;
-	info.mi_opc	= LNET_MD_OP_PUT;
-	info.mi_portal	= hdr->msg.put.ptl_index;
-	info.mi_rlength	= hdr->payload_length;
-	info.mi_roffset	= hdr->msg.put.offset;
-	info.mi_mbits	= hdr->msg.put.match_bits;
-
-	msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv;
-	ready_delay = msg->msg_rx_ready_delay;
-
- again:
-	rc = lnet_ptl_match_md(&info, msg);
-	switch (rc) {
-	default:
-		LBUG();
-
-	case LNET_MATCHMD_OK:
-		lnet_recv_put(ni, msg);
-		return 0;
-
-	case LNET_MATCHMD_NONE:
-		/*
-		 * No eager_recv, or it has already been called; the message
-		 * should have been attached to the delayed list.
-		 */
-		if (ready_delay)
-			return 0;
-
-		rc = lnet_ni_eager_recv(ni, msg);
-		if (!rc) {
-			ready_delay = true;
-			goto again;
-		}
-		/* fall through */
-
-	case LNET_MATCHMD_DROP:
-		CNETERR("Dropping PUT from %s portal %d match %llu offset %d length %d: %d\n",
-			libcfs_id2str(info.mi_id), info.mi_portal,
-			info.mi_mbits, info.mi_roffset, info.mi_rlength, rc);
-
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-}
-
-static int
-lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get)
-{
-	struct lnet_match_info info;
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_handle_wire reply_wmd;
-	int rc;
-
-	/* Convert get fields to host byte order */
-	le64_to_cpus(&hdr->msg.get.match_bits);
-	le32_to_cpus(&hdr->msg.get.ptl_index);
-	le32_to_cpus(&hdr->msg.get.sink_length);
-	le32_to_cpus(&hdr->msg.get.src_offset);
-
-	info.mi_id.nid  = hdr->src_nid;
-	info.mi_id.pid  = hdr->src_pid;
-	info.mi_opc     = LNET_MD_OP_GET;
-	info.mi_portal  = hdr->msg.get.ptl_index;
-	info.mi_rlength = hdr->msg.get.sink_length;
-	info.mi_roffset = hdr->msg.get.src_offset;
-	info.mi_mbits   = hdr->msg.get.match_bits;
-
-	rc = lnet_ptl_match_md(&info, msg);
-	if (rc == LNET_MATCHMD_DROP) {
-		CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n",
-			libcfs_id2str(info.mi_id), info.mi_portal,
-			info.mi_mbits, info.mi_roffset, info.mi_rlength);
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-
-	LASSERT(rc == LNET_MATCHMD_OK);
-
-	lnet_build_msg_event(msg, LNET_EVENT_GET);
-
-	reply_wmd = hdr->msg.get.return_wmd;
-
-	lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
-		       msg->msg_offset, msg->msg_wanted);
-
-	msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
-
-	if (rdma_get) {
-		/* The LND completes the REPLY from her recv procedure */
-		lnet_ni_recv(ni, msg->msg_private, msg, 0,
-			     msg->msg_offset, msg->msg_len, msg->msg_len);
-		return 0;
-	}
-
-	lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
-	msg->msg_receiving = 0;
-
-	rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
-	if (rc < 0) {
-		/* didn't get as far as lnet_ni_send() */
-		CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
-		       libcfs_nid2str(ni->ni_nid),
-		       libcfs_id2str(info.mi_id), rc);
-
-		lnet_finalize(ni, msg, rc);
-	}
-
-	return 0;
-}
-
-static int
-lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	void *private = msg->msg_private;
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_process_id src = {0};
-	struct lnet_libmd *md;
-	int rlength;
-	int mlength;
-	int cpt;
-
-	cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie);
-	lnet_res_lock(cpt);
-
-	src.nid = hdr->src_nid;
-	src.pid = hdr->src_pid;
-
-	/* NB handles only looked up by creator (no flips) */
-	md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
-	if (!md || !md->md_threshold || md->md_me) {
-		CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n",
-			libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-			!md ? "invalid" : "inactive",
-			hdr->msg.reply.dst_wmd.wh_interface_cookie,
-			hdr->msg.reply.dst_wmd.wh_object_cookie);
-		if (md && md->md_me)
-			CERROR("REPLY MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-
-		lnet_res_unlock(cpt);
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-
-	LASSERT(!md->md_offset);
-
-	rlength = hdr->payload_length;
-	mlength = min_t(uint, rlength, md->md_length);
-
-	if (mlength < rlength &&
-	    !(md->md_options & LNET_MD_TRUNCATE)) {
-		CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n",
-			libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-			rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
-			mlength);
-		lnet_res_unlock(cpt);
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-
-	CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n",
-	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-	       mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
-
-	lnet_msg_attach_md(msg, md, 0, mlength);
-
-	if (mlength)
-		lnet_setpayloadbuffer(msg);
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
-	lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
-	return 0;
-}
-
-static int
-lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_process_id src = {0};
-	struct lnet_libmd *md;
-	int cpt;
-
-	src.nid = hdr->src_nid;
-	src.pid = hdr->src_pid;
-
-	/* Convert ack fields to host byte order */
-	le64_to_cpus(&hdr->msg.ack.match_bits);
-	le32_to_cpus(&hdr->msg.ack.mlength);
-
-	cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie);
-	lnet_res_lock(cpt);
-
-	/* NB handles only looked up by creator (no flips) */
-	md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
-	if (!md || !md->md_threshold || md->md_me) {
-		/* Don't moan; this is expected */
-		CDEBUG(D_NET,
-		       "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n",
-		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-		       !md ? "invalid" : "inactive",
-		       hdr->msg.ack.dst_wmd.wh_interface_cookie,
-		       hdr->msg.ack.dst_wmd.wh_object_cookie);
-		if (md && md->md_me)
-			CERROR("Source MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-
-		lnet_res_unlock(cpt);
-		return -ENOENT;	/* -ve! */
-	}
-
-	CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n",
-	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-	       hdr->msg.ack.dst_wmd.wh_object_cookie);
-
-	lnet_msg_attach_md(msg, md, 0, 0);
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_ACK);
-
-	lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
-	return 0;
-}
-
-/**
- * \retval LNET_CREDIT_OK	If \a msg is forwarded
- * \retval LNET_CREDIT_WAIT	If \a msg is blocked because no buffer is available
- * \retval -ve			error code
- */
-int
-lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	int rc = 0;
-
-	if (!the_lnet.ln_routing)
-		return -ECANCELED;
-
-	if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
-	    lnet_msg2bufpool(msg)->rbp_credits <= 0) {
-		if (!ni->ni_lnd->lnd_eager_recv) {
-			msg->msg_rx_ready_delay = 1;
-		} else {
-			lnet_net_unlock(msg->msg_rx_cpt);
-			rc = lnet_ni_eager_recv(ni, msg);
-			lnet_net_lock(msg->msg_rx_cpt);
-		}
-	}
-
-	if (!rc)
-		rc = lnet_post_routed_recv_locked(msg, 0);
-	return rc;
-}
-
-int
-lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	int rc;
-
-	switch (msg->msg_type) {
-	case LNET_MSG_ACK:
-		rc = lnet_parse_ack(ni, msg);
-		break;
-	case LNET_MSG_PUT:
-		rc = lnet_parse_put(ni, msg);
-		break;
-	case LNET_MSG_GET:
-		rc = lnet_parse_get(ni, msg, msg->msg_rdma_get);
-		break;
-	case LNET_MSG_REPLY:
-		rc = lnet_parse_reply(ni, msg);
-		break;
-	default: /* prevent an unused label if !kernel */
-		LASSERT(0);
-		return -EPROTO;
-	}
-
-	LASSERT(!rc || rc == -ENOENT);
-	return rc;
-}
-
-char *
-lnet_msgtyp2str(int type)
-{
-	switch (type) {
-	case LNET_MSG_ACK:
-		return "ACK";
-	case LNET_MSG_PUT:
-		return "PUT";
-	case LNET_MSG_GET:
-		return "GET";
-	case LNET_MSG_REPLY:
-		return "REPLY";
-	case LNET_MSG_HELLO:
-		return "HELLO";
-	default:
-		return "<UNKNOWN>";
-	}
-}
-
-void
-lnet_print_hdr(struct lnet_hdr *hdr)
-{
-	struct lnet_process_id src = {0};
-	struct lnet_process_id dst = {0};
-	char *type_str = lnet_msgtyp2str(hdr->type);
-
-	src.nid = hdr->src_nid;
-	src.pid = hdr->src_pid;
-
-	dst.nid = hdr->dest_nid;
-	dst.pid = hdr->dest_pid;
-
-	CWARN("P3 Header at %p of type %s\n", hdr, type_str);
-	CWARN("    From %s\n", libcfs_id2str(src));
-	CWARN("    To   %s\n", libcfs_id2str(dst));
-
-	switch (hdr->type) {
-	default:
-		break;
-
-	case LNET_MSG_PUT:
-		CWARN("    Ptl index %d, ack md %#llx.%#llx, match bits %llu\n",
-		      hdr->msg.put.ptl_index,
-		      hdr->msg.put.ack_wmd.wh_interface_cookie,
-		      hdr->msg.put.ack_wmd.wh_object_cookie,
-		      hdr->msg.put.match_bits);
-		CWARN("    Length %d, offset %d, hdr data %#llx\n",
-		      hdr->payload_length, hdr->msg.put.offset,
-		      hdr->msg.put.hdr_data);
-		break;
-
-	case LNET_MSG_GET:
-		CWARN("    Ptl index %d, return md %#llx.%#llx, match bits %llu\n",
-		      hdr->msg.get.ptl_index,
-		      hdr->msg.get.return_wmd.wh_interface_cookie,
-		      hdr->msg.get.return_wmd.wh_object_cookie,
-		      hdr->msg.get.match_bits);
-		CWARN("    Length %d, src offset %d\n",
-		      hdr->msg.get.sink_length,
-		      hdr->msg.get.src_offset);
-		break;
-
-	case LNET_MSG_ACK:
-		CWARN("    dst md %#llx.%#llx, manipulated length %d\n",
-		      hdr->msg.ack.dst_wmd.wh_interface_cookie,
-		      hdr->msg.ack.dst_wmd.wh_object_cookie,
-		      hdr->msg.ack.mlength);
-		break;
-
-	case LNET_MSG_REPLY:
-		CWARN("    dst md %#llx.%#llx, length %d\n",
-		      hdr->msg.reply.dst_wmd.wh_interface_cookie,
-		      hdr->msg.reply.dst_wmd.wh_object_cookie,
-		      hdr->payload_length);
-	}
-}
-
-int
-lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
-	   void *private, int rdma_req)
-{
-	int rc = 0;
-	int cpt;
-	int for_me;
-	struct lnet_msg *msg;
-	lnet_pid_t dest_pid;
-	lnet_nid_t dest_nid;
-	lnet_nid_t src_nid;
-	__u32 payload_length;
-	__u32 type;
-
-	LASSERT(!in_interrupt());
-
-	type = le32_to_cpu(hdr->type);
-	src_nid = le64_to_cpu(hdr->src_nid);
-	dest_nid = le64_to_cpu(hdr->dest_nid);
-	dest_pid = le32_to_cpu(hdr->dest_pid);
-	payload_length = le32_to_cpu(hdr->payload_length);
-
-	for_me = (ni->ni_nid == dest_nid);
-	cpt = lnet_cpt_of_nid(from_nid);
-
-	switch (type) {
-	case LNET_MSG_ACK:
-	case LNET_MSG_GET:
-		if (payload_length > 0) {
-			CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       lnet_msgtyp2str(type), payload_length);
-			return -EPROTO;
-		}
-		break;
-
-	case LNET_MSG_PUT:
-	case LNET_MSG_REPLY:
-		if (payload_length >
-		   (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
-			CERROR("%s, src %s: bad %s payload %d (%d max expected)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       lnet_msgtyp2str(type),
-			       payload_length,
-			       for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
-			return -EPROTO;
-		}
-		break;
-
-	default:
-		CERROR("%s, src %s: Bad message type 0x%x\n",
-		       libcfs_nid2str(from_nid),
-		       libcfs_nid2str(src_nid), type);
-		return -EPROTO;
-	}
-
-	if (the_lnet.ln_routing &&
-	    ni->ni_last_alive != ktime_get_real_seconds()) {
-		/* NB: so far this is the only place that sets NI status to "up" */
-		lnet_ni_lock(ni);
-		ni->ni_last_alive = ktime_get_real_seconds();
-		if (ni->ni_status &&
-		    ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
-			ni->ni_status->ns_status = LNET_NI_STATUS_UP;
-		lnet_ni_unlock(ni);
-	}
-
-	/*
-	 * Regard a bad destination NID as a protocol error.  Senders should
-	 * know what they're doing; if they don't they're misconfigured, buggy
-	 * or malicious so we chop them off at the knees :)
-	 */
-	if (!for_me) {
-		if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
-			/* should have gone direct */
-			CERROR("%s, src %s: Bad dest nid %s (should have been sent direct)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			return -EPROTO;
-		}
-
-		if (lnet_islocalnid(dest_nid)) {
-			/*
-			 * dest is another local NI; sender should have used
-			 * this node's NID on its own network
-			 */
-			CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			return -EPROTO;
-		}
-
-		if (rdma_req && type == LNET_MSG_GET) {
-			CERROR("%s, src %s: Bad optimized GET for %s (final destination must be me)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			return -EPROTO;
-		}
-
-		if (!the_lnet.ln_routing) {
-			CERROR("%s, src %s: Dropping message for %s (routing not enabled)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			goto drop;
-		}
-	}
-
-	/*
-	 * Message looks OK; we're not going to return an error, so we MUST
-	 * call back lnd_recv() come what may...
-	 */
-	if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
-	    fail_peer(src_nid, 0)) {	     /* shall we now? */
-		CERROR("%s, src %s: Dropping %s to simulate failure\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       lnet_msgtyp2str(type));
-		goto drop;
-	}
-
-	if (!list_empty(&the_lnet.ln_drop_rules) &&
-	    lnet_drop_rule_match(hdr)) {
-		CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       libcfs_nid2str(dest_nid), lnet_msgtyp2str(type));
-		goto drop;
-	}
-
-	msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	if (!msg) {
-		CERROR("%s, src %s: Dropping %s (out of memory)\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       lnet_msgtyp2str(type));
-		goto drop;
-	}
-
-	/* msg zeroed by kzalloc()
-	 * i.e. flags all clear, pointers NULL etc
-	 */
-	msg->msg_type = type;
-	msg->msg_private = private;
-	msg->msg_receiving = 1;
-	msg->msg_rdma_get = rdma_req;
-	msg->msg_wanted = payload_length;
-	msg->msg_len = payload_length;
-	msg->msg_offset = 0;
-	msg->msg_hdr = *hdr;
-	/* for building message event */
-	msg->msg_from = from_nid;
-	if (!for_me) {
-		msg->msg_target.pid	= dest_pid;
-		msg->msg_target.nid	= dest_nid;
-		msg->msg_routing	= 1;
-
-	} else {
-		/* convert common msg->hdr fields to host byteorder */
-		msg->msg_hdr.type	= type;
-		msg->msg_hdr.src_nid	= src_nid;
-		le32_to_cpus(&msg->msg_hdr.src_pid);
-		msg->msg_hdr.dest_nid	= dest_nid;
-		msg->msg_hdr.dest_pid	= dest_pid;
-		msg->msg_hdr.payload_length = payload_length;
-	}
-
-	lnet_net_lock(cpt);
-	rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
-	if (rc) {
-		lnet_net_unlock(cpt);
-		CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       lnet_msgtyp2str(type), rc);
-		kfree(msg);
-		if (rc == -ESHUTDOWN)
-			/* We are shutting down. Don't do anything more */
-			return 0;
-		goto drop;
-	}
-
-	if (lnet_isrouter(msg->msg_rxpeer)) {
-		lnet_peer_set_alive(msg->msg_rxpeer);
-		if (avoid_asym_router_failure &&
-		    LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
-			/* received a remote message from a router; update
-			 * the remote NI status on this router.
-			 * NB: multi-hop routed messages are ignored.
-			 */
-			lnet_router_ni_update_locked(msg->msg_rxpeer,
-						     LNET_NIDNET(src_nid));
-		}
-	}
-
-	lnet_msg_commit(msg, cpt);
-
-	/* message delay simulation */
-	if (unlikely(!list_empty(&the_lnet.ln_delay_rules) &&
-		     lnet_delay_rule_match_locked(hdr, msg))) {
-		lnet_net_unlock(cpt);
-		return 0;
-	}
-
-	if (!for_me) {
-		rc = lnet_parse_forward_locked(ni, msg);
-		lnet_net_unlock(cpt);
-
-		if (rc < 0)
-			goto free_drop;
-
-		if (rc == LNET_CREDIT_OK) {
-			lnet_ni_recv(ni, msg->msg_private, msg, 0,
-				     0, payload_length, payload_length);
-		}
-		return 0;
-	}
-
-	lnet_net_unlock(cpt);
-
-	rc = lnet_parse_local(ni, msg);
-	if (rc)
-		goto free_drop;
-	return 0;
-
- free_drop:
-	LASSERT(!msg->msg_md);
-	lnet_finalize(ni, msg, rc);
-
- drop:
-	lnet_drop_message(ni, cpt, private, payload_length);
-	return 0;
-}
-EXPORT_SYMBOL(lnet_parse);
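
lnet_parse() is the entry point an LND calls for every incoming wire header, and the comment above ("we MUST call back lnd_recv() come what may") is the heart of its contract. A minimal, hedged sketch of a transport receive path against that contract; the surrounding transport state is left out and "example_lnd_rx" is hypothetical:

/* Hedged sketch only: shows the lnet_parse() contract as described in
 * the code above.  A return of 0 means LNet has taken the message and
 * will call back lnd_recv(); a negative return (e.g. -EPROTO) means
 * the transport must discard the payload itself.
 */
static void example_lnd_rx(struct lnet_ni *ni, struct lnet_hdr *hdr,
			   lnet_nid_t sender, void *priv)
{
	int rc = lnet_parse(ni, hdr, sender, priv, 0 /* not an RDMA req */);

	if (rc < 0) {
		/* protocol error: lnd_recv() will NOT be called back,
		 * so drop any buffered payload here (transport-specific)
		 */
		return;
	}
	/* rc == 0: the payload is consumed via the lnd_recv() callback */
}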
-
-void
-lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
-{
-	while (!list_empty(head)) {
-		struct lnet_process_id id = {0};
-		struct lnet_msg *msg;
-
-		msg = list_entry(head->next, struct lnet_msg, msg_list);
-		list_del(&msg->msg_list);
-
-		id.nid = msg->msg_hdr.src_nid;
-		id.pid = msg->msg_hdr.src_pid;
-
-		LASSERT(!msg->msg_md);
-		LASSERT(msg->msg_rx_delayed);
-		LASSERT(msg->msg_rxpeer);
-		LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
-		CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n",
-		      libcfs_id2str(id),
-		      msg->msg_hdr.msg.put.ptl_index,
-		      msg->msg_hdr.msg.put.match_bits,
-		      msg->msg_hdr.msg.put.offset,
-		      msg->msg_hdr.payload_length, reason);
-
-		/*
-		 * NB I can't drop msg's ref on msg_rxpeer until after I've
-		 * called lnet_drop_message(), so I just hang onto msg as well
-		 * until that's done
-		 */
-		lnet_drop_message(msg->msg_rxpeer->lp_ni,
-				  msg->msg_rxpeer->lp_cpt,
-				  msg->msg_private, msg->msg_len);
-		/*
-		 * NB: the message will not generate an event because it has
-		 * no attached MD, but we should still give an error code so
-		 * lnet_msg_decommit() can skip counter operations and other
-		 * checks.
-		 */
-		lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
-	}
-}
-
-void
-lnet_recv_delayed_msg_list(struct list_head *head)
-{
-	while (!list_empty(head)) {
-		struct lnet_msg *msg;
-		struct lnet_process_id id;
-
-		msg = list_entry(head->next, struct lnet_msg, msg_list);
-		list_del(&msg->msg_list);
-
-		/*
-		 * md won't disappear under me, since each msg
-		 * holds a ref on it
-		 */
-		id.nid = msg->msg_hdr.src_nid;
-		id.pid = msg->msg_hdr.src_pid;
-
-		LASSERT(msg->msg_rx_delayed);
-		LASSERT(msg->msg_md);
-		LASSERT(msg->msg_rxpeer);
-		LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
-		CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
-		       libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index,
-		       msg->msg_hdr.msg.put.match_bits,
-		       msg->msg_hdr.msg.put.offset,
-		       msg->msg_hdr.payload_length);
-
-		lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
-	}
-}
-
-/**
- * Initiate an asynchronous PUT operation.
- *
- * There are several events associated with a PUT: completion of the send on
- * the initiator node (LNET_EVENT_SEND), and when the send completes
- * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating
- * that the operation was accepted by the target. The event LNET_EVENT_PUT is
- * used at the target node to indicate the completion of incoming data
- * delivery.
- *
- * The local events will be logged in the EQ associated with the MD pointed to
- * by \a mdh handle. Using a MD without an associated EQ results in these
- * events being discarded. In this case, the caller must have another
- * mechanism (e.g., a higher level protocol) for determining when it is safe
- * to modify the memory region associated with the MD.
- *
- * Note that LNet does not guarantee the order of LNET_EVENT_SEND and
- * LNET_EVENT_ACK, though intuitively ACK should happen after SEND.
- *
- * \param self Indicates the NID of a local interface through which to send
- * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself.
- * \param mdh A handle for the MD that describes the memory to be sent. The MD
- * must be "free floating" (See LNetMDBind()).
- * \param ack Controls whether an acknowledgment is requested.
- * Acknowledgments are only sent when they are requested by the initiating
- * process and the target MD enables them.
- * \param target A process identifier for the target process.
- * \param portal The index in the \a target's portal table.
- * \param match_bits The match bits to use for MD selection at the target
- * process.
- * \param offset The offset into the target MD (only used when the target
- * MD has the LNET_MD_MANAGE_REMOTE option set).
- * \param hdr_data 64 bits of user data that can be included in the message
- * header. This data is written to an event queue entry at the target if an
- * EQ is present on the matching MD.
- *
- * \retval  0      Success, and only in this case events will be generated
- * and logged to EQ (if it exists).
- * \retval -EIO    Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- *
- * \see lnet_event::hdr_data and lnet_event_kind.
- */
-int
-LNetPut(lnet_nid_t self, struct lnet_handle_md mdh, enum lnet_ack_req ack,
-	struct lnet_process_id target, unsigned int portal,
-	__u64 match_bits, unsigned int offset,
-	__u64 hdr_data)
-{
-	struct lnet_msg *msg;
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
-	    fail_peer(target.nid, 1)) { /* shall we now? */
-		CERROR("Dropping PUT to %s: simulated failure\n",
-		       libcfs_id2str(target));
-		return -EIO;
-	}
-
-	msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	if (!msg) {
-		CERROR("Dropping PUT to %s: ENOMEM on struct lnet_msg\n",
-		       libcfs_id2str(target));
-		return -ENOMEM;
-	}
-	msg->msg_vmflush = !!(current->flags & PF_MEMALLOC);
-
-	cpt = lnet_cpt_of_cookie(mdh.cookie);
-	lnet_res_lock(cpt);
-
-	md = lnet_handle2md(&mdh);
-	if (!md || !md->md_threshold || md->md_me) {
-		CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n",
-		       match_bits, portal, libcfs_id2str(target),
-		       !md ? -1 : md->md_threshold);
-		if (md && md->md_me)
-			CERROR("Source MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-		lnet_res_unlock(cpt);
-
-		kfree(msg);
-		return -ENOENT;
-	}
-
-	CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
-	lnet_msg_attach_md(msg, md, 0, 0);
-
-	lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
-
-	msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
-	msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
-	msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
-	msg->msg_hdr.msg.put.hdr_data = hdr_data;
-
-	/* NB handles only looked up by creator (no flips) */
-	if (ack == LNET_ACK_REQ) {
-		msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
-			the_lnet.ln_interface_cookie;
-		msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
-			md->md_lh.lh_cookie;
-	} else {
-		msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
-			LNET_WIRE_HANDLE_COOKIE_NONE;
-		msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
-			LNET_WIRE_HANDLE_COOKIE_NONE;
-	}
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
-	rc = lnet_send(self, msg, LNET_NID_ANY);
-	if (rc) {
-		CNETERR("Error sending PUT to %s: %d\n",
-			libcfs_id2str(target), rc);
-		lnet_finalize(NULL, msg, rc);
-	}
-
-	/* completion will be signalled by an event */
-	return 0;
-}
-EXPORT_SYMBOL(LNetPut);
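
Since the kernel-doc above spells out the PUT event model and error returns, a short caller sketch may help. This is a hedged example only: MY_PORTAL and MY_MATCH_BITS are hypothetical values the target is assumed to listen on, and mdh is assumed to come from LNetMDBind() as the documentation requires:

/* Hedged sketch: assumes an initialized LNet stack and an mdh bound
 * via LNetMDBind() to the buffer being sent.
 */
#define MY_PORTAL	9	/* hypothetical portal index */
#define MY_MATCH_BITS	0x1ULL	/* hypothetical match bits */

static int example_put(struct lnet_handle_md mdh,
		       struct lnet_process_id target)
{
	int rc;

	rc = LNetPut(LNET_NID_ANY,	/* let LNet pick the local NI */
		     mdh,		/* MD describing the data to send */
		     LNET_ACK_REQ,	/* request an ACK from the target */
		     target, MY_PORTAL, MY_MATCH_BITS,
		     0,			/* offset into the target MD */
		     0);		/* opaque hdr_data for the event */

	if (rc)		/* -EIO, -ENOMEM or -ENOENT: no events will follow */
		return rc;

	/* 0: LNET_EVENT_SEND (and LNET_EVENT_ACK) arrive via the MD's EQ */
	return 0;
}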
-
-struct lnet_msg *
-lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg)
-{
-	/*
-	 * The LND can DMA direct to the GET md (i.e. no REPLY msg).  This
-	 * returns a msg for the LND to pass to lnet_finalize() when the sink
-	 * data has been received.
-	 *
-	 * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
-	 * lnet_finalize() is called on it, so the LND must call this first
-	 */
-	struct lnet_msg *msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	struct lnet_libmd *getmd = getmsg->msg_md;
-	struct lnet_process_id peer_id = getmsg->msg_target;
-	int cpt;
-
-	LASSERT(!getmsg->msg_target_is_router);
-	LASSERT(!getmsg->msg_routing);
-
-	if (!msg) {
-		CERROR("%s: Dropping REPLY from %s: can't allocate msg\n",
-		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
-		goto drop;
-	}
-
-	cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie);
-	lnet_res_lock(cpt);
-
-	LASSERT(getmd->md_refcount > 0);
-
-	if (!getmd->md_threshold) {
-		CERROR("%s: Dropping REPLY from %s for inactive MD %p\n",
-		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
-		       getmd);
-		lnet_res_unlock(cpt);
-		goto drop;
-	}
-
-	LASSERT(!getmd->md_offset);
-
-	CDEBUG(D_NET, "%s: Reply from %s md %p\n",
-	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
-
-	/* setup information for lnet_build_msg_event */
-	msg->msg_from = peer_id.nid;
-	msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
-	msg->msg_hdr.src_nid = peer_id.nid;
-	msg->msg_hdr.payload_length = getmd->md_length;
-	msg->msg_receiving = 1; /* required by lnet_msg_attach_md */
-
-	lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
-	lnet_res_unlock(cpt);
-
-	cpt = lnet_cpt_of_nid(peer_id.nid);
-
-	lnet_net_lock(cpt);
-	lnet_msg_commit(msg, cpt);
-	lnet_net_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
-	return msg;
-
- drop:
-	cpt = lnet_cpt_of_nid(peer_id.nid);
-
-	lnet_net_lock(cpt);
-	the_lnet.ln_counters[cpt]->drop_count++;
-	the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
-	lnet_net_unlock(cpt);
-
-	kfree(msg);
-
-	return NULL;
-}
-EXPORT_SYMBOL(lnet_create_reply_msg);
-
-void
-lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *reply,
-		       unsigned int len)
-{
-	/*
-	 * Set the REPLY length, now that the RDMA that elides the REPLY
-	 * message has completed and I know it.
-	 */
-	LASSERT(reply);
-	LASSERT(reply->msg_type == LNET_MSG_GET);
-	LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY);
-
-	/*
-	 * NB I trusted my peer to RDMA.  If she tells me she's written beyond
-	 * the end of my buffer, I might as well be dead.
-	 */
-	LASSERT(len <= reply->msg_ev.mlength);
-
-	reply->msg_ev.mlength = len;
-}
-EXPORT_SYMBOL(lnet_set_reply_msg_len);
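
The CAVEAT EMPTOR in lnet_create_reply_msg() and the comment in lnet_set_reply_msg_len() together pin down an ordering for LNDs that turn a GET into a direct RDMA. A hedged sketch of one completion path that respects it; nob is assumed to be the byte count reported by the transport:

/* Hedged sketch: one possible completion path for an LND that DMAs
 * GET sink data directly into the GET MD (no wire REPLY).  The call
 * ordering follows the comments above.
 */
static void example_optimized_get_done(struct lnet_ni *ni,
				       struct lnet_msg *getmsg,
				       unsigned int nob)
{
	/* must be created before getmsg is finalized (and thus freed) */
	struct lnet_msg *reply = lnet_create_reply_msg(ni, getmsg);

	if (reply) {
		/* the elided REPLY's real length is only known now */
		lnet_set_reply_msg_len(ni, reply, nob);
		lnet_finalize(ni, reply, 0);	/* queues LNET_EVENT_REPLY */
	}
	lnet_finalize(ni, getmsg, 0);		/* the original GET is done */
}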
-
-/**
- * Initiate an asynchronous GET operation.
- *
- * On the initiator node, an LNET_EVENT_SEND is logged when the GET request
- * is sent, and an LNET_EVENT_REPLY is logged when the data returned from
- * the target node in the REPLY has been written to local MD.
- *
- * On the target node, an LNET_EVENT_GET is logged when the GET request
- * arrives and is accepted into a MD.
- *
- * \param self,target,portal,match_bits,offset See the discussion in LNetPut().
- * \param mdh A handle for the MD that describes the memory into which the
- * requested data will be received. The MD must be "free floating"
- * (See LNetMDBind()).
- *
- * \retval  0      Success, and only in this case events will be generated
- * and logged to EQ (if it exists) of the MD.
- * \retval -EIO    Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- */
-int
-LNetGet(lnet_nid_t self, struct lnet_handle_md mdh,
-	struct lnet_process_id target, unsigned int portal,
-	__u64 match_bits, unsigned int offset)
-{
-	struct lnet_msg *msg;
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
-	    fail_peer(target.nid, 1)) {	  /* shall we now? */
-		CERROR("Dropping GET to %s: simulated failure\n",
-		       libcfs_id2str(target));
-		return -EIO;
-	}
-
-	msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	if (!msg) {
-		CERROR("Dropping GET to %s: ENOMEM on struct lnet_msg\n",
-		       libcfs_id2str(target));
-		return -ENOMEM;
-	}
-
-	cpt = lnet_cpt_of_cookie(mdh.cookie);
-	lnet_res_lock(cpt);
-
-	md = lnet_handle2md(&mdh);
-	if (!md || !md->md_threshold || md->md_me) {
-		CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n",
-		       match_bits, portal, libcfs_id2str(target),
-		       !md ? -1 : md->md_threshold);
-		if (md && md->md_me)
-			CERROR("REPLY MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-
-		lnet_res_unlock(cpt);
-
-		kfree(msg);
-		return -ENOENT;
-	}
-
-	CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
-	lnet_msg_attach_md(msg, md, 0, 0);
-
-	lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
-
-	msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
-	msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
-	msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
-	msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
-
-	/* NB handles only looked up by creator (no flips) */
-	msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
-		the_lnet.ln_interface_cookie;
-	msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
-		md->md_lh.lh_cookie;
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
-	rc = lnet_send(self, msg, LNET_NID_ANY);
-	if (rc < 0) {
-		CNETERR("Error sending GET to %s: %d\n",
-			libcfs_id2str(target), rc);
-		lnet_finalize(NULL, msg, rc);
-	}
-
-	/* completion will be signalled by an event */
-	return 0;
-}
-EXPORT_SYMBOL(LNetGet);
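
Mirroring the PUT example, a hedged caller sketch for LNetGet(); again mdh is assumed to be a free-floating MD from LNetMDBind(), this time describing the local sink buffer, and the hypothetical portal/match-bits constants from the PUT sketch are reused:

/* Hedged sketch: the REPLY payload lands in the MD behind mdh, and
 * LNET_EVENT_SEND / LNET_EVENT_REPLY are delivered through its EQ.
 */
static int example_get(struct lnet_handle_md mdh,
		       struct lnet_process_id target)
{
	int rc = LNetGet(LNET_NID_ANY, mdh, target,
			 MY_PORTAL, MY_MATCH_BITS,
			 0 /* offset into the remote MD */);

	if (rc)		/* -EIO, -ENOMEM or -ENOENT: no events will follow */
		return rc;

	return 0;	/* completion arrives as events, not here */
}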
-
-/**
- * Calculate distance to node at \a dstnid.
- *
- * \param dstnid Target NID.
- * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid
- * is saved here.
- * \param orderp If not NULL, order of the route to reach \a dstnid is saved
- * here.
- *
- * \retval 0 If \a dstnid belongs to a local interface and the reserved
- * option local_nid_dist_zero is set (the default).
- * \retval positive Distance to the target NID, i.e. the number of hops
- * plus one.
- * \retval -EHOSTUNREACH If \a dstnid is not reachable.
- */
-int
-LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
-{
-	struct list_head *e;
-	struct lnet_ni *ni;
-	struct lnet_remotenet *rnet;
-	__u32 dstnet = LNET_NIDNET(dstnid);
-	int hops;
-	int cpt;
-	__u32 order = 2;
-	struct list_head *rn_list;
-
-	/*
-	 * if !local_nid_dist_zero, I don't return a distance of 0 ever
-	 * (when lustre sees a distance of 0, it substitutes 0@lo), so I
-	 * keep order 0 free for 0@lo and order 1 free for a local NID
-	 * match
-	 */
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each(e, &the_lnet.ln_nis) {
-		ni = list_entry(e, struct lnet_ni, ni_list);
-
-		if (ni->ni_nid == dstnid) {
-			if (srcnidp)
-				*srcnidp = dstnid;
-			if (orderp) {
-				if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
-					*orderp = 0;
-				else
-					*orderp = 1;
-			}
-			lnet_net_unlock(cpt);
-
-			return local_nid_dist_zero ? 0 : 1;
-		}
-
-		if (LNET_NIDNET(ni->ni_nid) == dstnet) {
-			/*
-			 * Check if ni was originally created in
-			 * current net namespace.
-			 * If not, assign order above 0xffff0000,
-			 * to make this ni not a priority.
-			 */
-			if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
-				order += 0xffff0000;
-
-			if (srcnidp)
-				*srcnidp = ni->ni_nid;
-			if (orderp)
-				*orderp = order;
-			lnet_net_unlock(cpt);
-			return 1;
-		}
-
-		order++;
-	}
-
-	rn_list = lnet_net2rnethash(dstnet);
-	list_for_each(e, rn_list) {
-		rnet = list_entry(e, struct lnet_remotenet, lrn_list);
-
-		if (rnet->lrn_net == dstnet) {
-			struct lnet_route *route;
-			struct lnet_route *shortest = NULL;
-			__u32 shortest_hops = LNET_UNDEFINED_HOPS;
-			__u32 route_hops;
-
-			LASSERT(!list_empty(&rnet->lrn_routes));
-
-			list_for_each_entry(route, &rnet->lrn_routes,
-					    lr_list) {
-				route_hops = route->lr_hops;
-				if (route_hops == LNET_UNDEFINED_HOPS)
-					route_hops = 1;
-				if (!shortest ||
-				    route_hops < shortest_hops) {
-					shortest = route;
-					shortest_hops = route_hops;
-				}
-			}
-
-			LASSERT(shortest);
-			hops = shortest_hops;
-			if (srcnidp)
-				*srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
-			if (orderp)
-				*orderp = order;
-			lnet_net_unlock(cpt);
-			return hops + 1;
-		}
-		order++;
-	}
-
-	lnet_net_unlock(cpt);
-	return -EHOSTUNREACH;
-}
-EXPORT_SYMBOL(LNetDist);
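
The return convention above (0 for a local NID when local_nid_dist_zero is set, hops plus one otherwise, -EHOSTUNREACH when no route exists) is easy to misread, so a hedged decoding sketch:

/* Hedged sketch: decodes LNetDist()'s documented return values for
 * an arbitrary NID of interest.
 */
static void example_dist(lnet_nid_t dstnid)
{
	lnet_nid_t src;
	__u32 order;
	int d = LNetDist(dstnid, &src, &order);

	if (d < 0)
		CERROR("%s: unreachable\n", libcfs_nid2str(dstnid));
	else if (!d)
		CDEBUG(D_NET, "%s is a local NID\n", libcfs_nid2str(dstnid));
	else	/* d == number of hops + 1 */
		CDEBUG(D_NET, "%s: %d hop(s) via %s, order %u\n",
		       libcfs_nid2str(dstnid), d - 1,
		       libcfs_nid2str(src), order);
}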

+ 0 - 625
drivers/staging/lustre/lnet/lnet/lib-msg.c

@@ -1,625 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-msg.c
- *
- * Message decoding, parsing and finalizing routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-void
-lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev)
-{
-	memset(ev, 0, sizeof(*ev));
-
-	ev->status   = 0;
-	ev->unlinked = 1;
-	ev->type     = LNET_EVENT_UNLINK;
-	lnet_md_deconstruct(md, &ev->md);
-	lnet_md2handle(&ev->md_handle, md);
-}
-
-/*
- * Don't need any lock, must be called after lnet_commit_md
- */
-void
-lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_event *ev  = &msg->msg_ev;
-
-	LASSERT(!msg->msg_routing);
-
-	ev->type = ev_type;
-
-	if (ev_type == LNET_EVENT_SEND) {
-		/* event for active message */
-		ev->target.nid    = le64_to_cpu(hdr->dest_nid);
-		ev->target.pid    = le32_to_cpu(hdr->dest_pid);
-		ev->initiator.nid = LNET_NID_ANY;
-		ev->initiator.pid = the_lnet.ln_pid;
-		ev->sender        = LNET_NID_ANY;
-	} else {
-		/* event for passive message */
-		ev->target.pid    = hdr->dest_pid;
-		ev->target.nid    = hdr->dest_nid;
-		ev->initiator.pid = hdr->src_pid;
-		ev->initiator.nid = hdr->src_nid;
-		ev->rlength       = hdr->payload_length;
-		ev->sender        = msg->msg_from;
-		ev->mlength       = msg->msg_wanted;
-		ev->offset        = msg->msg_offset;
-	}
-
-	switch (ev_type) {
-	default:
-		LBUG();
-
-	case LNET_EVENT_PUT: /* passive PUT */
-		ev->pt_index   = hdr->msg.put.ptl_index;
-		ev->match_bits = hdr->msg.put.match_bits;
-		ev->hdr_data   = hdr->msg.put.hdr_data;
-		return;
-
-	case LNET_EVENT_GET: /* passive GET */
-		ev->pt_index   = hdr->msg.get.ptl_index;
-		ev->match_bits = hdr->msg.get.match_bits;
-		ev->hdr_data   = 0;
-		return;
-
-	case LNET_EVENT_ACK: /* ACK */
-		ev->match_bits = hdr->msg.ack.match_bits;
-		ev->mlength    = hdr->msg.ack.mlength;
-		return;
-
-	case LNET_EVENT_REPLY: /* REPLY */
-		return;
-
-	case LNET_EVENT_SEND: /* active message */
-		if (msg->msg_type == LNET_MSG_PUT) {
-			ev->pt_index   = le32_to_cpu(hdr->msg.put.ptl_index);
-			ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
-			ev->offset     = le32_to_cpu(hdr->msg.put.offset);
-			ev->mlength    =
-			ev->rlength    = le32_to_cpu(hdr->payload_length);
-			ev->hdr_data   = le64_to_cpu(hdr->msg.put.hdr_data);
-
-		} else {
-			LASSERT(msg->msg_type == LNET_MSG_GET);
-			ev->pt_index   = le32_to_cpu(hdr->msg.get.ptl_index);
-			ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
-			ev->mlength    =
-			ev->rlength    = le32_to_cpu(hdr->msg.get.sink_length);
-			ev->offset     = le32_to_cpu(hdr->msg.get.src_offset);
-			ev->hdr_data   = 0;
-		}
-		return;
-	}
-}
-
-void
-lnet_msg_commit(struct lnet_msg *msg, int cpt)
-{
-	struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
-	struct lnet_counters *counters  = the_lnet.ln_counters[cpt];
-
-	/* routed message can be committed for both receiving and sending */
-	LASSERT(!msg->msg_tx_committed);
-
-	if (msg->msg_sending) {
-		LASSERT(!msg->msg_receiving);
-
-		msg->msg_tx_cpt = cpt;
-		msg->msg_tx_committed = 1;
-		if (msg->msg_rx_committed) { /* routed message REPLY */
-			LASSERT(msg->msg_onactivelist);
-			return;
-		}
-	} else {
-		LASSERT(!msg->msg_sending);
-		msg->msg_rx_cpt = cpt;
-		msg->msg_rx_committed = 1;
-	}
-
-	LASSERT(!msg->msg_onactivelist);
-	msg->msg_onactivelist = 1;
-	list_add(&msg->msg_activelist, &container->msc_active);
-
-	counters->msgs_alloc++;
-	if (counters->msgs_alloc > counters->msgs_max)
-		counters->msgs_max = counters->msgs_alloc;
-}
-
-static void
-lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
-{
-	struct lnet_counters	*counters;
-	struct lnet_event *ev = &msg->msg_ev;
-
-	LASSERT(msg->msg_tx_committed);
-	if (status)
-		goto out;
-
-	counters = the_lnet.ln_counters[msg->msg_tx_cpt];
-	switch (ev->type) {
-	default: /* routed message */
-		LASSERT(msg->msg_routing);
-		LASSERT(msg->msg_rx_committed);
-		LASSERT(!ev->type);
-
-		counters->route_length += msg->msg_len;
-		counters->route_count++;
-		goto out;
-
-	case LNET_EVENT_PUT:
-		/* should have been decommitted */
-		LASSERT(!msg->msg_rx_committed);
-		/* overwritten while sending ACK */
-		LASSERT(msg->msg_type == LNET_MSG_ACK);
-		msg->msg_type = LNET_MSG_PUT; /* fix type */
-		break;
-
-	case LNET_EVENT_SEND:
-		LASSERT(!msg->msg_rx_committed);
-		if (msg->msg_type == LNET_MSG_PUT)
-			counters->send_length += msg->msg_len;
-		break;
-
-	case LNET_EVENT_GET:
-		LASSERT(msg->msg_rx_committed);
-		/*
-		 * overwritten while sending the reply; we should never
-		 * get here for an optimized GET
-		 */
-		LASSERT(msg->msg_type == LNET_MSG_REPLY);
-		msg->msg_type = LNET_MSG_GET; /* fix type */
-		break;
-	}
-
-	counters->send_count++;
- out:
-	lnet_return_tx_credits_locked(msg);
-	msg->msg_tx_committed = 0;
-}
-
-static void
-lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
-{
-	struct lnet_counters *counters;
-	struct lnet_event *ev = &msg->msg_ev;
-
-	LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
-	LASSERT(msg->msg_rx_committed);
-
-	if (status)
-		goto out;
-
-	counters = the_lnet.ln_counters[msg->msg_rx_cpt];
-	switch (ev->type) {
-	default:
-		LASSERT(!ev->type);
-		LASSERT(msg->msg_routing);
-		goto out;
-
-	case LNET_EVENT_ACK:
-		LASSERT(msg->msg_type == LNET_MSG_ACK);
-		break;
-
-	case LNET_EVENT_GET:
-		/*
-		 * type is "REPLY" if it's an optimized GET on passive side,
-		 * because optimized GET will never be committed for sending,
-		 * so message type wouldn't be changed back to "GET" by
-		 * lnet_msg_decommit_tx(), see details in lnet_parse_get()
-		 */
-		LASSERT(msg->msg_type == LNET_MSG_REPLY ||
-			msg->msg_type == LNET_MSG_GET);
-		counters->send_length += msg->msg_wanted;
-		break;
-
-	case LNET_EVENT_PUT:
-		LASSERT(msg->msg_type == LNET_MSG_PUT);
-		break;
-
-	case LNET_EVENT_REPLY:
-		/*
-		 * type is "GET" if it's an optimized GET on active side,
-		 * see details in lnet_create_reply_msg()
-		 */
-		LASSERT(msg->msg_type == LNET_MSG_GET ||
-			msg->msg_type == LNET_MSG_REPLY);
-		break;
-	}
-
-	counters->recv_count++;
-	if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
-		counters->recv_length += msg->msg_wanted;
-
- out:
-	lnet_return_rx_credits_locked(msg);
-	msg->msg_rx_committed = 0;
-}
-
-void
-lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status)
-{
-	int cpt2 = cpt;
-
-	LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
-	LASSERT(msg->msg_onactivelist);
-
-	if (msg->msg_tx_committed) { /* always decommit for sending first */
-		LASSERT(cpt == msg->msg_tx_cpt);
-		lnet_msg_decommit_tx(msg, status);
-	}
-
-	if (msg->msg_rx_committed) {
-		/* forwarding msg committed for both receiving and sending */
-		if (cpt != msg->msg_rx_cpt) {
-			lnet_net_unlock(cpt);
-			cpt2 = msg->msg_rx_cpt;
-			lnet_net_lock(cpt2);
-		}
-		lnet_msg_decommit_rx(msg, status);
-	}
-
-	list_del(&msg->msg_activelist);
-	msg->msg_onactivelist = 0;
-
-	the_lnet.ln_counters[cpt2]->msgs_alloc--;
-
-	if (cpt2 != cpt) {
-		lnet_net_unlock(cpt2);
-		lnet_net_lock(cpt);
-	}
-}
-
-void
-lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
-		   unsigned int offset, unsigned int mlen)
-{
-	/* NB: @offset and @mlen are only useful for receiving */
-	/*
-	 * Here we attach the MD to the lnet_msg, mark it busy and
-	 * decrement its threshold. Come what may, the lnet_msg "owns"
-	 * the MD until a call to lnet_msg_detach_md or lnet_finalize()
-	 * signals completion.
-	 */
-	LASSERT(!msg->msg_routing);
-
-	msg->msg_md = md;
-	if (msg->msg_receiving) { /* committed for receiving */
-		msg->msg_offset = offset;
-		msg->msg_wanted = mlen;
-	}
-
-	md->md_refcount++;
-	if (md->md_threshold != LNET_MD_THRESH_INF) {
-		LASSERT(md->md_threshold > 0);
-		md->md_threshold--;
-	}
-
-	/* build umd in event */
-	lnet_md2handle(&msg->msg_ev.md_handle, md);
-	lnet_md_deconstruct(md, &msg->msg_ev.md);
-}
-
-void
-lnet_msg_detach_md(struct lnet_msg *msg, int status)
-{
-	struct lnet_libmd *md = msg->msg_md;
-	int unlink;
-
-	/* Now it's safe to drop my caller's ref */
-	md->md_refcount--;
-	LASSERT(md->md_refcount >= 0);
-
-	unlink = lnet_md_unlinkable(md);
-	if (md->md_eq) {
-		msg->msg_ev.status   = status;
-		msg->msg_ev.unlinked = unlink;
-		lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
-	}
-
-	if (unlink)
-		lnet_md_unlink(md);
-
-	msg->msg_md = NULL;
-}
-
-static int
-lnet_complete_msg_locked(struct lnet_msg *msg, int cpt)
-{
-	struct lnet_handle_wire ack_wmd;
-	int rc;
-	int status = msg->msg_ev.status;
-
-	LASSERT(msg->msg_onactivelist);
-
-	if (!status && msg->msg_ack) {
-		/* Only send an ACK if the PUT completed successfully */
-
-		lnet_msg_decommit(msg, cpt, 0);
-
-		msg->msg_ack = 0;
-		lnet_net_unlock(cpt);
-
-		LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
-		LASSERT(!msg->msg_routing);
-
-		ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
-
-		lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
-
-		msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
-		msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
-		msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
-
-		/*
-		 * NB: we probably want to use NID of msg::msg_from as 3rd
-		 * parameter (router NID) if it's a routed message
-		 */
-		rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);
-
-		lnet_net_lock(cpt);
-		/*
-		 * NB: the message is committed for sending; we should return
-		 * on success because the LND will finalize it later.
-		 *
-		 * Also, it is possible that the message was committed for
-		 * sending and then failed before reaching the LND (e.g.
-		 * ENOMEM); in that case we can't fall through either,
-		 * because the CPT for sending can differ from the CPT for
-		 * receiving, so we should return to lnet_finalize() to
-		 * make sure we are locking the correct partition.
-		 */
-		return rc;
-
-	} else if (!status &&	/* OK so far */
-		   (msg->msg_routing && !msg->msg_sending)) {
-		/* not forwarded */
-		LASSERT(!msg->msg_receiving);	/* called back recv already */
-		lnet_net_unlock(cpt);
-
-		rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);
-
-		lnet_net_lock(cpt);
-		/*
-		 * NB: the message is committed for sending; we should return
-		 * on success because the LND will finalize it later.
-		 *
-		 * Also, it is possible that the message was committed for
-		 * sending and then failed before reaching the LND (e.g.
-		 * ENOMEM); in that case we can't fall through either:
-		 * - The rule is that a message must decommit for sending
-		 *   first if it is committed for both sending and receiving
-		 * - The CPT for sending can differ from the CPT for
-		 *   receiving, so we should return to lnet_finalize() to
-		 *   make sure we are locking the correct partition.
-		 */
-		return rc;
-	}
-
-	lnet_msg_decommit(msg, cpt, status);
-	kfree(msg);
-	return 0;
-}
-
-void
-lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int status)
-{
-	struct lnet_msg_container *container;
-	int my_slot;
-	int cpt;
-	int rc;
-	int i;
-
-	LASSERT(!in_interrupt());
-
-	if (!msg)
-		return;
-
-	msg->msg_ev.status = status;
-
-	if (msg->msg_md) {
-		cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);
-
-		lnet_res_lock(cpt);
-		lnet_msg_detach_md(msg, status);
-		lnet_res_unlock(cpt);
-	}
-
- again:
-	rc = 0;
-	if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
-		/* not committed to network yet */
-		LASSERT(!msg->msg_onactivelist);
-		kfree(msg);
-		return;
-	}
-
-	/*
-	 * NB: a routed message can be committed for both receiving and
-	 * sending; finalize in LIFO order (sending first, then receiving)
-	 * to keep the counters correct.
-	 */
-	cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
-	lnet_net_lock(cpt);
-
-	container = the_lnet.ln_msg_containers[cpt];
-	list_add_tail(&msg->msg_list, &container->msc_finalizing);
-
-	/*
-	 * Recursion breaker.  Don't complete the message here if I am (or
-	 * enough other threads are) already completing messages
-	 */
-	my_slot = -1;
-	for (i = 0; i < container->msc_nfinalizers; i++) {
-		if (container->msc_finalizers[i] == current)
-			break;
-
-		if (my_slot < 0 && !container->msc_finalizers[i])
-			my_slot = i;
-	}
-
-	if (i < container->msc_nfinalizers || my_slot < 0) {
-		lnet_net_unlock(cpt);
-		return;
-	}
-
-	container->msc_finalizers[my_slot] = current;
-
-	while (!list_empty(&container->msc_finalizing)) {
-		msg = list_entry(container->msc_finalizing.next,
-				 struct lnet_msg, msg_list);
-
-		list_del(&msg->msg_list);
-
-		/*
-		 * NB drops and regains the lnet lock if it actually does
-		 * anything, so my finalizing friends can chomp along too
-		 */
-		rc = lnet_complete_msg_locked(msg, cpt);
-		if (rc)
-			break;
-	}
-
-	if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) {
-		lnet_net_unlock(cpt);
-		lnet_delay_rule_check();
-		lnet_net_lock(cpt);
-	}
-
-	container->msc_finalizers[my_slot] = NULL;
-	lnet_net_unlock(cpt);
-
-	if (rc)
-		goto again;
-}
-EXPORT_SYMBOL(lnet_finalize);
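
The slot scan in lnet_finalize() packs two exit conditions into one loop: bail out if current already holds a slot (we are recursing) or if no slot is free (enough finalizers are already draining msc_finalizing). A hedged, stand-alone restatement of just that decision; the helper name is hypothetical:

/* Hedged sketch: the recursion-breaker decision in isolation.
 * finalizers/nfinalizers correspond to msc_finalizers and
 * msc_nfinalizers in struct lnet_msg_container (task pointers, as
 * the comparison with current above implies).
 */
static int example_claim_finalizer_slot(struct task_struct **finalizers,
					int nfinalizers)
{
	int my_slot = -1;
	int i;

	for (i = 0; i < nfinalizers; i++) {
		if (finalizers[i] == current)
			break;		/* already finalizing: don't recurse */
		if (my_slot < 0 && !finalizers[i])
			my_slot = i;	/* remember the first free slot */
	}

	if (i < nfinalizers || my_slot < 0)
		return -1;	/* leave the queue to the other finalizers */

	finalizers[my_slot] = current;	/* claim; cleared when draining ends */
	return my_slot;
}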
-
-void
-lnet_msg_container_cleanup(struct lnet_msg_container *container)
-{
-	int count = 0;
-
-	if (!container->msc_init)
-		return;
-
-	while (!list_empty(&container->msc_active)) {
-		struct lnet_msg *msg;
-
-		msg = list_entry(container->msc_active.next,
-				 struct lnet_msg, msg_activelist);
-		LASSERT(msg->msg_onactivelist);
-		msg->msg_onactivelist = 0;
-		list_del(&msg->msg_activelist);
-		kfree(msg);
-		count++;
-	}
-
-	if (count > 0)
-		CERROR("%d active msg on exit\n", count);
-
-	kvfree(container->msc_finalizers);
-	container->msc_finalizers = NULL;
-	container->msc_init = 0;
-}
-
-int
-lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
-{
-	container->msc_init = 1;
-
-	INIT_LIST_HEAD(&container->msc_active);
-	INIT_LIST_HEAD(&container->msc_finalizing);
-
-	/* number of CPUs in this partition */
-	container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);
-
-	container->msc_finalizers = kvzalloc_cpt(container->msc_nfinalizers *
-						 sizeof(*container->msc_finalizers),
-						 GFP_KERNEL, cpt);
-
-	if (!container->msc_finalizers) {
-		CERROR("Failed to allocate message finalizers\n");
-		lnet_msg_container_cleanup(container);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void
-lnet_msg_containers_destroy(void)
-{
-	struct lnet_msg_container *container;
-	int i;
-
-	if (!the_lnet.ln_msg_containers)
-		return;
-
-	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
-		lnet_msg_container_cleanup(container);
-
-	cfs_percpt_free(the_lnet.ln_msg_containers);
-	the_lnet.ln_msg_containers = NULL;
-}
-
-int
-lnet_msg_containers_create(void)
-{
-	struct lnet_msg_container *container;
-	int rc;
-	int i;
-
-	the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
-						      sizeof(*container));
-
-	if (!the_lnet.ln_msg_containers) {
-		CERROR("Failed to allocate cpu-partition data for network\n");
-		return -ENOMEM;
-	}
-
-	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
-		rc = lnet_msg_container_setup(container, i);
-		if (rc) {
-			lnet_msg_containers_destroy();
-			return rc;
-		}
-	}
-
-	return 0;
-}

+ 0 - 987
drivers/staging/lustre/lnet/lnet/lib-ptl.c

@@ -1,987 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-ptl.c
- *
- * portal & match routines
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* NB: add /proc interfaces in upcoming patches */
-int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
-module_param(portal_rotor, int, 0644);
-MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions");
-
-static int
-lnet_ptl_match_type(unsigned int index, struct lnet_process_id match_id,
-		    __u64 mbits, __u64 ignore_bits)
-{
-	struct lnet_portal *ptl = the_lnet.ln_portals[index];
-	int unique;
-
-	unique = !ignore_bits &&
-		 match_id.nid != LNET_NID_ANY &&
-		 match_id.pid != LNET_PID_ANY;
-
-	LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
-
-	/* prefer to check w/o any lock */
-	if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
-		goto match;
-
-	/* unset, new portal */
-	lnet_ptl_lock(ptl);
-	/* check again with lock */
-	if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
-		lnet_ptl_unlock(ptl);
-		goto match;
-	}
-
-	/* still not set */
-	if (unique)
-		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
-	else
-		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
-
-	lnet_ptl_unlock(ptl);
-
-	return 1;
-
- match:
-	if ((lnet_ptl_is_unique(ptl) && !unique) ||
-	    (lnet_ptl_is_wildcard(ptl) && unique))
-		return 0;
-	return 1;
-}
-
-static void
-lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
-{
-	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
-	int i;
-
-	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
-	LASSERT(lnet_ptl_is_wildcard(ptl));
-
-	mtable->mt_enabled = 1;
-
-	ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
-	for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
-		LASSERT(ptl->ptl_mt_maps[i] != cpt);
-		if (ptl->ptl_mt_maps[i] < cpt)
-			break;
-
-		/* swap to order */
-		ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
-		ptl->ptl_mt_maps[i] = cpt;
-	}
-
-	ptl->ptl_mt_nmaps++;
-}
-
-static void
-lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
-{
-	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
-	int i;
-
-	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
-	LASSERT(lnet_ptl_is_wildcard(ptl));
-
-	if (LNET_CPT_NUMBER == 1)
-		return; /* never disable the only match-table */
-
-	mtable->mt_enabled = 0;
-
-	LASSERT(ptl->ptl_mt_nmaps > 0 &&
-		ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);
-
-	/* remove it from mt_maps */
-	ptl->ptl_mt_nmaps--;
-	for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
-		if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
-			ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
-	}
-}
-
-static int
-lnet_try_match_md(struct lnet_libmd *md,
-		  struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	/*
-	 * ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
-	 * lnet_match_blocked_msg() relies on this to avoid races
-	 */
-	unsigned int offset;
-	unsigned int mlength;
-	struct lnet_me *me = md->md_me;
-
-	/* MD exhausted */
-	if (lnet_md_exhausted(md))
-		return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;
-
-	/* mismatched MD op */
-	if (!(md->md_options & info->mi_opc))
-		return LNET_MATCHMD_NONE;
-
-	/* mismatched ME nid/pid? */
-	if (me->me_match_id.nid != LNET_NID_ANY &&
-	    me->me_match_id.nid != info->mi_id.nid)
-		return LNET_MATCHMD_NONE;
-
-	if (me->me_match_id.pid != LNET_PID_ANY &&
-	    me->me_match_id.pid != info->mi_id.pid)
-		return LNET_MATCHMD_NONE;
-
-	/* mismatched ME matchbits? */
-	if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits)
-		return LNET_MATCHMD_NONE;
-
-	/* Hurrah! This _is_ a match; check it out... */
-
-	if (!(md->md_options & LNET_MD_MANAGE_REMOTE))
-		offset = md->md_offset;
-	else
-		offset = info->mi_roffset;
-
-	if (md->md_options & LNET_MD_MAX_SIZE) {
-		mlength = md->md_max_size;
-		LASSERT(md->md_offset + mlength <= md->md_length);
-	} else {
-		mlength = md->md_length - offset;
-	}
-
-	if (info->mi_rlength <= mlength) {	/* fits in allowed space */
-		mlength = info->mi_rlength;
-	} else if (!(md->md_options & LNET_MD_TRUNCATE)) {
-		/* this packet _really_ is too big */
-		CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n",
-		       libcfs_id2str(info->mi_id), info->mi_mbits,
-		       info->mi_rlength, md->md_length - offset, mlength);
-
-		return LNET_MATCHMD_DROP;
-	}
-
-	/* Commit to this ME/MD */
-	CDEBUG(D_NET, "Incoming %s index %x from %s of length %d/%d into md %#llx [%d] + %d\n",
-	       (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
-	       info->mi_portal, libcfs_id2str(info->mi_id), mlength,
-	       info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
-
-	lnet_msg_attach_md(msg, md, offset, mlength);
-	md->md_offset = offset + mlength;
-
-	if (!lnet_md_exhausted(md))
-		return LNET_MATCHMD_OK;
-
-	/*
-	 * Auto-unlink NOW, so the ME gets unlinked if required.
-	 * We bumped md->md_refcount above so the MD just gets flagged
-	 * for unlink when it is finalized.
-	 */
-	if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK)
-		lnet_md_unlink(md);
-
-	return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
-}
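
The match-bits test in lnet_try_match_md() is a single expression: XOR exposes the differing bits, and masking with the complement of the ignore bits forgives exactly the bits the ME declared ignorable. A hedged illustration with concrete values:

/* Hedged illustration of the predicate used above */
static bool example_mbits_match(__u64 me_bits, __u64 ignore_bits,
				__u64 msg_bits)
{
	/* true iff no significant (non-ignored) bit differs */
	return !((me_bits ^ msg_bits) & ~ignore_bits);
}

/*
 * e.g. me_bits = 0xff00, ignore_bits = 0x00ff:
 *   msg_bits 0xff42 matches   (low byte differs but is ignored)
 *   msg_bits 0xfe00 does not  (bit 8 differs and is significant)
 */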
-
-static struct lnet_match_table *
-lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits)
-{
-	if (LNET_CPT_NUMBER == 1)
-		return ptl->ptl_mtables[0]; /* the only one */
-
-	/* if it's a unique portal, return match-table hashed by NID */
-	return lnet_ptl_is_unique(ptl) ?
-	       ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
-}
-
-struct lnet_match_table *
-lnet_mt_of_attach(unsigned int index, struct lnet_process_id id,
-		  __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos)
-{
-	struct lnet_portal *ptl;
-	struct lnet_match_table	*mtable;
-
-	/* NB: called w/o lock */
-	LASSERT(index < the_lnet.ln_nportals);
-
-	if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
-		return NULL;
-
-	ptl = the_lnet.ln_portals[index];
-
-	mtable = lnet_match2mt(ptl, id, mbits);
-	if (mtable) /* unique portal or only one match-table */
-		return mtable;
-
-	/* it's a wildcard portal */
-	switch (pos) {
-	default:
-		return NULL;
-	case LNET_INS_BEFORE:
-	case LNET_INS_AFTER:
-		/*
-		 * posted by a thread with no CPT affinity; always hash to a
-		 * specific match-table to avoid buffer stealing, which is heavy
-		 */
-		return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
-	case LNET_INS_LOCAL:
-		/* posted by cpu-affinity thread */
-		return ptl->ptl_mtables[lnet_cpt_current()];
-	}
-}
-
-static struct lnet_match_table *
-lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	struct lnet_match_table	*mtable;
-	struct lnet_portal *ptl;
-	unsigned int nmaps;
-	unsigned int rotor;
-	unsigned int cpt;
-	bool routed;
-
-	/* NB: called w/o lock */
-	LASSERT(info->mi_portal < the_lnet.ln_nportals);
-	ptl = the_lnet.ln_portals[info->mi_portal];
-
-	LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));
-
-	mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
-	if (mtable)
-		return mtable;
-
-	/* it's a wildcard portal */
-	routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
-		 LNET_NIDNET(msg->msg_hdr.dest_nid);
-
-	if (portal_rotor == LNET_PTL_ROTOR_OFF ||
-	    (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
-		cpt = lnet_cpt_current();
-		if (ptl->ptl_mtables[cpt]->mt_enabled)
-			return ptl->ptl_mtables[cpt];
-	}
-
-	rotor = ptl->ptl_rotor++; /* get round-robin factor */
-	if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
-		cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
-	else
-		cpt = rotor % LNET_CPT_NUMBER;
-
-	if (!ptl->ptl_mtables[cpt]->mt_enabled) {
-		/* is there any active entry for this portal? */
-		nmaps = ptl->ptl_mt_nmaps;
-		/* map to an active mtable to avoid heavy "stealing" */
-		if (nmaps) {
-			/*
-			 * NB: ptl_mt_maps may be changing under us because
-			 * we are not protected by lnet_ptl_lock, but that
-			 * shouldn't hurt anything
-			 */
-			cpt = ptl->ptl_mt_maps[rotor % nmaps];
-		}
-	}
-
-	return ptl->ptl_mtables[cpt];
-}
-
-static int
-lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
-{
-	__u64 *bmap;
-	int i;
-
-	if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
-		return 0;
-
-	if (pos < 0) { /* check all bits */
-		for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
-			if (mtable->mt_exhausted[i] != (__u64)(-1))
-				return 0;
-		}
-		return 1;
-	}
-
-	LASSERT(pos <= LNET_MT_HASH_IGNORE);
-	/* mtable::mt_mhash[pos] is marked as exhausted or not */
-	bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
-	pos &= (1 << LNET_MT_BITS_U64) - 1;
-
-	return (*bmap & BIT(pos));
-}
-
-static void
-lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
-{
-	__u64 *bmap;
-
-	LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
-	LASSERT(pos <= LNET_MT_HASH_IGNORE);
-
-	/* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
-	bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
-	pos &= (1 << LNET_MT_BITS_U64) - 1;
-
-	if (!exhausted)
-		*bmap &= ~(1ULL << pos);
-	else
-		*bmap |= 1ULL << pos;
-}
-
-struct list_head *
-lnet_mt_match_head(struct lnet_match_table *mtable,
-		   struct lnet_process_id id, __u64 mbits)
-{
-	struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
-	unsigned long hash = mbits;
-
-	if (!lnet_ptl_is_wildcard(ptl)) {
-		hash += id.nid + id.pid;
-
-		LASSERT(lnet_ptl_is_unique(ptl));
-		hash = hash_long(hash, LNET_MT_HASH_BITS);
-	}
-	return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK];
-}
-
-int
-lnet_mt_match_md(struct lnet_match_table *mtable,
-		 struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	struct list_head *head;
-	struct lnet_me *me;
-	struct lnet_me *tmp;
-	int exhausted = 0;
-	int rc;
-
-	/* any ME with ignore bits? */
-	if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
-		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
-	else
-		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- again:
-	/* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
-	if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
-		exhausted = LNET_MATCHMD_EXHAUSTED;
-
-	list_for_each_entry_safe(me, tmp, head, me_list) {
-		/* ME attached but MD not attached yet */
-		if (!me->me_md)
-			continue;
-
-		LASSERT(me == me->me_md->md_me);
-
-		rc = lnet_try_match_md(me->me_md, info, msg);
-		if (!(rc & LNET_MATCHMD_EXHAUSTED))
-			exhausted = 0; /* mlist is not empty */
-
-		if (rc & LNET_MATCHMD_FINISH) {
-			/*
-			 * don't return EXHAUSTED bit because we don't know
-			 * whether the mlist is empty or not
-			 */
-			return rc & ~LNET_MATCHMD_EXHAUSTED;
-		}
-	}
-
-	if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
-		lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
-		if (!lnet_mt_test_exhausted(mtable, -1))
-			exhausted = 0;
-	}
-
-	if (!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
-		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
-		goto again; /* re-check MEs w/o ignore-bits */
-	}
-
-	if (info->mi_opc == LNET_MD_OP_GET ||
-	    !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
-		return exhausted | LNET_MATCHMD_DROP;
-
-	return exhausted | LNET_MATCHMD_NONE;
-}
-
-static int
-lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
-{
-	int rc;
-
-	/*
-	 * the message arrived before any buffer was posted on this portal;
-	 * simply delay or drop it
-	 */
-	if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
-		return 0;
-
-	lnet_ptl_lock(ptl);
-	/* check it again with hold of lock */
-	if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
-		lnet_ptl_unlock(ptl);
-		return 0;
-	}
-
-	if (lnet_ptl_is_lazy(ptl)) {
-		if (msg->msg_rx_ready_delay) {
-			msg->msg_rx_delayed = 1;
-			list_add_tail(&msg->msg_list,
-				      &ptl->ptl_msg_delayed);
-		}
-		rc = LNET_MATCHMD_NONE;
-	} else {
-		rc = LNET_MATCHMD_DROP;
-	}
-
-	lnet_ptl_unlock(ptl);
-	return rc;
-}
-
-static int
-lnet_ptl_match_delay(struct lnet_portal *ptl,
-		     struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	int first = ptl->ptl_mt_maps[0]; /* read w/o lock */
-	int rc = 0;
-	int i;
-
-	/**
-	 * Steal a buffer from other CPTs, and delay the msg if there is
-	 * nothing to steal. This function is more expensive than a
-	 * regular match, but we don't expect it to happen often. The return
-	 * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or
-	 * LNET_MATCHMD_NONE.
-	 */
-	LASSERT(lnet_ptl_is_wildcard(ptl));
-
-	for (i = 0; i < LNET_CPT_NUMBER; i++) {
-		struct lnet_match_table *mtable;
-		int cpt;
-
-		cpt = (first + i) % LNET_CPT_NUMBER;
-		mtable = ptl->ptl_mtables[cpt];
-		if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
-			continue;
-
-		lnet_res_lock(cpt);
-		lnet_ptl_lock(ptl);
-
-		if (!i) {
-			/* The first try, add to stealing list. */
-			list_add_tail(&msg->msg_list,
-				      &ptl->ptl_msg_stealing);
-		}
-
-		if (!list_empty(&msg->msg_list)) {
-			/* On stealing list. */
-			rc = lnet_mt_match_md(mtable, info, msg);
-
-			if ((rc & LNET_MATCHMD_EXHAUSTED) &&
-			    mtable->mt_enabled)
-				lnet_ptl_disable_mt(ptl, cpt);
-
-			if (rc & LNET_MATCHMD_FINISH) {
-				/* Match found, remove from stealing list. */
-				list_del_init(&msg->msg_list);
-			} else if (i == LNET_CPT_NUMBER - 1 ||	/* (1) */
-				   !ptl->ptl_mt_nmaps ||	/* (2) */
-				   (ptl->ptl_mt_nmaps == 1 &&	/* (3) */
-				    ptl->ptl_mt_maps[0] == cpt)) {
-				/**
-				 * No match found, and this is either
-				 * (1) the last cpt to check, or
-				 * (2) there is no active cpt, or
-				 * (3) this is the only active cpt.
-				 * There is nothing to steal: delay or
-				 * drop the message.
-				 */
-				list_del_init(&msg->msg_list);
-
-				if (lnet_ptl_is_lazy(ptl)) {
-					msg->msg_rx_delayed = 1;
-					list_add_tail(&msg->msg_list,
-						      &ptl->ptl_msg_delayed);
-					rc = LNET_MATCHMD_NONE;
-				} else {
-					rc = LNET_MATCHMD_DROP;
-				}
-			} else {
-				/* Do another iteration. */
-				rc = 0;
-			}
-		} else {
-			/**
-			 * No longer on stealing list: another thread
-			 * matched the message in lnet_ptl_attach_md().
-			 * We are now expected to handle the message.
-			 */
-			rc = !msg->msg_md ?
-			     LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
-		}
-
-		lnet_ptl_unlock(ptl);
-		lnet_res_unlock(cpt);
-
-		/**
-		 * Note that test (1) above ensures that we always
-		 * exit the loop through this break statement.
-		 *
-		 * LNET_MATCHMD_NONE means msg was added to the
-		 * delayed queue, and we may no longer reference it
-		 * after lnet_ptl_unlock() and lnet_res_unlock().
-		 */
-		if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE))
-			break;
-	}
-
-	return rc;
-}
-
-int
-lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	struct lnet_match_table	*mtable;
-	struct lnet_portal *ptl;
-	int rc;
-
-	CDEBUG(D_NET, "Request from %s of length %d into portal %d MB=%#llx\n",
-	       libcfs_id2str(info->mi_id), info->mi_rlength, info->mi_portal,
-	       info->mi_mbits);
-
-	if (info->mi_portal >= the_lnet.ln_nportals) {
-		CERROR("Invalid portal %d not in [0-%d]\n",
-		       info->mi_portal, the_lnet.ln_nportals);
-		return LNET_MATCHMD_DROP;
-	}
-
-	ptl = the_lnet.ln_portals[info->mi_portal];
-	rc = lnet_ptl_match_early(ptl, msg);
-	if (rc) /* matched or delayed early message */
-		return rc;
-
-	mtable = lnet_mt_of_match(info, msg);
-	lnet_res_lock(mtable->mt_cpt);
-
-	if (the_lnet.ln_shutdown) {
-		rc = LNET_MATCHMD_DROP;
-		goto out1;
-	}
-
-	rc = lnet_mt_match_md(mtable, info, msg);
-	if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) {
-		lnet_ptl_lock(ptl);
-		lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
-		lnet_ptl_unlock(ptl);
-	}
-
-	if (rc & LNET_MATCHMD_FINISH)	/* matched or dropping */
-		goto out1;
-
-	if (!msg->msg_rx_ready_delay)
-		goto out1;
-
-	LASSERT(lnet_ptl_is_lazy(ptl));
-	LASSERT(!msg->msg_rx_delayed);
-
-	/* NB: we don't expect "delay" to happen often */
-	if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
-		lnet_ptl_lock(ptl);
-
-		msg->msg_rx_delayed = 1;
-		list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
-
-		lnet_ptl_unlock(ptl);
-		lnet_res_unlock(mtable->mt_cpt);
-		rc = LNET_MATCHMD_NONE;
-	} else  {
-		lnet_res_unlock(mtable->mt_cpt);
-		rc = lnet_ptl_match_delay(ptl, info, msg);
-	}
-
-	/* LNET_MATCHMD_NONE means msg was added to the delay queue */
-	if (rc & LNET_MATCHMD_NONE) {
-		CDEBUG(D_NET,
-		       "Delaying %s from %s ptl %d MB %#llx off %d len %d\n",
-		       info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
-		       libcfs_id2str(info->mi_id), info->mi_portal,
-		       info->mi_mbits, info->mi_roffset, info->mi_rlength);
-	}
-	goto out0;
- out1:
-	lnet_res_unlock(mtable->mt_cpt);
- out0:
-	/* EXHAUSTED bit is only meaningful for internal functions */
-	return rc & ~LNET_MATCHMD_EXHAUSTED;
-}
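
A hedged caller-side sketch to make the return-code protocol above concrete; deliver() and drop() are hypothetical stand-ins for the real delivery and drop paths:

static void deliver(struct lnet_msg *msg);	/* hypothetical helper */
static void drop(struct lnet_msg *msg);	/* hypothetical helper */

static void example_dispatch(struct lnet_match_info *info,
			     struct lnet_msg *msg)
{
	int rc = lnet_ptl_match_md(info, msg);

	if (rc & LNET_MATCHMD_OK)
		deliver(msg);	/* msg matched an MD; hand it over */
	else if (rc & LNET_MATCHMD_DROP)
		drop(msg);	/* no match and the portal is not lazy */
	/* else LNET_MATCHMD_NONE: msg now sits on the delayed queue and
	 * must not be referenced again here */
}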
-
-void
-lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md)
-{
-	LASSERT(me->me_md == md && md->md_me == me);
-
-	me->me_md = NULL;
-	md->md_me = NULL;
-}
-
-/* called with lnet_res_lock held */
-void
-lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
-		   struct list_head *matches, struct list_head *drops)
-{
-	struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
-	struct lnet_match_table	*mtable;
-	struct list_head *head;
-	struct lnet_msg *tmp;
-	struct lnet_msg *msg;
-	int exhausted = 0;
-	int cpt;
-
-	LASSERT(!md->md_refcount); /* a brand new MD */
-
-	me->me_md = md;
-	md->md_me = me;
-
-	cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
-	mtable = ptl->ptl_mtables[cpt];
-
-	if (list_empty(&ptl->ptl_msg_stealing) &&
-	    list_empty(&ptl->ptl_msg_delayed) &&
-	    !lnet_mt_test_exhausted(mtable, me->me_pos))
-		return;
-
-	lnet_ptl_lock(ptl);
-	head = &ptl->ptl_msg_stealing;
- again:
-	list_for_each_entry_safe(msg, tmp, head, msg_list) {
-		struct lnet_match_info info;
-		struct lnet_hdr *hdr;
-		int rc;
-
-		LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
-
-		hdr = &msg->msg_hdr;
-		info.mi_id.nid  = hdr->src_nid;
-		info.mi_id.pid  = hdr->src_pid;
-		info.mi_opc     = LNET_MD_OP_PUT;
-		info.mi_portal  = hdr->msg.put.ptl_index;
-		info.mi_rlength = hdr->payload_length;
-		info.mi_roffset = hdr->msg.put.offset;
-		info.mi_mbits   = hdr->msg.put.match_bits;
-
-		rc = lnet_try_match_md(md, &info, msg);
-
-		exhausted = (rc & LNET_MATCHMD_EXHAUSTED);
-		if (rc & LNET_MATCHMD_NONE) {
-			if (exhausted)
-				break;
-			continue;
-		}
-
-		/* Hurrah! This _is_ a match */
-		LASSERT(rc & LNET_MATCHMD_FINISH);
-		list_del_init(&msg->msg_list);
-
-		if (head == &ptl->ptl_msg_stealing) {
-			if (exhausted)
-				break;
-			/* stealing thread will handle the message */
-			continue;
-		}
-
-		if (rc & LNET_MATCHMD_OK) {
-			list_add_tail(&msg->msg_list, matches);
-
-			CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
-			       libcfs_id2str(info.mi_id),
-			       info.mi_portal, info.mi_mbits,
-			       info.mi_roffset, info.mi_rlength);
-		} else {
-			list_add_tail(&msg->msg_list, drops);
-		}
-
-		if (exhausted)
-			break;
-	}
-
-	if (!exhausted && head == &ptl->ptl_msg_stealing) {
-		head = &ptl->ptl_msg_delayed;
-		goto again;
-	}
-
-	if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
-		lnet_mt_set_exhausted(mtable, me->me_pos, 0);
-		if (!mtable->mt_enabled)
-			lnet_ptl_enable_mt(ptl, cpt);
-	}
-
-	lnet_ptl_unlock(ptl);
-}
-
-static void
-lnet_ptl_cleanup(struct lnet_portal *ptl)
-{
-	struct lnet_match_table	*mtable;
-	int i;
-
-	if (!ptl->ptl_mtables) /* uninitialized portal */
-		return;
-
-	LASSERT(list_empty(&ptl->ptl_msg_delayed));
-	LASSERT(list_empty(&ptl->ptl_msg_stealing));
-	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
-		struct list_head *mhash;
-		struct lnet_me *me;
-		int j;
-
-		if (!mtable->mt_mhash) /* uninitialized match-table */
-			continue;
-
-		mhash = mtable->mt_mhash;
-		/* cleanup ME */
-		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
-			while (!list_empty(&mhash[j])) {
-				me = list_entry(mhash[j].next,
-						struct lnet_me, me_list);
-				CERROR("Active ME %p on exit\n", me);
-				list_del(&me->me_list);
-				kfree(me);
-			}
-		}
-		/* the extra entry is for MEs with ignore bits */
-		kvfree(mhash);
-	}
-
-	cfs_percpt_free(ptl->ptl_mtables);
-	ptl->ptl_mtables = NULL;
-}
-
-static int
-lnet_ptl_setup(struct lnet_portal *ptl, int index)
-{
-	struct lnet_match_table	*mtable;
-	struct list_head *mhash;
-	int i;
-	int j;
-
-	ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
-					    sizeof(struct lnet_match_table));
-	if (!ptl->ptl_mtables) {
-		CERROR("Failed to create match table for portal %d\n", index);
-		return -ENOMEM;
-	}
-
-	ptl->ptl_index = index;
-	INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
-	INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
-	spin_lock_init(&ptl->ptl_lock);
-	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
-		/* the extra entry is for MEs with ignore bits */
-		mhash = kvzalloc_cpt(sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1),
-				     GFP_KERNEL, i);
-		if (!mhash) {
-			CERROR("Failed to create match hash for portal %d\n",
-			       index);
-			goto failed;
-		}
-
-		memset(&mtable->mt_exhausted[0], -1,
-		       sizeof(mtable->mt_exhausted[0]) *
-		       LNET_MT_EXHAUSTED_BMAP);
-		mtable->mt_mhash = mhash;
-		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
-			INIT_LIST_HEAD(&mhash[j]);
-
-		mtable->mt_portal = index;
-		mtable->mt_cpt = i;
-	}
-
-	return 0;
- failed:
-	lnet_ptl_cleanup(ptl);
-	return -ENOMEM;
-}
-
-void
-lnet_portals_destroy(void)
-{
-	int i;
-
-	if (!the_lnet.ln_portals)
-		return;
-
-	for (i = 0; i < the_lnet.ln_nportals; i++)
-		lnet_ptl_cleanup(the_lnet.ln_portals[i]);
-
-	cfs_array_free(the_lnet.ln_portals);
-	the_lnet.ln_portals = NULL;
-	the_lnet.ln_nportals = 0;
-}
-
-int
-lnet_portals_create(void)
-{
-	int size;
-	int i;
-
-	size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);
-
-	the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, size);
-	if (!the_lnet.ln_portals) {
-		CERROR("Failed to allocate portals table\n");
-		return -ENOMEM;
-	}
-	the_lnet.ln_nportals = MAX_PORTALS;
-
-	for (i = 0; i < the_lnet.ln_nportals; i++) {
-		if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
-			lnet_portals_destroy();
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
-/**
- * Turn on the lazy portal attribute. Use with caution!
- *
- * This portal attribute only affects incoming PUT requests to the portal,
- * and is off by default. By default, if there's no matching MD for an
- * incoming PUT request, it is simply dropped. With the lazy attribute on,
- * such requests are queued indefinitely until either a matching MD is
- * posted to the portal or the lazy attribute is turned off.
- *
- * This prevents dropped requests, but it should be regarded as the last
- * line of defense: users must keep a close watch on the number of active
- * buffers on a lazy portal, and post more buffers as soon as it runs low.
- * Delayed requests usually have detrimental effects on the underlying
- * network connections; due to flow control, a few delayed requests often
- * suffice to bring a connection to a complete halt.
- *
- * There is also a DoS attack risk. If users don't post match-all MDs on a
- * lazy portal, a malicious peer can easily stop a service by sending some
- * PUT requests with match bits that won't match any MD. A routed server is
- * especially vulnerable since the connections to its neighbor routers are
- * shared among all clients.
- *
- * \param portal Index of the portal to enable the lazy attribute on.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetSetLazyPortal(int portal)
-{
-	struct lnet_portal *ptl;
-
-	if (portal < 0 || portal >= the_lnet.ln_nportals)
-		return -EINVAL;
-
-	CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
-	ptl = the_lnet.ln_portals[portal];
-
-	lnet_res_lock(LNET_LOCK_EX);
-	lnet_ptl_lock(ptl);
-
-	lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
-
-	lnet_ptl_unlock(ptl);
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	return 0;
-}
-EXPORT_SYMBOL(LNetSetLazyPortal);
-
-int
-lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
-{
-	struct lnet_portal *ptl;
-	LIST_HEAD(zombies);
-
-	if (portal < 0 || portal >= the_lnet.ln_nportals)
-		return -EINVAL;
-
-	ptl = the_lnet.ln_portals[portal];
-
-	lnet_res_lock(LNET_LOCK_EX);
-	lnet_ptl_lock(ptl);
-
-	if (!lnet_ptl_is_lazy(ptl)) {
-		lnet_ptl_unlock(ptl);
-		lnet_res_unlock(LNET_LOCK_EX);
-		return 0;
-	}
-
-	if (ni) {
-		struct lnet_msg *msg, *tmp;
-
-		/* grab all messages which are on the NI passed in */
-		list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
-					 msg_list) {
-			if (msg->msg_rxpeer->lp_ni == ni)
-				list_move(&msg->msg_list, &zombies);
-		}
-	} else {
-		if (the_lnet.ln_shutdown)
-			CWARN("Active lazy portal %d on exit\n", portal);
-		else
-			CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
-
-		/* grab all the blocked messages atomically */
-		list_splice_init(&ptl->ptl_msg_delayed, &zombies);
-
-		lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
-	}
-
-	lnet_ptl_unlock(ptl);
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	lnet_drop_delayed_msg_list(&zombies, reason);
-
-	return 0;
-}
-
-/**
- * Turn off the lazy portal attribute. Delayed requests on the portal,
- * if any, will all be dropped when this function returns.
- *
- * \param portal Index of the portal to disable the lazy attribute on.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetClearLazyPortal(int portal)
-{
-	return lnet_clear_lazy_portal(NULL, portal,
-				      "Clearing lazy portal attr");
-}
-EXPORT_SYMBOL(LNetClearLazyPortal);
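
A hedged usage sketch of the pair above; EXAMPLE_PORTAL is an assumed index, not a real constant:

#define EXAMPLE_PORTAL	10	/* hypothetical portal index */

static int example_lazy_portal(void)
{
	int rc;

	rc = LNetSetLazyPortal(EXAMPLE_PORTAL);
	if (rc)			/* -EINVAL: index out of range */
		return rc;

	/* ... post match-all MDs and service the portal here ... */

	/* any still-delayed requests are dropped on return */
	return LNetClearLazyPortal(EXAMPLE_PORTAL);
}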

+ 0 - 585
drivers/staging/lustre/lnet/lnet/lib-socket.c

@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/if.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/file.h>
-#include <linux/pagemap.h>
-/* For sys_open & sys_close */
-#include <linux/syscalls.h>
-#include <net/sock.h>
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
-{
-	mm_segment_t oldfs = get_fs();
-	int err;
-
-	set_fs(KERNEL_DS);
-	err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
-	set_fs(oldfs);
-
-	return err;
-}
-
-static int
-lnet_sock_ioctl(int cmd, unsigned long arg)
-{
-	struct file *sock_filp;
-	struct socket *sock;
-	int rc;
-
-	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
-	if (rc) {
-		CERROR("Can't create socket: %d\n", rc);
-		return rc;
-	}
-
-	sock_filp = sock_alloc_file(sock, 0, NULL);
-	if (IS_ERR(sock_filp))
-		return PTR_ERR(sock_filp);
-
-	rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
-
-	fput(sock_filp);
-	return rc;
-}
-
-int
-lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
-{
-	struct ifreq ifr;
-	int nob;
-	int rc;
-	__be32 val;
-
-	nob = strnlen(name, IFNAMSIZ);
-	if (nob == IFNAMSIZ) {
-		CERROR("Interface name %s too long\n", name);
-		return -EINVAL;
-	}
-
-	BUILD_BUG_ON(sizeof(ifr.ifr_name) < IFNAMSIZ);
-
-	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
-		return -E2BIG;
-	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
-	rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
-	if (rc) {
-		CERROR("Can't get flags for interface %s\n", name);
-		return rc;
-	}
-
-	if (!(ifr.ifr_flags & IFF_UP)) {
-		CDEBUG(D_NET, "Interface %s down\n", name);
-		*up = 0;
-		*ip = *mask = 0;
-		return 0;
-	}
-	*up = 1;
-
-	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
-		return -E2BIG;
-	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
-	ifr.ifr_addr.sa_family = AF_INET;
-	rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
-	if (rc) {
-		CERROR("Can't get IP address for interface %s\n", name);
-		return rc;
-	}
-
-	val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
-	*ip = ntohl(val);
-
-	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
-		return -E2BIG;
-	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
-	ifr.ifr_addr.sa_family = AF_INET;
-	rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
-	if (rc) {
-		CERROR("Can't get netmask for interface %s\n", name);
-		return rc;
-	}
-
-	val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
-	*mask = ntohl(val);
-
-	return 0;
-}
-EXPORT_SYMBOL(lnet_ipif_query);
-
-int
-lnet_ipif_enumerate(char ***namesp)
-{
-	/* Allocate and fill in 'names', returning # interfaces/error */
-	char **names;
-	int toobig;
-	int nalloc;
-	int nfound;
-	struct ifreq *ifr;
-	struct ifconf ifc;
-	int rc;
-	int nob;
-	int i;
-
-	nalloc = 16;	/* first guess at max interfaces */
-	toobig = 0;
-	for (;;) {
-		if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
-			toobig = 1;
-			nalloc = PAGE_SIZE / sizeof(*ifr);
-			CWARN("Too many interfaces: only enumerating first %d\n",
-			      nalloc);
-		}
-
-		ifr = kzalloc(nalloc * sizeof(*ifr), GFP_KERNEL);
-		if (!ifr) {
-			CERROR("ENOMEM enumerating up to %d interfaces\n",
-			       nalloc);
-			rc = -ENOMEM;
-			goto out0;
-		}
-
-		ifc.ifc_buf = (char *)ifr;
-		ifc.ifc_len = nalloc * sizeof(*ifr);
-
-		rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
-		if (rc < 0) {
-			CERROR("Error %d enumerating interfaces\n", rc);
-			goto out1;
-		}
-
-		LASSERT(!rc);
-
-		nfound = ifc.ifc_len / sizeof(*ifr);
-		LASSERT(nfound <= nalloc);
-
-		if (nfound < nalloc || toobig)
-			break;
-
-		kfree(ifr);
-		nalloc *= 2;
-	}
-
-	if (!nfound)
-		goto out1;
-
-	names = kzalloc(nfound * sizeof(*names), GFP_KERNEL);
-	if (!names) {
-		rc = -ENOMEM;
-		goto out1;
-	}
-
-	for (i = 0; i < nfound; i++) {
-		nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
-		if (nob == IFNAMSIZ) {
-			/* no space for terminating NULL */
-			CERROR("interface name %.*s too long (%d max)\n",
-			       nob, ifr[i].ifr_name, IFNAMSIZ);
-			rc = -ENAMETOOLONG;
-			goto out2;
-		}
-
-		names[i] = kmalloc(IFNAMSIZ, GFP_KERNEL);
-		if (!names[i]) {
-			rc = -ENOMEM;
-			goto out2;
-		}
-
-		memcpy(names[i], ifr[i].ifr_name, nob);
-		names[i][nob] = 0;
-	}
-
-	*namesp = names;
-	rc = nfound;
-
-out2:
-	if (rc < 0)
-		lnet_ipif_free_enumeration(names, nfound);
-out1:
-	kfree(ifr);
-out0:
-	return rc;
-}
-EXPORT_SYMBOL(lnet_ipif_enumerate);
-
-void
-lnet_ipif_free_enumeration(char **names, int n)
-{
-	int i;
-
-	LASSERT(n > 0);
-
-	for (i = 0; i < n && names[i]; i++)
-		kfree(names[i]);
-
-	kfree(names);
-}
-EXPORT_SYMBOL(lnet_ipif_free_enumeration);
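
A hedged sketch of how the three interface helpers above compose; the debug print is illustrative only:

static void example_list_interfaces(void)
{
	char **names;
	int i, n;

	n = lnet_ipif_enumerate(&names);	/* # interfaces, or -errno */
	if (n <= 0)
		return;

	for (i = 0; i < n; i++) {
		int up;
		__u32 ip, mask;

		if (!lnet_ipif_query(names[i], &up, &ip, &mask) && up)
			CDEBUG(D_NET, "%s: addr %pI4h mask %pI4h\n",
			       names[i], &ip, &mask);
	}

	lnet_ipif_free_enumeration(names, n);
}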
-
-int
-lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
-{
-	int rc;
-	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
-	unsigned long then;
-	struct timeval tv;
-	struct kvec  iov = { .iov_base = buffer, .iov_len  = nob };
-	struct msghdr msg = {NULL,};
-
-	LASSERT(nob > 0);
-	/*
-	 * Caller may pass a zero timeout if she thinks the socket buffer is
-	 * empty enough to take the whole message immediately
-	 */
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
-	for (;;) {
-		msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
-		if (timeout) {
-			/* Set send timeout to remaining time */
-			jiffies_to_timeval(jiffies_left, &tv);
-			rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
-					       (char *)&tv, sizeof(tv));
-			if (rc) {
-				CERROR("Can't set socket send timeout %ld.%06d: %d\n",
-				       (long)tv.tv_sec, (int)tv.tv_usec, rc);
-				return rc;
-			}
-		}
-
-		then = jiffies;
-		rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
-		jiffies_left -= jiffies - then;
-
-		if (rc < 0)
-			return rc;
-
-		if (!rc) {
-			CERROR("Unexpected zero rc\n");
-			return -ECONNABORTED;
-		}
-
-		if (!msg_data_left(&msg))
-			break;
-
-		if (jiffies_left <= 0)
-			return -EAGAIN;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_write);
-
-int
-lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
-{
-	int rc;
-	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
-	unsigned long then;
-	struct timeval tv;
-	struct kvec  iov = {
-		.iov_base = buffer,
-		.iov_len  = nob
-	};
-	struct msghdr msg = {
-		.msg_flags = 0
-	};
-
-	LASSERT(nob > 0);
-	LASSERT(jiffies_left > 0);
-
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, nob);
-
-	for (;;) {
-		/* Set receive timeout to remaining time */
-		jiffies_to_timeval(jiffies_left, &tv);
-		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
-				       (char *)&tv, sizeof(tv));
-		if (rc) {
-			CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
-			       (long)tv.tv_sec, (int)tv.tv_usec, rc);
-			return rc;
-		}
-
-		then = jiffies;
-		rc = sock_recvmsg(sock, &msg, 0);
-		jiffies_left -= jiffies - then;
-
-		if (rc < 0)
-			return rc;
-
-		if (!rc)
-			return -ECONNRESET;
-
-		if (!msg_data_left(&msg))
-			return 0;
-
-		if (jiffies_left <= 0)
-			return -ETIMEDOUT;
-	}
-}
-EXPORT_SYMBOL(lnet_sock_read);
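
Because the timeout bounds the whole transfer, the pair above can implement a fixed-size handshake without blocking indefinitely. A hedged sketch, with example_hello as an assumed message layout:

struct example_hello {
	__u32 magic;		/* illustrative fields */
	__u32 version;
};

static int example_handshake(struct socket *sock)
{
	struct example_hello out = { .magic = 0xbada55, .version = 1 };
	struct example_hello in;
	int rc;

	rc = lnet_sock_write(sock, &out, sizeof(out), 5); /* 5s budget */
	if (rc)				/* -EAGAIN on timeout */
		return rc;

	return lnet_sock_read(sock, &in, sizeof(in), 5);  /* -ETIMEDOUT on timeout */
}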
-
-static int
-lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
-		 int local_port)
-{
-	struct sockaddr_in locaddr;
-	struct socket *sock;
-	int rc;
-	int option;
-
-	/* All errors are fatal except bind failure if the port is in use */
-	*fatal = 1;
-
-	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
-	*sockp = sock;
-	if (rc) {
-		CERROR("Can't create socket: %d\n", rc);
-		return rc;
-	}
-
-	option = 1;
-	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
-			       (char *)&option, sizeof(option));
-	if (rc) {
-		CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
-		goto failed;
-	}
-
-	if (local_ip || local_port) {
-		memset(&locaddr, 0, sizeof(locaddr));
-		locaddr.sin_family = AF_INET;
-		locaddr.sin_port = htons(local_port);
-		if (!local_ip)
-			locaddr.sin_addr.s_addr = htonl(INADDR_ANY);
-		else
-			locaddr.sin_addr.s_addr = htonl(local_ip);
-
-		rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
-				 sizeof(locaddr));
-		if (rc == -EADDRINUSE) {
-			CDEBUG(D_NET, "Port %d already in use\n", local_port);
-			*fatal = 0;
-			goto failed;
-		}
-		if (rc) {
-			CERROR("Error trying to bind to port %d: %d\n",
-			       local_port, rc);
-			goto failed;
-		}
-	}
-	return 0;
-
-failed:
-	sock_release(sock);
-	return rc;
-}
-
-int
-lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
-{
-	int option;
-	int rc;
-
-	if (txbufsize) {
-		option = txbufsize;
-		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
-				       (char *)&option, sizeof(option));
-		if (rc) {
-			CERROR("Can't set send buffer %d: %d\n",
-			       option, rc);
-			return rc;
-		}
-	}
-
-	if (rxbufsize) {
-		option = rxbufsize;
-		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
-				       (char *)&option, sizeof(option));
-		if (rc) {
-			CERROR("Can't set receive buffer %d: %d\n",
-			       option, rc);
-			return rc;
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_setbuf);
-
-int
-lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
-{
-	struct sockaddr_in sin;
-	int rc;
-
-	if (remote)
-		rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
-	else
-		rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
-	if (rc < 0) {
-		CERROR("Error %d getting sock %s IP/port\n",
-		       rc, remote ? "peer" : "local");
-		return rc;
-	}
-
-	if (ip)
-		*ip = ntohl(sin.sin_addr.s_addr);
-
-	if (port)
-		*port = ntohs(sin.sin_port);
-
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getaddr);
-
-int
-lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
-{
-	if (txbufsize)
-		*txbufsize = sock->sk->sk_sndbuf;
-
-	if (rxbufsize)
-		*rxbufsize = sock->sk->sk_rcvbuf;
-
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getbuf);
-
-int
-lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
-		 int backlog)
-{
-	int fatal;
-	int rc;
-
-	rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
-	if (rc) {
-		if (!fatal)
-			CERROR("Can't create socket: port %d already in use\n",
-			       local_port);
-		return rc;
-	}
-
-	rc = kernel_listen(*sockp, backlog);
-	if (!rc)
-		return 0;
-
-	CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
-	sock_release(*sockp);
-	return rc;
-}
-
-int
-lnet_sock_accept(struct socket **newsockp, struct socket *sock)
-{
-	wait_queue_entry_t wait;
-	struct socket *newsock;
-	int rc;
-
-	/*
-	 * XXX this should add a ref to sock->ops->owner, if
-	 * TCP could be a module
-	 */
-	rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
-	if (rc) {
-		CERROR("Can't allocate socket\n");
-		return rc;
-	}
-
-	newsock->ops = sock->ops;
-
-	rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
-	if (rc == -EAGAIN) {
-		/* Nothing ready, so wait for activity */
-		init_waitqueue_entry(&wait, current);
-		add_wait_queue(sk_sleep(sock->sk), &wait);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule();
-		remove_wait_queue(sk_sleep(sock->sk), &wait);
-		rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
-	}
-
-	if (rc)
-		goto failed;
-
-	*newsockp = newsock;
-	return 0;
-
-failed:
-	sock_release(newsock);
-	return rc;
-}
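
A hedged server-side sketch combining the two helpers above; the port and backlog values are illustrative:

static int example_accept_one(struct socket **connp)
{
	struct socket *lsock;
	int rc;

	rc = lnet_sock_listen(&lsock, 0 /* any local IP */, 988, 127);
	if (rc)			/* includes the port-in-use case */
		return rc;

	rc = lnet_sock_accept(connp, lsock);
	sock_release(lsock);
	return rc;
}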
-
-int
-lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
-		  int local_port, __u32 peer_ip, int peer_port)
-{
-	struct sockaddr_in srvaddr;
-	int rc;
-
-	rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
-	if (rc)
-		return rc;
-
-	memset(&srvaddr, 0, sizeof(srvaddr));
-	srvaddr.sin_family = AF_INET;
-	srvaddr.sin_port = htons(peer_port);
-	srvaddr.sin_addr.s_addr = htonl(peer_ip);
-
-	rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
-			    sizeof(srvaddr), 0);
-	if (!rc)
-		return 0;
-
-	/*
-	 * EADDRNOTAVAIL probably means we're already connected to the same
-	 * peer/port on the same local port on a differently typed
-	 * connection.  Let our caller retry with a different local
-	 * port...
-	 */
-	*fatal = !(rc == -EADDRNOTAVAIL);
-
-	CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
-		     "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
-		     &local_ip, local_port, &peer_ip, peer_port);
-
-	sock_release(*sockp);
-	return rc;
-}
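
A hedged sketch of the retry convention described above: on a non-fatal failure the caller walks down through a range of local ports (the range shown is illustrative):

static int example_connect_retry(struct socket **sockp, __u32 local_ip,
				 __u32 peer_ip, int peer_port)
{
	int port;
	int fatal;
	int rc;

	for (port = 1023; port > 512; port--) {
		rc = lnet_sock_connect(sockp, &fatal, local_ip, port,
				       peer_ip, peer_port);
		if (!rc || fatal)	/* success, or not worth retrying */
			return rc;
	}

	return -EADDRNOTAVAIL;	/* exhausted the port range */
}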

+ 0 - 105
drivers/staging/lustre/lnet/lnet/lo.c

@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-lolnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
-	LASSERT(!lntmsg->msg_routing);
-	LASSERT(!lntmsg->msg_target_is_router);
-
-	return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
-}
-
-static int
-lolnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-	   int delayed, struct iov_iter *to, unsigned int rlen)
-{
-	struct lnet_msg *sendmsg = private;
-
-	if (lntmsg) {		   /* not discarding */
-		if (sendmsg->msg_iov)
-			lnet_copy_iov2iter(to,
-					   sendmsg->msg_niov,
-					   sendmsg->msg_iov,
-					   sendmsg->msg_offset,
-					   iov_iter_count(to));
-		else
-			lnet_copy_kiov2iter(to,
-					    sendmsg->msg_niov,
-					    sendmsg->msg_kiov,
-					    sendmsg->msg_offset,
-					    iov_iter_count(to));
-
-		lnet_finalize(ni, lntmsg, 0);
-	}
-
-	lnet_finalize(ni, sendmsg, 0);
-	return 0;
-}
-
-static int lolnd_instanced;
-
-static void
-lolnd_shutdown(struct lnet_ni *ni)
-{
-	CDEBUG(D_NET, "shutdown\n");
-	LASSERT(lolnd_instanced);
-
-	lolnd_instanced = 0;
-}
-
-static int
-lolnd_startup(struct lnet_ni *ni)
-{
-	LASSERT(ni->ni_lnd == &the_lolnd);
-	LASSERT(!lolnd_instanced);
-	lolnd_instanced = 1;
-
-	return 0;
-}
-
-struct lnet_lnd the_lolnd = {
-	/* .lnd_list       = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
-	/* .lnd_refcount   = */ 0,
-	/* .lnd_type       = */ LOLND,
-	/* .lnd_startup    = */ lolnd_startup,
-	/* .lnd_shutdown   = */ lolnd_shutdown,
-	/* .lnd_ctl        = */ NULL,
-	/* .lnd_send       = */ lolnd_send,
-	/* .lnd_recv       = */ lolnd_recv,
-	/* .lnd_eager_recv = */ NULL,
-	/* .lnd_notify     = */ NULL,
-	/* .lnd_accept     = */ NULL
-};

+ 0 - 239
drivers/staging/lustre/lnet/lnet/module.c

@@ -1,239 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-static int config_on_load;
-module_param(config_on_load, int, 0444);
-MODULE_PARM_DESC(config_on_load, "configure network at module load");
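
With the parameter above, loading the module as "modprobe lnet config_on_load=1" makes lnet_init() below spawn a thread that configures the network at load time.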
-
-static struct mutex lnet_config_mutex;
-
-static int
-lnet_configure(void *arg)
-{
-	/* 'arg' is only there so this can be passed to kthread_run() */
-	int rc = 0;
-
-	mutex_lock(&lnet_config_mutex);
-
-	if (!the_lnet.ln_niinit_self) {
-		rc = try_module_get(THIS_MODULE);
-
-		if (rc != 1)
-			goto out;
-
-		rc = LNetNIInit(LNET_PID_LUSTRE);
-		if (rc >= 0) {
-			the_lnet.ln_niinit_self = 1;
-			rc = 0;
-		} else {
-			module_put(THIS_MODULE);
-		}
-	}
-
-out:
-	mutex_unlock(&lnet_config_mutex);
-	return rc;
-}
-
-static int
-lnet_unconfigure(void)
-{
-	int refcount;
-
-	mutex_lock(&lnet_config_mutex);
-
-	if (the_lnet.ln_niinit_self) {
-		the_lnet.ln_niinit_self = 0;
-		LNetNIFini();
-		module_put(THIS_MODULE);
-	}
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-	refcount = the_lnet.ln_refcount;
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	mutex_unlock(&lnet_config_mutex);
-	return !refcount ? 0 : -EBUSY;
-}
-
-static int
-lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
-{
-	struct lnet_ioctl_config_data *conf =
-		(struct lnet_ioctl_config_data *)hdr;
-	int rc;
-
-	if (conf->cfg_hdr.ioc_len < sizeof(*conf))
-		return -EINVAL;
-
-	mutex_lock(&lnet_config_mutex);
-	if (!the_lnet.ln_niinit_self) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
-	rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
-out_unlock:
-	mutex_unlock(&lnet_config_mutex);
-
-	return rc;
-}
-
-static int
-lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr)
-{
-	struct lnet_ioctl_config_data *conf =
-		(struct lnet_ioctl_config_data *)hdr;
-	int rc;
-
-	if (conf->cfg_hdr.ioc_len < sizeof(*conf))
-		return -EINVAL;
-
-	mutex_lock(&lnet_config_mutex);
-	if (!the_lnet.ln_niinit_self) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
-	rc = lnet_dyn_del_ni(conf->cfg_net);
-out_unlock:
-	mutex_unlock(&lnet_config_mutex);
-
-	return rc;
-}
-
-static int
-lnet_ioctl(struct notifier_block *nb,
-	   unsigned long cmd, void *vdata)
-{
-	int rc;
-	struct libcfs_ioctl_hdr *hdr = vdata;
-
-	switch (cmd) {
-	case IOC_LIBCFS_CONFIGURE: {
-		struct libcfs_ioctl_data *data =
-			(struct libcfs_ioctl_data *)hdr;
-
-		if (data->ioc_hdr.ioc_len < sizeof(*data)) {
-			rc = -EINVAL;
-		} else {
-			the_lnet.ln_nis_from_mod_params = data->ioc_flags;
-			rc = lnet_configure(NULL);
-		}
-		break;
-	}
-
-	case IOC_LIBCFS_UNCONFIGURE:
-		rc = lnet_unconfigure();
-		break;
-
-	case IOC_LIBCFS_ADD_NET:
-		rc = lnet_dyn_configure(hdr);
-		break;
-
-	case IOC_LIBCFS_DEL_NET:
-		rc = lnet_dyn_unconfigure(hdr);
-		break;
-
-	default:
-		/*
-		 * Passing LNET_PID_ANY only gives me a ref if the net is up
-		 * already; I'll need it to ensure the net can't go down while
-		 * I'm called into it
-		 */
-		rc = LNetNIInit(LNET_PID_ANY);
-		if (rc >= 0) {
-			rc = LNetCtl(cmd, hdr);
-			LNetNIFini();
-		}
-		break;
-	}
-	return notifier_from_ioctl_errno(rc);
-}
-
-static struct notifier_block lnet_ioctl_handler = {
-	.notifier_call = lnet_ioctl,
-};
-
-static int __init lnet_init(void)
-{
-	int rc;
-
-	mutex_init(&lnet_config_mutex);
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	rc = lnet_lib_init();
-	if (rc) {
-		CERROR("lnet_lib_init: error %d\n", rc);
-		return rc;
-	}
-
-	rc = blocking_notifier_chain_register(&libcfs_ioctl_list,
-					      &lnet_ioctl_handler);
-	LASSERT(!rc);
-
-	if (config_on_load) {
-		/*
-		 * Have to schedule a separate thread to avoid deadlocking
-		 * in modload
-		 */
-		(void)kthread_run(lnet_configure, NULL, "lnet_initd");
-	}
-
-	return 0;
-}
-
-static void __exit lnet_exit(void)
-{
-	int rc;
-
-	rc = blocking_notifier_chain_unregister(&libcfs_ioctl_list,
-						&lnet_ioctl_handler);
-	LASSERT(!rc);
-
-	lnet_lib_exit();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Networking layer");
-MODULE_VERSION(LNET_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(lnet_init);
-module_exit(lnet_exit);

+ 0 - 1023
drivers/staging/lustre/lnet/lnet/net_fault.c

@@ -1,1023 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/lnet/net_fault.c
- *
- * Lustre network fault simulation
- *
- * Author: liang.zhen@intel.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-#define LNET_MSG_MASK		(LNET_PUT_BIT | LNET_ACK_BIT | \
-				 LNET_GET_BIT | LNET_REPLY_BIT)
-
-struct lnet_drop_rule {
-	/** link chain on the_lnet.ln_drop_rules */
-	struct list_head	dr_link;
-	/** attributes of this rule */
-	struct lnet_fault_attr	dr_attr;
-	/** lock to protect \a dr_drop_at and \a dr_stat */
-	spinlock_t		dr_lock;
-	/**
-	 * the message sequence to drop: the message is dropped when
-	 * dr_stat.fs_count == dr_drop_at
-	 */
-	unsigned long		dr_drop_at;
-	/**
-	 * seconds until the next message is dropped; exclusive with dr_drop_at
-	 */
-	unsigned long		dr_drop_time;
-	/** baseline to calculate dr_drop_time */
-	unsigned long		dr_time_base;
-	/** statistic of dropped messages */
-	struct lnet_fault_stat	dr_stat;
-};
-
-static bool
-lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid)
-{
-	if (nid == msg_nid || nid == LNET_NID_ANY)
-		return true;
-
-	if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid))
-		return false;
-
-	/* 255.255.255.255@net is a wildcard for all addresses on a network */
-	return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY);
-}
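
A hedged illustration of the wildcard convention noted above; the choice of network (tcp0, via the SOCKLND type) is illustrative:

static bool example_match_any_on_net(lnet_nid_t msg_nid)
{
	/* an all-ones host part plus a concrete net matches every NID
	 * on that net, per the rule above */
	lnet_nid_t wild = LNET_MKNID(LNET_MKNET(SOCKLND, 0),
				     LNET_NIDADDR(LNET_NID_ANY));

	return lnet_fault_nid_match(wild, msg_nid);
}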
-
-static bool
-lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
-		      lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
-	if (!lnet_fault_nid_match(attr->fa_src, src) ||
-	    !lnet_fault_nid_match(attr->fa_dst, dst))
-		return false;
-
-	if (!(attr->fa_msg_mask & (1 << type)))
-		return false;
-
-	/**
-	 * NB: ACK and REPLY have no portal, but they should have been
-	 * rejected by message mask
-	 */
-	if (attr->fa_ptl_mask && /* has portal filter */
-	    !(attr->fa_ptl_mask & (1ULL << portal)))
-		return false;
-
-	return true;
-}
-
-static int
-lnet_fault_attr_validate(struct lnet_fault_attr *attr)
-{
-	if (!attr->fa_msg_mask)
-		attr->fa_msg_mask = LNET_MSG_MASK; /* all message types */
-
-	if (!attr->fa_ptl_mask) /* no portal filter */
-		return 0;
-
-	/* NB: only PUT and GET can be filtered if portal filter has been set */
-	attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT;
-	if (!attr->fa_msg_mask) {
-		CDEBUG(D_NET, "can't find valid message type bits %x\n",
-		       attr->fa_msg_mask);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void
-lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type)
-{
-	/* NB: fs_counter is NOT updated by this function */
-	switch (type) {
-	case LNET_MSG_PUT:
-		stat->fs_put++;
-		return;
-	case LNET_MSG_ACK:
-		stat->fs_ack++;
-		return;
-	case LNET_MSG_GET:
-		stat->fs_get++;
-		return;
-	case LNET_MSG_REPLY:
-		stat->fs_reply++;
-		return;
-	}
-}
-
-/**
- * LNet message drop simulation
- */
-
-/**
- * Add a new drop rule to LNet
- * There is no check for duplicate drop rules; every rule is checked against
- * each incoming message.
- */
-static int
-lnet_drop_rule_add(struct lnet_fault_attr *attr)
-{
-	struct lnet_drop_rule *rule;
-
-	if (attr->u.drop.da_rate && attr->u.drop.da_interval) {
-		CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n",
-		       attr->u.drop.da_rate, attr->u.drop.da_interval);
-		return -EINVAL;
-	}
-
-	if (lnet_fault_attr_validate(attr))
-		return -EINVAL;
-
-	rule = kzalloc(sizeof(*rule), GFP_NOFS);
-	if (!rule)
-		return -ENOMEM;
-
-	spin_lock_init(&rule->dr_lock);
-
-	rule->dr_attr = *attr;
-	if (attr->u.drop.da_interval) {
-		rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
-		rule->dr_drop_time = jiffies +
-			prandom_u32_max(attr->u.drop.da_interval) * HZ;
-	} else {
-		rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	list_add(&rule->dr_link, &the_lnet.ln_drop_rules);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n",
-	       libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
-	       attr->u.drop.da_rate, attr->u.drop.da_interval);
-	return 0;
-}
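
A hedged sketch of building a rate-based attribute for the function above; every value is illustrative:

static int example_add_drop_rule(void)
{
	struct lnet_fault_attr attr = {
		.fa_src		= LNET_NID_ANY,	   /* wildcard source */
		.fa_dst		= LNET_NID_ANY,	   /* wildcard destination */
		.fa_msg_mask	= LNET_PUT_BIT,	   /* PUTs only */
		.fa_ptl_mask	= 1ULL << 3,	   /* portal 3 only */
		.u.drop.da_rate	= 100,		   /* ~1 in 100 dropped */
	};

	return lnet_drop_rule_add(&attr);
}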
-
-/**
- * Remove matched drop rules from LNet; all rules that match \a src and
- * \a dst will be removed.
- * If \a src is zero, all rules that have \a dst as destination are removed.
- * If \a dst is zero, all rules that have \a src as source are removed.
- * If both are zero, all rules are removed.
- */
-static int
-lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst)
-{
-	struct lnet_drop_rule *rule;
-	struct lnet_drop_rule *tmp;
-	struct list_head zombies;
-	int n = 0;
-
-	INIT_LIST_HEAD(&zombies);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) {
-		if (rule->dr_attr.fa_src != src && src)
-			continue;
-
-		if (rule->dr_attr.fa_dst != dst && dst)
-			continue;
-
-		list_move(&rule->dr_link, &zombies);
-	}
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	list_for_each_entry_safe(rule, tmp, &zombies, dr_link) {
-		CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n",
-		       libcfs_nid2str(rule->dr_attr.fa_src),
-		       libcfs_nid2str(rule->dr_attr.fa_dst),
-		       rule->dr_attr.u.drop.da_rate,
-		       rule->dr_attr.u.drop.da_interval);
-
-		list_del(&rule->dr_link);
-		kfree(rule);
-		n++;
-	}
-
-	return n;
-}
-
-/**
- * List the drop rule at position \a pos
- */
-static int
-lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr,
-		    struct lnet_fault_stat *stat)
-{
-	struct lnet_drop_rule *rule;
-	int cpt;
-	int i = 0;
-	int rc = -ENOENT;
-
-	cpt = lnet_net_lock_current();
-	list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
-		if (i++ < pos)
-			continue;
-
-		spin_lock(&rule->dr_lock);
-		*attr = rule->dr_attr;
-		*stat = rule->dr_stat;
-		spin_unlock(&rule->dr_lock);
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-
-/**
- * reset counters for all drop rules
- */
-static void
-lnet_drop_rule_reset(void)
-{
-	struct lnet_drop_rule *rule;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
-		struct lnet_fault_attr *attr = &rule->dr_attr;
-
-		spin_lock(&rule->dr_lock);
-
-		memset(&rule->dr_stat, 0, sizeof(rule->dr_stat));
-		if (attr->u.drop.da_rate) {
-			rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
-		} else {
-			rule->dr_drop_time = jiffies +
-				prandom_u32_max(attr->u.drop.da_interval) * HZ;
-			rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
-		}
-		spin_unlock(&rule->dr_lock);
-	}
-
-	lnet_net_unlock(cpt);
-}
-
-/**
- * Check source/destination NID, portal, message type and drop rate, and
- * decide whether this message should be dropped.
- */
-static bool
-drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
-		lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
-	struct lnet_fault_attr *attr = &rule->dr_attr;
-	bool drop;
-
-	if (!lnet_fault_attr_match(attr, src, dst, type, portal))
-		return false;
-
-	/* match this rule, check drop rate now */
-	spin_lock(&rule->dr_lock);
-	if (rule->dr_drop_time) { /* time based drop */
-		unsigned long now = jiffies;
-
-		rule->dr_stat.fs_count++;
-		drop = time_after_eq(now, rule->dr_drop_time);
-		if (drop) {
-			if (time_after(now, rule->dr_time_base))
-				rule->dr_time_base = now;
-
-			rule->dr_drop_time = rule->dr_time_base +
-				prandom_u32_max(attr->u.drop.da_interval) * HZ;
-			rule->dr_time_base += attr->u.drop.da_interval * HZ;
-
-			CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst),
-			       rule->dr_drop_time);
-		}
-
-	} else { /* rate based drop */
-		drop = rule->dr_stat.fs_count++ == rule->dr_drop_at;
-
-		if (!do_div(rule->dr_stat.fs_count, attr->u.drop.da_rate)) {
-			rule->dr_drop_at = rule->dr_stat.fs_count +
-				prandom_u32_max(attr->u.drop.da_rate);
-			CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst), rule->dr_drop_at);
-		}
-	}
-
-	if (drop) { /* drop this message, update counters */
-		lnet_fault_stat_inc(&rule->dr_stat, type);
-		rule->dr_stat.u.drop.ds_dropped++;
-	}
-
-	spin_unlock(&rule->dr_lock);
-	return drop;
-}
-
-/**
- * Check if a message from \a src to \a dst can match any existing drop rule
- */
-bool
-lnet_drop_rule_match(struct lnet_hdr *hdr)
-{
-	struct lnet_drop_rule *rule;
-	lnet_nid_t src = le64_to_cpu(hdr->src_nid);
-	lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
-	unsigned int typ = le32_to_cpu(hdr->type);
-	unsigned int ptl = -1;
-	bool drop = false;
-	int cpt;
-
-	/**
-	 * NB: if Portal is specified, then only PUT and GET will be
-	 * filtered by drop rule
-	 */
-	if (typ == LNET_MSG_PUT)
-		ptl = le32_to_cpu(hdr->msg.put.ptl_index);
-	else if (typ == LNET_MSG_GET)
-		ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
-	cpt = lnet_net_lock_current();
-	list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
-		drop = drop_rule_match(rule, src, dst, typ, ptl);
-		if (drop)
-			break;
-	}
-
-	lnet_net_unlock(cpt);
-	return drop;
-}
-
-/**
- * LNet Delay Simulation
- */
-/** time (in jiffies) at which to send the delayed message */
-#define msg_delay_send		 msg_ev.hdr_data
-
-struct lnet_delay_rule {
-	/** link chain on the_lnet.ln_delay_rules */
-	struct list_head	dl_link;
-	/** link chain on delay_dd.dd_sched_rules */
-	struct list_head	dl_sched_link;
-	/** attributes of this rule */
-	struct lnet_fault_attr	dl_attr;
-	/** lock to protect the members below */
-	spinlock_t		dl_lock;
-	/** refcount of delay rule */
-	atomic_t		dl_refcount;
-	/**
-	 * the message sequence to delay: the message is delayed when
-	 * dl_stat.fs_count == dl_delay_at
-	 */
-	unsigned long		dl_delay_at;
-	/**
-	 * seconds until the next message is delayed; exclusive with dl_delay_at
-	 */
-	unsigned long		dl_delay_time;
-	/** baseline to calculate dl_delay_time */
-	unsigned long		dl_time_base;
-	/** jiffies to send the next delayed message */
-	unsigned long		dl_msg_send;
-	/** delayed message list */
-	struct list_head	dl_msg_list;
-	/** statistic of delayed messages */
-	struct lnet_fault_stat	dl_stat;
-	/** timer to wakeup delay_daemon */
-	struct timer_list	dl_timer;
-};
-
-struct delay_daemon_data {
-	/** serialise rule add/remove */
-	struct mutex		dd_mutex;
-	/** protect rules on \a dd_sched_rules */
-	spinlock_t		dd_lock;
-	/** scheduled delay rules (by timer) */
-	struct list_head	dd_sched_rules;
-	/** daemon thread sleeps at here */
-	wait_queue_head_t	dd_waitq;
-	/** controller (lctl command) wait at here */
-	wait_queue_head_t	dd_ctl_waitq;
-	/** daemon is running */
-	unsigned int		dd_running;
-	/** daemon stopped */
-	unsigned int		dd_stopped;
-};
-
-static struct delay_daemon_data	delay_dd;
-
-static unsigned long
-round_timeout(unsigned long timeout)
-{
-	return (unsigned int)rounddown(timeout, HZ) + HZ;
-}
-
-static void
-delay_rule_decref(struct lnet_delay_rule *rule)
-{
-	if (atomic_dec_and_test(&rule->dl_refcount)) {
-		LASSERT(list_empty(&rule->dl_sched_link));
-		LASSERT(list_empty(&rule->dl_msg_list));
-		LASSERT(list_empty(&rule->dl_link));
-
-		kfree(rule);
-	}
-}
-
-/**
- * Check source/destination NID, portal, message type and delay rate, and
- * decide whether this message should be delayed.
- */
-static bool
-delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src,
-		 lnet_nid_t dst, unsigned int type, unsigned int portal,
-		 struct lnet_msg *msg)
-{
-	struct lnet_fault_attr *attr = &rule->dl_attr;
-	bool delay;
-
-	if (!lnet_fault_attr_match(attr, src, dst, type, portal))
-		return false;
-
-	/* match this rule, check delay rate now */
-	spin_lock(&rule->dl_lock);
-	if (rule->dl_delay_time) { /* time based delay */
-		unsigned long now = jiffies;
-
-		rule->dl_stat.fs_count++;
-		delay = time_after_eq(now, rule->dl_delay_time);
-		if (delay) {
-			if (time_after(now, rule->dl_time_base))
-				rule->dl_time_base = now;
-
-			rule->dl_delay_time = rule->dl_time_base +
-				prandom_u32_max(attr->u.delay.la_interval) * HZ;
-			rule->dl_time_base += attr->u.delay.la_interval * HZ;
-
-			CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst),
-			       rule->dl_delay_time);
-		}
-
-	} else { /* rate based delay */
-		delay = rule->dl_stat.fs_count++ == rule->dl_delay_at;
-		/* generate the next random rate sequence */
-		if (!do_div(rule->dl_stat.fs_count, attr->u.delay.la_rate)) {
-			rule->dl_delay_at = rule->dl_stat.fs_count +
-				prandom_u32_max(attr->u.delay.la_rate);
-			CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst), rule->dl_delay_at);
-		}
-	}
-
-	if (!delay) {
-		spin_unlock(&rule->dl_lock);
-		return false;
-	}
-
-	/* delay this message, update counters */
-	lnet_fault_stat_inc(&rule->dl_stat, type);
-	rule->dl_stat.u.delay.ls_delayed++;
-
-	list_add_tail(&msg->msg_list, &rule->dl_msg_list);
-	msg->msg_delay_send = round_timeout(
-			jiffies + attr->u.delay.la_latency * HZ);
-	if (rule->dl_msg_send == -1) {
-		rule->dl_msg_send = msg->msg_delay_send;
-		mod_timer(&rule->dl_timer, rule->dl_msg_send);
-	}
-
-	spin_unlock(&rule->dl_lock);
-	return true;
-}
-
-/**
- * Check if \a msg matches any delay rule; if there is a match, receipt of
- * the message will be delayed.
- */
-bool
-lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg)
-{
-	struct lnet_delay_rule *rule;
-	lnet_nid_t src = le64_to_cpu(hdr->src_nid);
-	lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
-	unsigned int typ = le32_to_cpu(hdr->type);
-	unsigned int ptl = -1;
-
-	/* NB: called with hold of lnet_net_lock */
-
-	/**
-	 * NB: if Portal is specified, then only PUT and GET will be
-	 * filtered by delay rule
-	 */
-	if (typ == LNET_MSG_PUT)
-		ptl = le32_to_cpu(hdr->msg.put.ptl_index);
-	else if (typ == LNET_MSG_GET)
-		ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
-	list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
-		if (delay_rule_match(rule, src, dst, typ, ptl, msg))
-			return true;
-	}
-
-	return false;
-}
-
-/** check out delayed messages for send */
-static void
-delayed_msg_check(struct lnet_delay_rule *rule, bool all,
-		  struct list_head *msg_list)
-{
-	struct lnet_msg *msg;
-	struct lnet_msg *tmp;
-	unsigned long now = jiffies;
-
-	if (!all && rule->dl_msg_send > now)
-		return;
-
-	spin_lock(&rule->dl_lock);
-	list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) {
-		if (!all && msg->msg_delay_send > now)
-			break;
-
-		msg->msg_delay_send = 0;
-		list_move_tail(&msg->msg_list, msg_list);
-	}
-
-	if (list_empty(&rule->dl_msg_list)) {
-		del_timer(&rule->dl_timer);
-		rule->dl_msg_send = -1;
-
-	} else if (!list_empty(msg_list)) {
-		/*
-		 * dequeued some timed-out messages; update the timer for the
-		 * next delayed message on this rule
-		 */
-		msg = list_entry(rule->dl_msg_list.next,
-				 struct lnet_msg, msg_list);
-		rule->dl_msg_send = msg->msg_delay_send;
-		mod_timer(&rule->dl_timer, rule->dl_msg_send);
-	}
-	spin_unlock(&rule->dl_lock);
-}
-
-static void
-delayed_msg_process(struct list_head *msg_list, bool drop)
-{
-	struct lnet_msg	*msg;
-
-	while (!list_empty(msg_list)) {
-		struct lnet_ni *ni;
-		int cpt;
-		int rc;
-
-		msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
-		LASSERT(msg->msg_rxpeer);
-
-		ni = msg->msg_rxpeer->lp_ni;
-		cpt = msg->msg_rx_cpt;
-
-		list_del_init(&msg->msg_list);
-		if (drop) {
-			rc = -ECANCELED;
-
-		} else if (!msg->msg_routing) {
-			rc = lnet_parse_local(ni, msg);
-			if (!rc)
-				continue;
-
-		} else {
-			lnet_net_lock(cpt);
-			rc = lnet_parse_forward_locked(ni, msg);
-			lnet_net_unlock(cpt);
-
-			switch (rc) {
-			case LNET_CREDIT_OK:
-				lnet_ni_recv(ni, msg->msg_private, msg, 0,
-					     0, msg->msg_len, msg->msg_len);
-				/* fall through */
-			case LNET_CREDIT_WAIT:
-				continue;
-			default: /* failures */
-				break;
-			}
-		}
-
-		lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len);
-		lnet_finalize(ni, msg, rc);
-	}
-}
-
-/**
- * Process delayed messages for scheduled rules
- * This function can be called either by the delay rule daemon or by lnet_finalize()
- */
-void
-lnet_delay_rule_check(void)
-{
-	struct lnet_delay_rule *rule;
-	struct list_head msgs;
-
-	INIT_LIST_HEAD(&msgs);
-	while (1) {
-		if (list_empty(&delay_dd.dd_sched_rules))
-			break;
-
-		spin_lock_bh(&delay_dd.dd_lock);
-		if (list_empty(&delay_dd.dd_sched_rules)) {
-			spin_unlock_bh(&delay_dd.dd_lock);
-			break;
-		}
-
-		rule = list_entry(delay_dd.dd_sched_rules.next,
-				  struct lnet_delay_rule, dl_sched_link);
-		list_del_init(&rule->dl_sched_link);
-		spin_unlock_bh(&delay_dd.dd_lock);
-
-		delayed_msg_check(rule, false, &msgs);
-		delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */
-	}
-
-	if (!list_empty(&msgs))
-		delayed_msg_process(&msgs, false);
-}
-
-/** daemon thread to handle delayed messages */
-static int
-lnet_delay_rule_daemon(void *arg)
-{
-	delay_dd.dd_running = 1;
-	wake_up(&delay_dd.dd_ctl_waitq);
-
-	while (delay_dd.dd_running) {
-		wait_event_interruptible(delay_dd.dd_waitq,
-					 !delay_dd.dd_running ||
-					 !list_empty(&delay_dd.dd_sched_rules));
-		lnet_delay_rule_check();
-	}
-
-	/* in case more rules have been enqueued after my last check */
-	lnet_delay_rule_check();
-	delay_dd.dd_stopped = 1;
-	wake_up(&delay_dd.dd_ctl_waitq);
-
-	return 0;
-}
-
-static void
-delay_timer_cb(struct timer_list *t)
-{
-	struct lnet_delay_rule *rule = from_timer(rule, t, dl_timer);
-
-	spin_lock_bh(&delay_dd.dd_lock);
-	if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) {
-		atomic_inc(&rule->dl_refcount);
-		list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules);
-		wake_up(&delay_dd.dd_waitq);
-	}
-	spin_unlock_bh(&delay_dd.dd_lock);
-}
-
-/**
- * Add a new delay rule to LNet
- * There is no check for duplicate delay rules; every rule is checked against
- * each incoming message.
- */
-int
-lnet_delay_rule_add(struct lnet_fault_attr *attr)
-{
-	struct lnet_delay_rule *rule;
-	int rc = 0;
-
-	if (attr->u.delay.la_rate && attr->u.delay.la_interval) {
-		CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n",
-		       attr->u.delay.la_rate, attr->u.delay.la_interval);
-		return -EINVAL;
-	}
-
-	if (!attr->u.delay.la_latency) {
-		CDEBUG(D_NET, "delay latency cannot be zero\n");
-		return -EINVAL;
-	}
-
-	if (lnet_fault_attr_validate(attr))
-		return -EINVAL;
-
-	rule = kzalloc(sizeof(*rule), GFP_NOFS);
-	if (!rule)
-		return -ENOMEM;
-
-	mutex_lock(&delay_dd.dd_mutex);
-	if (!delay_dd.dd_running) {
-		struct task_struct *task;
-
-		/**
-		 * NB: although LND threads will process delayed messages
-		 * in lnet_finalize, there is no guarantee that they will
-		 * be woken up if no other message needs to be handled.
-		 * A single daemon thread suffices; performance is not a
-		 * concern for this simulation module.
-		 */
-		task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd");
-		if (IS_ERR(task)) {
-			rc = PTR_ERR(task);
-			goto failed;
-		}
-		wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running);
-	}
-
-	timer_setup(&rule->dl_timer, delay_timer_cb, 0);
-
-	spin_lock_init(&rule->dl_lock);
-	INIT_LIST_HEAD(&rule->dl_msg_list);
-	INIT_LIST_HEAD(&rule->dl_sched_link);
-
-	rule->dl_attr = *attr;
-	if (attr->u.delay.la_interval) {
-		rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
-		rule->dl_delay_time = jiffies +
-			prandom_u32_max(attr->u.delay.la_interval) * HZ;
-	} else {
-		rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
-	}
-
-	rule->dl_msg_send = -1;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	atomic_set(&rule->dl_refcount, 1);
-	list_add(&rule->dl_link, &the_lnet.ln_delay_rules);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n",
-	       libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
-	       attr->u.delay.la_rate);
-
-	mutex_unlock(&delay_dd.dd_mutex);
-	return 0;
-failed:
-	mutex_unlock(&delay_dd.dd_mutex);
-	kfree(rule);
-	return rc;
-}
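
And a matching hedged sketch for a delay rule; as validated above, la_rate and la_interval are mutually exclusive and la_latency must be non-zero (values illustrative):

static int example_add_delay_rule(void)
{
	struct lnet_fault_attr attr = {
		.fa_src			= LNET_NID_ANY,
		.fa_dst			= LNET_NID_ANY,
		.u.delay.la_rate	= 50,	/* ~1 in 50 delayed */
		.u.delay.la_latency	= 2,	/* by two seconds */
	};

	return lnet_delay_rule_add(&attr);
}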
-
-/**
- * Remove matched delay rules from LNet. If \a shutdown is true, or both
- * \a src and \a dst are zero, all rules will be removed; otherwise only
- * matching rules will be removed.
- * If \a src is zero, all rules that have \a dst as destination are removed.
- * If \a dst is zero, all rules that have \a src as source are removed.
- *
- * When a delay rule is removed, all delayed messages of this rule will be
- * processed immediately.
- */
-int
-lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown)
-{
-	struct lnet_delay_rule *rule;
-	struct lnet_delay_rule *tmp;
-	struct list_head rule_list;
-	struct list_head msg_list;
-	int n = 0;
-	bool cleanup;
-
-	INIT_LIST_HEAD(&rule_list);
-	INIT_LIST_HEAD(&msg_list);
-
-	if (shutdown) {
-		src = 0;
-		dst = 0;
-	}
-
-	mutex_lock(&delay_dd.dd_mutex);
-	lnet_net_lock(LNET_LOCK_EX);
-
-	list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) {
-		if (rule->dl_attr.fa_src != src && src)
-			continue;
-
-		if (rule->dl_attr.fa_dst != dst && dst)
-			continue;
-
-		CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n",
-		       libcfs_nid2str(rule->dl_attr.fa_src),
-		       libcfs_nid2str(rule->dl_attr.fa_dst),
-		       rule->dl_attr.u.delay.la_rate,
-		       rule->dl_attr.u.delay.la_interval);
-		/* refcount is taken over by rule_list */
-		list_move(&rule->dl_link, &rule_list);
-	}
-
-	/* check if we need to shutdown delay_daemon */
-	cleanup = list_empty(&the_lnet.ln_delay_rules) &&
-		  !list_empty(&rule_list);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) {
-		list_del_init(&rule->dl_link);
-
-		del_timer_sync(&rule->dl_timer);
-		delayed_msg_check(rule, true, &msg_list);
-		delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */
-		n++;
-	}
-
-	if (cleanup) { /* no more delay rule, shutdown delay_daemon */
-		LASSERT(delay_dd.dd_running);
-		delay_dd.dd_running = 0;
-		wake_up(&delay_dd.dd_waitq);
-
-		while (!delay_dd.dd_stopped)
-			wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped);
-	}
-	mutex_unlock(&delay_dd.dd_mutex);
-
-	if (!list_empty(&msg_list))
-		delayed_msg_process(&msg_list, shutdown);
-
-	return n;
-}
-
-/**
- * List the delay rule at position \a pos
- */
-int
-lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
-		     struct lnet_fault_stat *stat)
-{
-	struct lnet_delay_rule *rule;
-	int cpt;
-	int i = 0;
-	int rc = -ENOENT;
-
-	cpt = lnet_net_lock_current();
-	list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
-		if (i++ < pos)
-			continue;
-
-		spin_lock(&rule->dl_lock);
-		*attr = rule->dl_attr;
-		*stat = rule->dl_stat;
-		spin_unlock(&rule->dl_lock);
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-
-/**
- * reset counters for all Delay Rules
- */
-void
-lnet_delay_rule_reset(void)
-{
-	struct lnet_delay_rule *rule;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
-		struct lnet_fault_attr *attr = &rule->dl_attr;
-
-		spin_lock(&rule->dl_lock);
-
-		memset(&rule->dl_stat, 0, sizeof(rule->dl_stat));
-		if (attr->u.delay.la_rate) {
-			rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
-		} else {
-			rule->dl_delay_time =
-				jiffies + prandom_u32_max(
-					attr->u.delay.la_interval) * HZ;
-			rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
-		}
-		spin_unlock(&rule->dl_lock);
-	}
-
-	lnet_net_unlock(cpt);
-}
-
-int
-lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data)
-{
-	struct lnet_fault_attr *attr;
-	struct lnet_fault_stat *stat;
-
-	attr = (struct lnet_fault_attr *)data->ioc_inlbuf1;
-
-	switch (opc) {
-	default:
-		return -EINVAL;
-
-	case LNET_CTL_DROP_ADD:
-		if (!attr)
-			return -EINVAL;
-
-		return lnet_drop_rule_add(attr);
-
-	case LNET_CTL_DROP_DEL:
-		if (!attr)
-			return -EINVAL;
-
-		data->ioc_count = lnet_drop_rule_del(attr->fa_src,
-						     attr->fa_dst);
-		return 0;
-
-	case LNET_CTL_DROP_RESET:
-		lnet_drop_rule_reset();
-		return 0;
-
-	case LNET_CTL_DROP_LIST:
-		stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
-		if (!attr || !stat)
-			return -EINVAL;
-
-		return lnet_drop_rule_list(data->ioc_count, attr, stat);
-
-	case LNET_CTL_DELAY_ADD:
-		if (!attr)
-			return -EINVAL;
-
-		return lnet_delay_rule_add(attr);
-
-	case LNET_CTL_DELAY_DEL:
-		if (!attr)
-			return -EINVAL;
-
-		data->ioc_count = lnet_delay_rule_del(attr->fa_src,
-						      attr->fa_dst, false);
-		return 0;
-
-	case LNET_CTL_DELAY_RESET:
-		lnet_delay_rule_reset();
-		return 0;
-
-	case LNET_CTL_DELAY_LIST:
-		stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
-		if (!attr || !stat)
-			return -EINVAL;
-
-		return lnet_delay_rule_list(data->ioc_count, attr, stat);
-	}
-}
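lnet_fault_ctl() is the single ioctl funnel for the fault-injection machinery: each LNET_CTL_* opcode maps onto the add/del/reset/list primitive of the drop or delay rule tables, with ioc_inlbuf1 and ioc_inlbuf2 carrying the attribute and stat buffers. A hedged kernel-side sketch of adding a drop rule through it follows; the NIDs and rate are made up, and the fa_msg_mask and u.drop.da_rate field names are assumptions mirroring the u.delay.la_rate naming visible above:

    /* Sketch: drop roughly 1 in 10 PUTs sent to 192.168.0.7@tcp. */
    struct lnet_fault_attr attr = {
            .fa_src = 0,                    /* LNET_NID_ANY: any source */
            .fa_dst = libcfs_str2nid("192.168.0.7@tcp"),
            .fa_msg_mask = LNET_PUT_BIT,
            .u.drop.da_rate = 10,
    };
    struct libcfs_ioctl_data data = {
            .ioc_inlbuf1 = (char *)&attr,
    };
    int rc = lnet_fault_ctl(LNET_CTL_DROP_ADD, &data);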
-
-int
-lnet_fault_init(void)
-{
-	BUILD_BUG_ON(LNET_PUT_BIT != 1 << LNET_MSG_PUT);
-	BUILD_BUG_ON(LNET_ACK_BIT != 1 << LNET_MSG_ACK);
-	BUILD_BUG_ON(LNET_GET_BIT != 1 << LNET_MSG_GET);
-	BUILD_BUG_ON(LNET_REPLY_BIT != 1 << LNET_MSG_REPLY);
-
-	mutex_init(&delay_dd.dd_mutex);
-	spin_lock_init(&delay_dd.dd_lock);
-	init_waitqueue_head(&delay_dd.dd_waitq);
-	init_waitqueue_head(&delay_dd.dd_ctl_waitq);
-	INIT_LIST_HEAD(&delay_dd.dd_sched_rules);
-
-	return 0;
-}
-
-void
-lnet_fault_fini(void)
-{
-	lnet_drop_rule_del(0, 0);
-	lnet_delay_rule_del(0, 0, true);
-
-	LASSERT(list_empty(&the_lnet.ln_drop_rules));
-	LASSERT(list_empty(&the_lnet.ln_delay_rules));
-	LASSERT(list_empty(&delay_dd.dd_sched_rules));
-}

+ 0 - 1261
drivers/staging/lustre/lnet/lnet/nidstrings.c

@@ -1,1261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/nidstrings.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-/* max value for numeric network address */
-#define MAX_NUMERIC_VALUE 0xffffffff
-
-#define IPSTRING_LENGTH 16
-
-/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
- * consistent in all conversion functions.  Some code fragments are copied
- * around for the sake of clarity...
- */
-
-/* CAVEAT EMPTOR! Racey temporary buffer allocation!
- * Choose the number of nidstrings to support the MAXIMUM expected number of
- * concurrent users.  If there are more, the returned string will be volatile.
- * NB this number must allow for a process to be descheduled for a timeslice
- * between getting its string and using it.
- */
-
-static char      libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
-static int       libcfs_nidstring_idx;
-
-static DEFINE_SPINLOCK(libcfs_nidstring_lock);
-
-static struct netstrfns *libcfs_namenum2netstrfns(const char *name);
-
-char *
-libcfs_next_nidstring(void)
-{
-	char *str;
-	unsigned long flags;
-
-	spin_lock_irqsave(&libcfs_nidstring_lock, flags);
-
-	str = libcfs_nidstrings[libcfs_nidstring_idx++];
-	if (libcfs_nidstring_idx == ARRAY_SIZE(libcfs_nidstrings))
-		libcfs_nidstring_idx = 0;
-
-	spin_unlock_irqrestore(&libcfs_nidstring_lock, flags);
-	return str;
-}
-EXPORT_SYMBOL(libcfs_next_nidstring);
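This rotating pool is what lets the many CDEBUG() call sites in these files print NIDs without passing a buffer: a convenience wrapper grabs the next slot and formats into it. A sketch of that wrapper (the real inline lives in the nidstr.h header; treat this as an approximation):

    static inline char *libcfs_nid2str(lnet_nid_t nid)
    {
            /* Format into the next slot of the rotating pool. */
            return libcfs_nid2str_r(nid, libcfs_next_nidstring(),
                                    LNET_NIDSTR_SIZE);
    }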
-
-/**
- * Nid range list syntax.
- * \verbatim
- *
- * <nidlist>         :== <nidrange> [ ' ' <nidrange> ]
- * <nidrange>        :== <addrrange> '@' <net>
- * <addrrange>       :== '*' |
- *                       <ipaddr_range> |
- *                       <cfs_expr_list>
- * <ipaddr_range>    :== <cfs_expr_list>.<cfs_expr_list>.<cfs_expr_list>.
- *                       <cfs_expr_list>
- * <cfs_expr_list>   :== <number> |
- *                       <expr_list>
- * <expr_list>       :== '[' <range_expr> [ ',' <range_expr>] ']'
- * <range_expr>      :== <number> |
- *                       <number> '-' <number> |
- *                       <number> '-' <number> '/' <number>
- * <net>             :== <netname> | <netname><number>
- * <netname>         :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" |
- *                       "vib" | "ra" | "elan" | "mx" | "ptl"
- * \endverbatim
- */
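Concretely, the grammar admits strings like these (illustrative examples, not drawn from the source):

    192.168.1.[2-10/2]@tcp          every second host from .2 to .10 on tcp0
    *@elan3                         all NIDs on network elan3
    31@gni 10.0.0.[1-3]@o2ib1       two <nidrange>s separated by a space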
-
-/**
- * Structure to represent \<nidrange\> token of the syntax.
- *
- * One of this is created for each \<net\> parsed.
- */
-struct nidrange {
-	/**
-	 * Link to list of this structures which is built on nid range
-	 * list parsing.
-	 */
-	struct list_head nr_link;
-	/**
-	 * List head for addrrange::ar_link.
-	 */
-	struct list_head nr_addrranges;
-	/**
-	 * Flag indicating that *@<net> is found.
-	 */
-	int nr_all;
-	/**
-	 * Pointer to corresponding element of libcfs_netstrfns.
-	 */
-	struct netstrfns *nr_netstrfns;
-	/**
-	 * Network number, e.g. 5 if \<net\> is "elan5".
-	 */
-	int nr_netnum;
-};
-
-/**
- * Structure to represent \<addrrange\> token of the syntax.
- */
-struct addrrange {
-	/**
-	 * Link to nidrange::nr_addrranges.
-	 */
-	struct list_head ar_link;
-	/**
-	 * List head for cfs_expr_list::el_list.
-	 */
-	struct list_head ar_numaddr_ranges;
-};
-
-/**
- * Parses \<addrrange\> token of the syntax.
- *
- * Allocates struct addrrange and links to \a nidrange via
- * (nidrange::nr_addrranges)
- *
- * \retval 0 if \a src parses to '*' | \<ipaddr_range\> | \<cfs_expr_list\>
- * \retval -errno otherwise
- */
-static int
-parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange)
-{
-	struct addrrange *addrrange;
-
-	if (src->ls_len == 1 && src->ls_str[0] == '*') {
-		nidrange->nr_all = 1;
-		return 0;
-	}
-
-	addrrange = kzalloc(sizeof(struct addrrange), GFP_NOFS);
-	if (!addrrange)
-		return -ENOMEM;
-	list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges);
-	INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges);
-
-	return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str,
-						src->ls_len,
-						&addrrange->ar_numaddr_ranges);
-}
-
-/**
- * Finds or creates struct nidrange.
- *
- * Checks if \a src is a valid network name, looks for corresponding
- * nidrange on the list of nidranges (\a nidlist), creates new struct
- * nidrange if it is not found.
- *
- * \retval pointer to struct nidrange matching network specified via \a src
- * \retval NULL if \a src does not match any network
- */
-static struct nidrange *
-add_nidrange(const struct cfs_lstr *src,
-	     struct list_head *nidlist)
-{
-	struct netstrfns *nf;
-	struct nidrange *nr;
-	int endlen;
-	unsigned int netnum;
-
-	if (src->ls_len >= LNET_NIDSTR_SIZE)
-		return NULL;
-
-	nf = libcfs_namenum2netstrfns(src->ls_str);
-	if (!nf)
-		return NULL;
-	endlen = src->ls_len - strlen(nf->nf_name);
-	if (!endlen)
-		/* network name only, e.g. "elan" or "tcp" */
-		netnum = 0;
-	else {
-		/*
-		 * e.g. "elan25" or "tcp23", refuse to parse if
-		 * network name is not appended with decimal or
-		 * hexadecimal number
-		 */
-		if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name),
-				       endlen, &netnum, 0, MAX_NUMERIC_VALUE))
-			return NULL;
-	}
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		if (nr->nr_netstrfns != nf)
-			continue;
-		if (nr->nr_netnum != netnum)
-			continue;
-		return nr;
-	}
-
-	nr = kzalloc(sizeof(struct nidrange), GFP_NOFS);
-	if (!nr)
-		return NULL;
-	list_add_tail(&nr->nr_link, nidlist);
-	INIT_LIST_HEAD(&nr->nr_addrranges);
-	nr->nr_netstrfns = nf;
-	nr->nr_all = 0;
-	nr->nr_netnum = netnum;
-
-	return nr;
-}
-
-/**
- * Parses \<nidrange\> token of the syntax.
- *
- * \retval 1 if \a src parses to \<addrrange\> '@' \<net\>
- * \retval 0 otherwise
- */
-static int
-parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist)
-{
-	struct cfs_lstr addrrange;
-	struct cfs_lstr net;
-	struct nidrange *nr;
-
-	if (!cfs_gettok(src, '@', &addrrange))
-		goto failed;
-
-	if (!cfs_gettok(src, '@', &net) || src->ls_str)
-		goto failed;
-
-	nr = add_nidrange(&net, nidlist);
-	if (!nr)
-		goto failed;
-
-	if (parse_addrange(&addrrange, nr))
-		goto failed;
-
-	return 1;
-failed:
-	return 0;
-}
-
-/**
- * Frees addrrange structures of \a list.
- *
- * For each struct addrrange structure found on \a list it frees
- * cfs_expr_list list attached to it and frees the addrrange itself.
- *
- * \retval none
- */
-static void
-free_addrranges(struct list_head *list)
-{
-	while (!list_empty(list)) {
-		struct addrrange *ar;
-
-		ar = list_entry(list->next, struct addrrange, ar_link);
-
-		cfs_expr_list_free_list(&ar->ar_numaddr_ranges);
-		list_del(&ar->ar_link);
-		kfree(ar);
-	}
-}
-
-/**
- * Frees nidrange structures of \a list.
- *
- * For each struct nidrange structure found on \a list it frees
- * addrrange list attached to it and frees the nidrange itself.
- *
- * \retval none
- */
-void
-cfs_free_nidlist(struct list_head *list)
-{
-	struct list_head *pos, *next;
-	struct nidrange *nr;
-
-	list_for_each_safe(pos, next, list) {
-		nr = list_entry(pos, struct nidrange, nr_link);
-		free_addrranges(&nr->nr_addrranges);
-		list_del(pos);
-		kfree(nr);
-	}
-}
-EXPORT_SYMBOL(cfs_free_nidlist);
-
-/**
- * Parses nid range list.
- *
- * Parses \a str, with rigorous syntax and overflow checking, into
- * \<nidrange\> [ ' ' \<nidrange\> ], compiles it into a set of
- * structures and links them to \a nidlist. The resulting list can be
- * used to match a NID against the set of NIDs defined by \a str.
- * \see cfs_match_nid
- *
- * \retval 1 on success
- * \retval 0 otherwise
- */
-int
-cfs_parse_nidlist(char *str, int len, struct list_head *nidlist)
-{
-	struct cfs_lstr src;
-	struct cfs_lstr res;
-	int rc;
-
-	src.ls_str = str;
-	src.ls_len = len;
-	INIT_LIST_HEAD(nidlist);
-	while (src.ls_str) {
-		rc = cfs_gettok(&src, ' ', &res);
-		if (!rc) {
-			cfs_free_nidlist(nidlist);
-			return 0;
-		}
-		rc = parse_nidrange(&res, nidlist);
-		if (!rc) {
-			cfs_free_nidlist(nidlist);
-			return 0;
-		}
-	}
-	return 1;
-}
-EXPORT_SYMBOL(cfs_parse_nidlist);
-
-/**
- * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist).
- *
- * \see cfs_parse_nidlist()
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid)))
-			continue;
-		if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid)))
-			continue;
-		if (nr->nr_all)
-			return 1;
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link)
-			if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid),
-							    &ar->ar_numaddr_ranges))
-				return 1;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(cfs_match_nid);
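Together these two entry points give the compile-then-match flow the \see tags point at. A short usage sketch (buffer contents illustrative):

    /* Compile a nidlist, test a NID against it, then free it. */
    LIST_HEAD(nidlist);
    char buf[] = "192.168.1.[2-10/2]@tcp";

    if (cfs_parse_nidlist(buf, strlen(buf), &nidlist)) {
            lnet_nid_t nid = libcfs_str2nid("192.168.1.4@tcp");

            if (cfs_match_nid(nid, &nidlist))
                    CDEBUG(D_NET, "%s is in the list\n",
                           libcfs_nid2str(nid));
            cfs_free_nidlist(&nidlist);
    }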
-
-/**
- * Print the network part of the nidrange \a nr into the specified \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_network(char *buffer, int count, struct nidrange *nr)
-{
-	struct netstrfns *nf = nr->nr_netstrfns;
-
-	if (!nr->nr_netnum)
-		return scnprintf(buffer, count, "@%s", nf->nf_name);
-	else
-		return scnprintf(buffer, count, "@%s%u",
-				 nf->nf_name, nr->nr_netnum);
-}
-
-/**
- * Print a list of addrrange (\a addrranges) into the specified \a buffer.
- * At most \a count characters can be printed into \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges,
-		     struct nidrange *nr)
-{
-	int i = 0;
-	struct addrrange *ar;
-	struct netstrfns *nf = nr->nr_netstrfns;
-
-	list_for_each_entry(ar, addrranges, ar_link) {
-		if (i)
-			i += scnprintf(buffer + i, count - i, " ");
-		i += nf->nf_print_addrlist(buffer + i, count - i,
-					   &ar->ar_numaddr_ranges);
-		i += cfs_print_network(buffer + i, count - i, nr);
-	}
-	return i;
-}
-
-/**
- * Print a list of nidranges (\a nidlist) into the specified \a buffer.
- * At most \a count characters can be printed into \a buffer.
- * Nidranges are separated by a space character.
- *
- * \retval number of characters written
- */
-int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist)
-{
-	int i = 0;
-	struct nidrange *nr;
-
-	if (count <= 0)
-		return 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		if (i)
-			i += scnprintf(buffer + i, count - i, " ");
-
-		if (nr->nr_all) {
-			LASSERT(list_empty(&nr->nr_addrranges));
-			i += scnprintf(buffer + i, count - i, "*");
-			i += cfs_print_network(buffer + i, count - i, nr);
-		} else {
-			i += cfs_print_addrranges(buffer + i, count - i,
-						  &nr->nr_addrranges, nr);
-		}
-	}
-	return i;
-}
-EXPORT_SYMBOL(cfs_print_nidlist);
-
-/**
- * Determines minimum and maximum addresses for a single
- * IP address range
- *
- * \param	ar
- * \param	min_nid
- * \param	max_nid
- */
-static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid,
-			      __u32 *max_nid)
-{
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	__u32 tmp_ip_addr = 0;
-	unsigned int min_ip[4] = {0};
-	unsigned int max_ip[4] = {0};
-	int re_count = 0;
-
-	list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
-		list_for_each_entry(re, &el->el_exprs, re_link) {
-			min_ip[re_count] = re->re_lo;
-			max_ip[re_count] = re->re_hi;
-			re_count++;
-		}
-	}
-
-	tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) |
-		       (min_ip[2] << 8) | min_ip[3]);
-
-	if (min_nid)
-		*min_nid = tmp_ip_addr;
-
-	tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) |
-		       (max_ip[2] << 8) | max_ip[3]);
-
-	if (max_nid)
-		*max_nid = tmp_ip_addr;
-}
-
-/**
- * Determines minimum and maximum addresses for a single
- * numeric address range
- *
- * \param	ar
- * \param	min_nid
- * \param	max_nid
- */
-static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid,
-			       __u32 *max_nid)
-{
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	unsigned int min_addr = 0;
-	unsigned int max_addr = 0;
-
-	list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
-		list_for_each_entry(re, &el->el_exprs, re_link) {
-			if (re->re_lo < min_addr || !min_addr)
-				min_addr = re->re_lo;
-			if (re->re_hi > max_addr)
-				max_addr = re->re_hi;
-		}
-	}
-
-	if (min_nid)
-		*min_nid = min_addr;
-	if (max_nid)
-		*max_nid = max_addr;
-}
-
-/**
- * Determines whether an expression list in a nidrange contains exactly
- * one contiguous address range. Calls the correct netstrfns for the LND.
- *
- * \param	*nidlist
- *
- * \retval	true if contiguous
- * \retval	false if not contiguous
- */
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct netstrfns *nf = NULL;
-	char *lndname = NULL;
-	int netnum = -1;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		nf = nr->nr_netstrfns;
-		if (!lndname)
-			lndname = nf->nf_name;
-		if (netnum == -1)
-			netnum = nr->nr_netnum;
-
-		if (strcmp(lndname, nf->nf_name) ||
-		    netnum != nr->nr_netnum)
-			return false;
-	}
-
-	if (!nf)
-		return false;
-
-	if (!nf->nf_is_contiguous(nidlist))
-		return false;
-
-	return true;
-}
-EXPORT_SYMBOL(cfs_nidrange_is_contiguous);
-
-/**
- * Determines whether an expression list in a numeric nidrange contains exactly
- * one contiguous address range.
- *
- * \param	*nidlist
- *
- * \retval	true if contiguous
- * \retval	false if not contiguous
- */
-static bool cfs_num_is_contiguous(struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	int last_hi = 0;
-	__u32 last_end_nid = 0;
-	__u32 current_start_nid = 0;
-	__u32 current_end_nid = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			cfs_num_ar_min_max(ar, &current_start_nid,
-					   &current_end_nid);
-			if (last_end_nid &&
-			    (current_start_nid - last_end_nid != 1))
-				return false;
-			last_end_nid = current_end_nid;
-			list_for_each_entry(el, &ar->ar_numaddr_ranges,
-					    el_link) {
-				list_for_each_entry(re, &el->el_exprs,
-						    re_link) {
-					if (re->re_stride > 1)
-						return false;
-					else if (last_hi &&
-						 re->re_hi - last_hi != 1)
-						return false;
-					last_hi = re->re_hi;
-				}
-			}
-		}
-	}
-
-	return true;
-}
-
-/**
- * Determines whether an expression list in an IP nidrange contains exactly
- * one contiguous address range.
- *
- * \param	*nidlist
- *
- * \retval	true if contiguous
- * \retval	false if not contiguous
- */
-static bool cfs_ip_is_contiguous(struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	int expr_count;
-	int last_hi = 255;
-	int last_diff = 0;
-	__u32 last_end_nid = 0;
-	__u32 current_start_nid = 0;
-	__u32 current_end_nid = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			last_hi = 255;
-			last_diff = 0;
-			cfs_ip_ar_min_max(ar, &current_start_nid,
-					  &current_end_nid);
-			if (last_end_nid &&
-			    (current_start_nid - last_end_nid != 1))
-				return false;
-			last_end_nid = current_end_nid;
-			list_for_each_entry(el, &ar->ar_numaddr_ranges,
-					    el_link) {
-				expr_count = 0;
-				list_for_each_entry(re, &el->el_exprs,
-						    re_link) {
-					expr_count++;
-					if (re->re_stride > 1 ||
-					    (last_diff > 0 && last_hi != 255) ||
-					    (last_diff > 0 && last_hi == 255 &&
-					     re->re_lo > 0))
-						return false;
-					last_hi = re->re_hi;
-					last_diff = re->re_hi - re->re_lo;
-				}
-			}
-		}
-	}
-
-	return true;
-}
-
-/**
- * Takes a linked list of nidrange expressions, determines the minimum
- * and maximum NIDs, and writes them as strings into \a min_nid and
- * \a max_nid
- *
- * \param	*nidlist
- * \param	*min_nid
- * \param	*max_nid
- */
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
-			       char *max_nid, size_t nidstr_length)
-{
-	struct nidrange *nr;
-	struct netstrfns *nf = NULL;
-	int netnum = -1;
-	__u32 min_addr;
-	__u32 max_addr;
-	char *lndname = NULL;
-	char min_addr_str[IPSTRING_LENGTH];
-	char max_addr_str[IPSTRING_LENGTH];
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		nf = nr->nr_netstrfns;
-		lndname = nf->nf_name;
-		if (netnum == -1)
-			netnum = nr->nr_netnum;
-
-		nf->nf_min_max(nidlist, &min_addr, &max_addr);
-	}
-	nf->nf_addr2str(min_addr, min_addr_str, sizeof(min_addr_str));
-	nf->nf_addr2str(max_addr, max_addr_str, sizeof(max_addr_str));
-
-	snprintf(min_nid, nidstr_length, "%s@%s%d", min_addr_str, lndname,
-		 netnum);
-	snprintf(max_nid, nidstr_length, "%s@%s%d", max_addr_str, lndname,
-		 netnum);
-}
-EXPORT_SYMBOL(cfs_nidrange_find_min_max);
-
-/**
- * Determines the min and max NID values for num LNDs
- *
- * \param	*nidlist
- * \param	*min_nid
- * \param	*max_nid
- */
-static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid,
-			    __u32 *max_nid)
-{
-	struct nidrange	*nr;
-	struct addrrange *ar;
-	unsigned int tmp_min_addr = 0;
-	unsigned int tmp_max_addr = 0;
-	unsigned int min_addr = 0;
-	unsigned int max_addr = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			cfs_num_ar_min_max(ar, &tmp_min_addr,
-					   &tmp_max_addr);
-			if (tmp_min_addr < min_addr || !min_addr)
-				min_addr = tmp_min_addr;
-			if (tmp_max_addr > max_addr)
-				max_addr = tmp_max_addr;
-		}
-	}
-	*max_nid = max_addr;
-	*min_nid = min_addr;
-}
-
-/**
- * Takes an nidlist and determines the minimum and maximum
- * ip addresses.
- *
- * \param	*nidlist
- * \param	*min_nid
- * \param	*max_nid
- */
-static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid,
-			   __u32 *max_nid)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-	__u32 tmp_min_ip_addr = 0;
-	__u32 tmp_max_ip_addr = 0;
-	__u32 min_ip_addr = 0;
-	__u32 max_ip_addr = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			cfs_ip_ar_min_max(ar, &tmp_min_ip_addr,
-					  &tmp_max_ip_addr);
-			if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr)
-				min_ip_addr = tmp_min_ip_addr;
-			if (tmp_max_ip_addr > max_ip_addr)
-				max_ip_addr = tmp_max_ip_addr;
-		}
-	}
-
-	if (min_nid)
-		*min_nid = min_ip_addr;
-	if (max_nid)
-		*max_nid = max_ip_addr;
-}
-
-static int
-libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
-{
-	*addr = 0;
-	return 1;
-}
-
-static void
-libcfs_ip_addr2str(__u32 addr, char *str, size_t size)
-{
-	snprintf(str, size, "%u.%u.%u.%u",
-		 (addr >> 24) & 0xff, (addr >> 16) & 0xff,
-		 (addr >> 8) & 0xff, addr & 0xff);
-}
-
-/*
- * CAVEAT EMPTOR XscanfX
- * I use "%n" at the end of a sscanf format to detect trailing junk.  However
- * sscanf may return immediately if it sees the terminating '\0' in a string, so
- * I initialise the %n variable to the expected length.  If sscanf sets it,
- * fine; if it doesn't, then the scan ended at the end of the string, which is
- * fine too :)
- */
-static int
-libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
-{
-	unsigned int	a;
-	unsigned int	b;
-	unsigned int	c;
-	unsigned int	d;
-	int		n = nob; /* XscanfX */
-
-	/* numeric IP? */
-	if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
-	    n == nob &&
-	    !(a & ~0xff) && !(b & ~0xff) &&
-	    !(c & ~0xff) && !(d & ~0xff)) {
-		*addr = ((a << 24) | (b << 16) | (c << 8) | d);
-		return 1;
-	}
-
-	return 0;
-}
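The %n trick deserves a worked illustration, since \a str need not be NUL-terminated exactly at \a nob (expected results shown as comments; the input values are made up):

    __u32 a;

    libcfs_ip_str2addr("10.0.0.1", 8, &a);  /* 1: n == nob, a = 0x0a000001 */
    libcfs_ip_str2addr("10.0.0.1x", 9, &a); /* 0: n == 8 != nob, junk */
    libcfs_ip_str2addr("256.0.0.1", 9, &a); /* 0: first octet > 0xff */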
-
-/* Used by lnet/config.c so it can't be static */
-int
-cfs_ip_addr_parse(char *str, int len, struct list_head *list)
-{
-	struct cfs_expr_list *el;
-	struct cfs_lstr src;
-	int rc;
-	int i;
-
-	src.ls_str = str;
-	src.ls_len = len;
-	i = 0;
-
-	while (src.ls_str) {
-		struct cfs_lstr res;
-
-		if (!cfs_gettok(&src, '.', &res)) {
-			rc = -EINVAL;
-			goto out;
-		}
-
-		rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el);
-		if (rc)
-			goto out;
-
-		list_add_tail(&el->el_link, list);
-		i++;
-	}
-
-	if (i == 4)
-		return 0;
-
-	rc = -EINVAL;
-out:
-	cfs_expr_list_free_list(list);
-
-	return rc;
-}
-
-static int
-libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list)
-{
-	int i = 0, j = 0;
-	struct cfs_expr_list *el;
-
-	list_for_each_entry(el, list, el_link) {
-		LASSERT(j++ < 4);
-		if (i)
-			i += scnprintf(buffer + i, count - i, ".");
-		i += cfs_expr_list_print(buffer + i, count - i, el);
-	}
-	return i;
-}
-
-/**
- * Matches address (\a addr) against address set encoded in \a list.
- *
- * \retval 1 if \a addr matches
- * \retval 0 otherwise
- */
-int
-cfs_ip_addr_match(__u32 addr, struct list_head *list)
-{
-	struct cfs_expr_list *el;
-	int i = 0;
-
-	list_for_each_entry_reverse(el, list, el_link) {
-		if (!cfs_expr_list_match(addr & 0xff, el))
-			return 0;
-		addr >>= 8;
-		i++;
-	}
-
-	return i == 4;
-}
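Note the reverse walk: the last expression list in \a list corresponds to the least-significant octet, so the address is shifted right one byte per step. Paired with cfs_ip_addr_parse() above (values illustrative):

    /* Match 192.168.1.4 against the range "192.168.1.[1-8]". */
    LIST_HEAD(list);
    char range[] = "192.168.1.[1-8]";
    __u32 addr = (192 << 24) | (168 << 16) | (1 << 8) | 4;

    if (!cfs_ip_addr_parse(range, strlen(range), &list)) {
            CDEBUG(D_NET, "match=%d\n",             /* match=1 */
                   cfs_ip_addr_match(addr, &list));
            cfs_expr_list_free_list(&list);
    }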
-
-static void
-libcfs_decnum_addr2str(__u32 addr, char *str, size_t size)
-{
-	snprintf(str, size, "%u", addr);
-}
-
-static int
-libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
-{
-	int     n;
-
-	n = nob;
-	if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
-		return 1;
-
-	n = nob;
-	if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
-		return 1;
-
-	n = nob;
-	if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
-		return 1;
-
-	return 0;
-}
-
-/**
- * Nf_parse_addrlist method for networks using numeric addresses.
- *
- * Examples of such networks are gm and elan.
- *
- * \retval 0 if \a str parses to a numeric address
- * \retval -errno otherwise
- */
-static int
-libcfs_num_parse(char *str, int len, struct list_head *list)
-{
-	struct cfs_expr_list *el;
-	int	rc;
-
-	rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el);
-	if (!rc)
-		list_add_tail(&el->el_link, list);
-
-	return rc;
-}
-
-static int
-libcfs_num_addr_range_print(char *buffer, int count, struct list_head *list)
-{
-	int i = 0, j = 0;
-	struct cfs_expr_list *el;
-
-	list_for_each_entry(el, list, el_link) {
-		LASSERT(j++ < 1);
-		i += cfs_expr_list_print(buffer + i, count - i, el);
-	}
-	return i;
-}
-
-/*
- * Nf_match_addr method for networks using numeric addresses
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-static int
-libcfs_num_match(__u32 addr, struct list_head *numaddr)
-{
-	struct cfs_expr_list *el;
-
-	LASSERT(!list_empty(numaddr));
-	el = list_entry(numaddr->next, struct cfs_expr_list, el_link);
-
-	return cfs_expr_list_match(addr, el);
-}
-
-static struct netstrfns libcfs_netstrfns[] = {
-	{ .nf_type		= LOLND,
-	  .nf_name		= "lo",
-	  .nf_modname		= "klolnd",
-	  .nf_addr2str		= libcfs_decnum_addr2str,
-	  .nf_str2addr		= libcfs_lo_str2addr,
-	  .nf_parse_addrlist	= libcfs_num_parse,
-	  .nf_print_addrlist	= libcfs_num_addr_range_print,
-	  .nf_match_addr	= libcfs_num_match,
-	  .nf_is_contiguous	= cfs_num_is_contiguous,
-	  .nf_min_max		= cfs_num_min_max },
-	{ .nf_type		= SOCKLND,
-	  .nf_name		= "tcp",
-	  .nf_modname		= "ksocklnd",
-	  .nf_addr2str		= libcfs_ip_addr2str,
-	  .nf_str2addr		= libcfs_ip_str2addr,
-	  .nf_parse_addrlist	= cfs_ip_addr_parse,
-	  .nf_print_addrlist	= libcfs_ip_addr_range_print,
-	  .nf_match_addr	= cfs_ip_addr_match,
-	  .nf_is_contiguous	= cfs_ip_is_contiguous,
-	  .nf_min_max		= cfs_ip_min_max },
-	{ .nf_type		= O2IBLND,
-	  .nf_name		= "o2ib",
-	  .nf_modname		= "ko2iblnd",
-	  .nf_addr2str		= libcfs_ip_addr2str,
-	  .nf_str2addr		= libcfs_ip_str2addr,
-	  .nf_parse_addrlist	= cfs_ip_addr_parse,
-	  .nf_print_addrlist	= libcfs_ip_addr_range_print,
-	  .nf_match_addr	= cfs_ip_addr_match,
-	  .nf_is_contiguous	= cfs_ip_is_contiguous,
-	  .nf_min_max		= cfs_ip_min_max },
-	{ .nf_type		= GNILND,
-	  .nf_name		= "gni",
-	  .nf_modname		= "kgnilnd",
-	  .nf_addr2str		= libcfs_decnum_addr2str,
-	  .nf_str2addr		= libcfs_num_str2addr,
-	  .nf_parse_addrlist	= libcfs_num_parse,
-	  .nf_print_addrlist	= libcfs_num_addr_range_print,
-	  .nf_match_addr	= libcfs_num_match,
-	  .nf_is_contiguous	= cfs_num_is_contiguous,
-	  .nf_min_max		= cfs_num_min_max },
-	{ .nf_type		= GNIIPLND,
-	  .nf_name		= "gip",
-	  .nf_modname		= "kgnilnd",
-	  .nf_addr2str		= libcfs_ip_addr2str,
-	  .nf_str2addr		= libcfs_ip_str2addr,
-	  .nf_parse_addrlist	= cfs_ip_addr_parse,
-	  .nf_print_addrlist	= libcfs_ip_addr_range_print,
-	  .nf_match_addr	= cfs_ip_addr_match,
-	  .nf_is_contiguous	= cfs_ip_is_contiguous,
-	  .nf_min_max		= cfs_ip_min_max },
-};
-
-static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns);
-
-static struct netstrfns *
-libcfs_lnd2netstrfns(__u32 lnd)
-{
-	int i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++)
-		if (lnd == libcfs_netstrfns[i].nf_type)
-			return &libcfs_netstrfns[i];
-
-	return NULL;
-}
-
-static struct netstrfns *
-libcfs_namenum2netstrfns(const char *name)
-{
-	struct netstrfns *nf;
-	int i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++) {
-		nf = &libcfs_netstrfns[i];
-		if (!strncmp(name, nf->nf_name, strlen(nf->nf_name)))
-			return nf;
-	}
-	return NULL;
-}
-
-static struct netstrfns *
-libcfs_name2netstrfns(const char *name)
-{
-	int    i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++)
-		if (!strcmp(libcfs_netstrfns[i].nf_name, name))
-			return &libcfs_netstrfns[i];
-
-	return NULL;
-}
-
-int
-libcfs_isknown_lnd(__u32 lnd)
-{
-	return !!libcfs_lnd2netstrfns(lnd);
-}
-EXPORT_SYMBOL(libcfs_isknown_lnd);
-
-char *
-libcfs_lnd2modname(__u32 lnd)
-{
-	struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
-
-	return nf ? nf->nf_modname : NULL;
-}
-EXPORT_SYMBOL(libcfs_lnd2modname);
-
-int
-libcfs_str2lnd(const char *str)
-{
-	struct netstrfns *nf = libcfs_name2netstrfns(str);
-
-	if (nf)
-		return nf->nf_type;
-
-	return -ENXIO;
-}
-EXPORT_SYMBOL(libcfs_str2lnd);
-
-char *
-libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size)
-{
-	struct netstrfns *nf;
-
-	nf = libcfs_lnd2netstrfns(lnd);
-	if (!nf)
-		snprintf(buf, buf_size, "?%u?", lnd);
-	else
-		snprintf(buf, buf_size, "%s", nf->nf_name);
-
-	return buf;
-}
-EXPORT_SYMBOL(libcfs_lnd2str_r);
-
-char *
-libcfs_net2str_r(__u32 net, char *buf, size_t buf_size)
-{
-	__u32 nnum = LNET_NETNUM(net);
-	__u32 lnd = LNET_NETTYP(net);
-	struct netstrfns *nf;
-
-	nf = libcfs_lnd2netstrfns(lnd);
-	if (!nf)
-		snprintf(buf, buf_size, "<%u:%u>", lnd, nnum);
-	else if (!nnum)
-		snprintf(buf, buf_size, "%s", nf->nf_name);
-	else
-		snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum);
-
-	return buf;
-}
-EXPORT_SYMBOL(libcfs_net2str_r);
-
-char *
-libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size)
-{
-	__u32 addr = LNET_NIDADDR(nid);
-	__u32 net = LNET_NIDNET(nid);
-	__u32 nnum = LNET_NETNUM(net);
-	__u32 lnd = LNET_NETTYP(net);
-	struct netstrfns *nf;
-
-	if (nid == LNET_NID_ANY) {
-		strncpy(buf, "<?>", buf_size);
-		buf[buf_size - 1] = '\0';
-		return buf;
-	}
-
-	nf = libcfs_lnd2netstrfns(lnd);
-	if (!nf) {
-		snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum);
-	} else {
-		size_t addr_len;
-
-		nf->nf_addr2str(addr, buf, buf_size);
-		addr_len = strlen(buf);
-		if (!nnum)
-			snprintf(buf + addr_len, buf_size - addr_len, "@%s",
-				 nf->nf_name);
-		else
-			snprintf(buf + addr_len, buf_size - addr_len, "@%s%u",
-				 nf->nf_name, nnum);
-	}
-
-	return buf;
-}
-EXPORT_SYMBOL(libcfs_nid2str_r);
-
-static struct netstrfns *
-libcfs_str2net_internal(const char *str, __u32 *net)
-{
-	struct netstrfns *nf = NULL;
-	int nob;
-	unsigned int netnum;
-	int i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++) {
-		nf = &libcfs_netstrfns[i];
-		if (!strncmp(str, nf->nf_name, strlen(nf->nf_name)))
-			break;
-	}
-
-	if (i == libcfs_nnetstrfns)
-		return NULL;
-
-	nob = strlen(nf->nf_name);
-
-	if (strlen(str) == (unsigned int)nob) {
-		netnum = 0;
-	} else {
-		if (nf->nf_type == LOLND) /* net number not allowed */
-			return NULL;
-
-		str += nob;
-		i = strlen(str);
-		if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
-		    i != (int)strlen(str))
-			return NULL;
-	}
-
-	*net = LNET_MKNET(nf->nf_type, netnum);
-	return nf;
-}
-
-__u32
-libcfs_str2net(const char *str)
-{
-	__u32  net;
-
-	if (libcfs_str2net_internal(str, &net))
-		return net;
-
-	return LNET_NIDNET(LNET_NID_ANY);
-}
-EXPORT_SYMBOL(libcfs_str2net);
-
-lnet_nid_t
-libcfs_str2nid(const char *str)
-{
-	const char *sep = strchr(str, '@');
-	struct netstrfns *nf;
-	__u32 net;
-	__u32 addr;
-
-	if (sep) {
-		nf = libcfs_str2net_internal(sep + 1, &net);
-		if (!nf)
-			return LNET_NID_ANY;
-	} else {
-		sep = str + strlen(str);
-		net = LNET_MKNET(SOCKLND, 0);
-		nf = libcfs_lnd2netstrfns(SOCKLND);
-		LASSERT(nf);
-	}
-
-	if (!nf->nf_str2addr(str, (int)(sep - str), &addr))
-		return LNET_NID_ANY;
-
-	return LNET_MKNID(net, addr);
-}
-EXPORT_SYMBOL(libcfs_str2nid);
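Worth noting: with no '@' separator, libcfs_str2nid() assumes tcp0, i.e. a bare IP is treated as a socklnd NID. A round-trip sketch (the address is made up):

    char buf[LNET_NIDSTR_SIZE];
    lnet_nid_t nid = libcfs_str2nid("10.2.3.4");    /* defaults to @tcp */

    libcfs_nid2str_r(nid, buf, sizeof(buf));        /* "10.2.3.4@tcp" */
    LASSERT(libcfs_str2nid(buf) == nid);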
-
-char *
-libcfs_id2str(struct lnet_process_id id)
-{
-	char *str = libcfs_next_nidstring();
-
-	if (id.pid == LNET_PID_ANY) {
-		snprintf(str, LNET_NIDSTR_SIZE,
-			 "LNET_PID_ANY-%s", libcfs_nid2str(id.nid));
-		return str;
-	}
-
-	snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s",
-		 id.pid & LNET_PID_USERFLAG ? "U" : "",
-		 id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid));
-	return str;
-}
-EXPORT_SYMBOL(libcfs_id2str);
-
-int
-libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
-{
-	if (!strcmp(str, "*")) {
-		*nidp = LNET_NID_ANY;
-		return 1;
-	}
-
-	*nidp = libcfs_str2nid(str);
-	return *nidp != LNET_NID_ANY;
-}
-EXPORT_SYMBOL(libcfs_str2anynid);

+ 0 - 456
drivers/staging/lustre/lnet/lnet/peer.c

@@ -1,456 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/peer.c
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-int
-lnet_peer_tables_create(void)
-{
-	struct lnet_peer_table *ptable;
-	struct list_head *hash;
-	int i;
-	int j;
-
-	the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
-						   sizeof(*ptable));
-	if (!the_lnet.ln_peer_tables) {
-		CERROR("Failed to allocate cpu-partition peer tables\n");
-		return -ENOMEM;
-	}
-
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		INIT_LIST_HEAD(&ptable->pt_deathrow);
-
-		hash = kvmalloc_cpt(LNET_PEER_HASH_SIZE * sizeof(*hash),
-				    GFP_KERNEL, i);
-		if (!hash) {
-			CERROR("Failed to create peer hash table\n");
-			lnet_peer_tables_destroy();
-			return -ENOMEM;
-		}
-
-		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
-			INIT_LIST_HEAD(&hash[j]);
-		ptable->pt_hash = hash; /* sign of initialization */
-	}
-
-	return 0;
-}
-
-void
-lnet_peer_tables_destroy(void)
-{
-	struct lnet_peer_table *ptable;
-	struct list_head *hash;
-	int i;
-	int j;
-
-	if (!the_lnet.ln_peer_tables)
-		return;
-
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		hash = ptable->pt_hash;
-		if (!hash) /* not initialized */
-			break;
-
-		LASSERT(list_empty(&ptable->pt_deathrow));
-
-		ptable->pt_hash = NULL;
-		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
-			LASSERT(list_empty(&hash[j]));
-
-		kvfree(hash);
-	}
-
-	cfs_percpt_free(the_lnet.ln_peer_tables);
-	the_lnet.ln_peer_tables = NULL;
-}
-
-static void
-lnet_peer_table_cleanup_locked(struct lnet_ni *ni,
-			       struct lnet_peer_table *ptable)
-{
-	int i;
-	struct lnet_peer *lp;
-	struct lnet_peer *tmp;
-
-	for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
-		list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
-					 lp_hashlist) {
-			if (ni && ni != lp->lp_ni)
-				continue;
-			list_del_init(&lp->lp_hashlist);
-			/* Lose hash table's ref */
-			ptable->pt_zombies++;
-			lnet_peer_decref_locked(lp);
-		}
-	}
-}
-
-static void
-lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
-				     int cpt_locked)
-{
-	int i;
-
-	for (i = 3; ptable->pt_zombies; i++) {
-		lnet_net_unlock(cpt_locked);
-
-		if (is_power_of_2(i)) {
-			CDEBUG(D_WARNING,
-			       "Waiting for %d zombies on peer table\n",
-			       ptable->pt_zombies);
-		}
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ >> 1);
-		lnet_net_lock(cpt_locked);
-	}
-}
-
-static void
-lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni,
-				struct lnet_peer_table *ptable,
-				int cpt_locked)
-{
-	struct lnet_peer *lp;
-	struct lnet_peer *tmp;
-	lnet_nid_t lp_nid;
-	int i;
-
-	for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
-		list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
-					 lp_hashlist) {
-			if (ni != lp->lp_ni)
-				continue;
-
-			if (!lp->lp_rtr_refcount)
-				continue;
-
-			lp_nid = lp->lp_nid;
-
-			lnet_net_unlock(cpt_locked);
-			lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
-			lnet_net_lock(cpt_locked);
-		}
-	}
-}
-
-void
-lnet_peer_tables_cleanup(struct lnet_ni *ni)
-{
-	struct lnet_peer_table *ptable;
-	struct list_head deathrow;
-	struct lnet_peer *lp;
-	struct lnet_peer *temp;
-	int i;
-
-	INIT_LIST_HEAD(&deathrow);
-
-	LASSERT(the_lnet.ln_shutdown || ni);
-	/*
-	 * If just deleting the peers for a NI, get rid of any routes these
-	 * peers are gateways for.
-	 */
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		lnet_net_lock(i);
-		lnet_peer_table_del_rtrs_locked(ni, ptable, i);
-		lnet_net_unlock(i);
-	}
-
-	/*
-	 * Start the process of moving the applicable peers to
-	 * deathrow.
-	 */
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		lnet_net_lock(i);
-		lnet_peer_table_cleanup_locked(ni, ptable);
-		lnet_net_unlock(i);
-	}
-
-	/* Cleanup all entries on deathrow. */
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		lnet_net_lock(i);
-		lnet_peer_table_deathrow_wait_locked(ptable, i);
-		list_splice_init(&ptable->pt_deathrow, &deathrow);
-		lnet_net_unlock(i);
-	}
-
-	list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) {
-		list_del(&lp->lp_hashlist);
-		kfree(lp);
-	}
-}
-
-void
-lnet_destroy_peer_locked(struct lnet_peer *lp)
-{
-	struct lnet_peer_table *ptable;
-
-	LASSERT(!lp->lp_refcount);
-	LASSERT(!lp->lp_rtr_refcount);
-	LASSERT(list_empty(&lp->lp_txq));
-	LASSERT(list_empty(&lp->lp_hashlist));
-	LASSERT(!lp->lp_txqnob);
-
-	ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
-	LASSERT(ptable->pt_number > 0);
-	ptable->pt_number--;
-
-	lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
-	lp->lp_ni = NULL;
-
-	list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
-	LASSERT(ptable->pt_zombies > 0);
-	ptable->pt_zombies--;
-}
-
-struct lnet_peer *
-lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
-{
-	struct list_head *peers;
-	struct lnet_peer *lp;
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
-	list_for_each_entry(lp, peers, lp_hashlist) {
-		if (lp->lp_nid == nid) {
-			lnet_peer_addref_locked(lp);
-			return lp;
-		}
-	}
-
-	return NULL;
-}
-
-int
-lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt)
-{
-	struct lnet_peer_table *ptable;
-	struct lnet_peer *lp = NULL;
-	struct lnet_peer *lp2;
-	int cpt2;
-	int rc = 0;
-
-	*lpp = NULL;
-	if (the_lnet.ln_shutdown) /* it's shutting down */
-		return -ESHUTDOWN;
-
-	/* cpt can be LNET_LOCK_EX if it's called from router functions */
-	cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
-
-	ptable = the_lnet.ln_peer_tables[cpt2];
-	lp = lnet_find_peer_locked(ptable, nid);
-	if (lp) {
-		*lpp = lp;
-		return 0;
-	}
-
-	if (!list_empty(&ptable->pt_deathrow)) {
-		lp = list_entry(ptable->pt_deathrow.next,
-				struct lnet_peer, lp_hashlist);
-		list_del(&lp->lp_hashlist);
-	}
-
-	/*
-	 * take an extra refcount in case another thread has shut down LNet
-	 * and destroyed the locks and peer table before I finish the allocation
-	 */
-	ptable->pt_number++;
-	lnet_net_unlock(cpt);
-
-	if (lp)
-		memset(lp, 0, sizeof(*lp));
-	else
-		lp = kzalloc_cpt(sizeof(*lp), GFP_NOFS, cpt2);
-
-	if (!lp) {
-		rc = -ENOMEM;
-		lnet_net_lock(cpt);
-		goto out;
-	}
-
-	INIT_LIST_HEAD(&lp->lp_txq);
-	INIT_LIST_HEAD(&lp->lp_rtrq);
-	INIT_LIST_HEAD(&lp->lp_routes);
-
-	lp->lp_notify = 0;
-	lp->lp_notifylnd = 0;
-	lp->lp_notifying = 0;
-	lp->lp_alive_count = 0;
-	lp->lp_timestamp = 0;
-	lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
-	lp->lp_last_alive = jiffies; /* assumes alive */
-	lp->lp_last_query = 0; /* haven't asked NI yet */
-	lp->lp_ping_timestamp = 0;
-	lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
-	lp->lp_nid = nid;
-	lp->lp_cpt = cpt2;
-	lp->lp_refcount = 2;	/* 1 for caller; 1 for hash */
-	lp->lp_rtr_refcount = 0;
-
-	lnet_net_lock(cpt);
-
-	if (the_lnet.ln_shutdown) {
-		rc = -ESHUTDOWN;
-		goto out;
-	}
-
-	lp2 = lnet_find_peer_locked(ptable, nid);
-	if (lp2) {
-		*lpp = lp2;
-		goto out;
-	}
-
-	lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
-	if (!lp->lp_ni) {
-		rc = -EHOSTUNREACH;
-		goto out;
-	}
-
-	lp->lp_txcredits = lp->lp_ni->ni_peertxcredits;
-	lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
-	lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-	lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-
-	list_add_tail(&lp->lp_hashlist,
-		      &ptable->pt_hash[lnet_nid2peerhash(nid)]);
-	ptable->pt_version++;
-	*lpp = lp;
-
-	return 0;
-out:
-	if (lp)
-		list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
-	ptable->pt_number--;
-	return rc;
-}
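lnet_nid2peer_locked() is a textbook drop-the-lock-to-allocate pattern: pt_number is bumped as a reservation while still locked, the lock is dropped for the (possibly sleeping) allocation, and after relocking both ln_shutdown and the hash table are re-checked because another thread may have raced in. Its skeleton, keeping the names from the source:

    /*
     * ptable->pt_number++;             reserve while still locked
     * lnet_net_unlock(cpt);
     * lp = kzalloc_cpt(...);           may sleep, hence the unlock
     * lnet_net_lock(cpt);
     * if (the_lnet.ln_shutdown)        the world may have changed...
     *         goto out;
     * lp2 = lnet_find_peer_locked(ptable, nid);
     * if (lp2)                         ...or someone beat us to it
     *         goto out;                (out: undoes the reservation)
     */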
-
-void
-lnet_debug_peer(lnet_nid_t nid)
-{
-	char *aliveness = "NA";
-	struct lnet_peer *lp;
-	int rc;
-	int cpt;
-
-	cpt = lnet_cpt_of_nid(nid);
-	lnet_net_lock(cpt);
-
-	rc = lnet_nid2peer_locked(&lp, nid, cpt);
-	if (rc) {
-		lnet_net_unlock(cpt);
-		CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
-		return;
-	}
-
-	if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
-		aliveness = lp->lp_alive ? "up" : "down";
-
-	CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
-	       libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
-	       aliveness, lp->lp_ni->ni_peertxcredits,
-	       lp->lp_rtrcredits, lp->lp_minrtrcredits,
-	       lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
-
-	lnet_peer_decref_locked(lp);
-
-	lnet_net_unlock(cpt);
-}
-
-int
-lnet_get_peer_info(__u32 peer_index, __u64 *nid,
-		   char aliveness[LNET_MAX_STR_LEN],
-		   __u32 *cpt_iter, __u32 *refcount,
-		   __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
-		   __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
-		   __u32 *peer_tx_qnob)
-{
-	struct lnet_peer_table *peer_table;
-	struct lnet_peer *lp;
-	bool found = false;
-	int lncpt, j;
-
-	/* get the number of CPTs */
-	lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
-
-	/*
-	 * if the cpt number to be examined is >= the number of cpts in
-	 * the system then indicate that there are no more cpts to examin
-	 */
-	if (*cpt_iter >= lncpt)
-		return -ENOENT;
-
-	/* get the current table */
-	peer_table = the_lnet.ln_peer_tables[*cpt_iter];
-	/* if the ptable is NULL then there are no more cpts to examine */
-	if (!peer_table)
-		return -ENOENT;
-
-	lnet_net_lock(*cpt_iter);
-
-	for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
-		struct list_head *peers = &peer_table->pt_hash[j];
-
-		list_for_each_entry(lp, peers, lp_hashlist) {
-			if (peer_index-- > 0)
-				continue;
-
-			snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
-			if (lnet_isrouter(lp) ||
-			    lnet_peer_aliveness_enabled(lp))
-				snprintf(aliveness, LNET_MAX_STR_LEN, "%s",
-					 lp->lp_alive ? "up" : "down");
-
-			*nid = lp->lp_nid;
-			*refcount = lp->lp_refcount;
-			*ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
-			*peer_tx_credits = lp->lp_txcredits;
-			*peer_rtr_credits = lp->lp_rtrcredits;
-			*peer_min_rtr_credits = lp->lp_mintxcredits;
-			*peer_tx_qnob = lp->lp_txqnob;
-
-			found = true;
-		}
-	}
-	lnet_net_unlock(*cpt_iter);
-
-	*cpt_iter = lncpt;
-
-	return found ? 0 : -ENOENT;
-}

+ 0 - 1799
drivers/staging/lustre/lnet/lnet/router.c

@@ -1,1799 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- *
- *   This file is part of Portals
- *   http://sourceforge.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/completion.h>
-#include <linux/lnet/lib-lnet.h>
-
-#define LNET_NRB_TINY_MIN	512	/* min value for each CPT */
-#define LNET_NRB_TINY		(LNET_NRB_TINY_MIN * 4)
-#define LNET_NRB_SMALL_MIN	4096	/* min value for each CPT */
-#define LNET_NRB_SMALL		(LNET_NRB_SMALL_MIN * 4)
-#define LNET_NRB_SMALL_PAGES	1
-#define LNET_NRB_LARGE_MIN	256	/* min value for each CPT */
-#define LNET_NRB_LARGE		(LNET_NRB_LARGE_MIN * 4)
-#define LNET_NRB_LARGE_PAGES   ((LNET_MTU + PAGE_SIZE - 1) >> \
-				 PAGE_SHIFT)
-
-static char *forwarding = "";
-module_param(forwarding, charp, 0444);
-MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
-
-static int tiny_router_buffers;
-module_param(tiny_router_buffers, int, 0444);
-MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router");
-static int small_router_buffers;
-module_param(small_router_buffers, int, 0444);
-MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router");
-static int large_router_buffers;
-module_param(large_router_buffers, int, 0444);
-MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router");
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer");
-
-static int auto_down = 1;
-module_param(auto_down, int, 0444);
-MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
-
-int
-lnet_peer_buffer_credits(struct lnet_ni *ni)
-{
-	/* NI option overrides LNet default */
-	if (ni->ni_peerrtrcredits > 0)
-		return ni->ni_peerrtrcredits;
-	if (peer_buffer_credits > 0)
-		return peer_buffer_credits;
-
-	/*
-	 * As an approximation, allow this peer the same number of router
-	 * buffers as it is allowed outstanding sends
-	 */
-	return ni->ni_peertxcredits;
-}
-
-/* forward ref's */
-static int lnet_router_checker(void *);
-
-static int check_routers_before_use;
-module_param(check_routers_before_use, int, 0444);
-MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
-
-int avoid_asym_router_failure = 1;
-module_param(avoid_asym_router_failure, int, 0644);
-MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
-
-static int dead_router_check_interval = 60;
-module_param(dead_router_check_interval, int, 0644);
-MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)");
-
-static int live_router_check_interval = 60;
-module_param(live_router_check_interval, int, 0644);
-MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)");
-
-static int router_ping_timeout = 50;
-module_param(router_ping_timeout, int, 0644);
-MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query");
-
-int
-lnet_peers_start_down(void)
-{
-	return check_routers_before_use;
-}
-
-void
-lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
-		   unsigned long when)
-{
-	if (time_before(when, lp->lp_timestamp)) { /* out of date information */
-		CDEBUG(D_NET, "Out of date\n");
-		return;
-	}
-
-	lp->lp_timestamp = when;		/* update timestamp */
-	lp->lp_ping_deadline = 0;	       /* disable ping timeout */
-
-	if (lp->lp_alive_count &&	  /* got old news */
-	    (!lp->lp_alive) == (!alive)) {      /* new date for old news */
-		CDEBUG(D_NET, "Old news\n");
-		return;
-	}
-
-	/* Flag that notification is outstanding */
-
-	lp->lp_alive_count++;
-	lp->lp_alive = !(!alive);	       /* 1 bit! */
-	lp->lp_notify = 1;
-	lp->lp_notifylnd |= notifylnd;
-	if (lp->lp_alive)
-		lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
-
-	CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
-}
-
-static void
-lnet_ni_notify_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
-	int alive;
-	int notifylnd;
-
-	/*
-	 * Notify only in 1 thread at any time to ensure ordered notification.
-	 * NB individual events can be missed; the only guarantee is that you
-	 * always get the most recent news
-	 */
-	if (lp->lp_notifying || !ni)
-		return;
-
-	lp->lp_notifying = 1;
-
-	while (lp->lp_notify) {
-		alive = lp->lp_alive;
-		notifylnd = lp->lp_notifylnd;
-
-		lp->lp_notifylnd = 0;
-		lp->lp_notify    = 0;
-
-		if (notifylnd && ni->ni_lnd->lnd_notify) {
-			lnet_net_unlock(lp->lp_cpt);
-
-			/*
-			 * A new notification could happen now; I'll handle it
-			 * when control returns to me
-			 */
-			ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive);
-
-			lnet_net_lock(lp->lp_cpt);
-		}
-	}
-
-	lp->lp_notifying = 0;
-}
-
-static void
-lnet_rtr_addref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	LASSERT(lp->lp_rtr_refcount >= 0);
-
-	/* lnet_net_lock must be exclusively locked */
-	lp->lp_rtr_refcount++;
-	if (lp->lp_rtr_refcount == 1) {
-		struct list_head *pos;
-
-		/* a simple insertion sort */
-		list_for_each_prev(pos, &the_lnet.ln_routers) {
-			struct lnet_peer *rtr;
-
-			rtr = list_entry(pos, struct lnet_peer, lp_rtr_list);
-			if (rtr->lp_nid < lp->lp_nid)
-				break;
-		}
-
-		list_add(&lp->lp_rtr_list, pos);
-		/* addref for the_lnet.ln_routers */
-		lnet_peer_addref_locked(lp);
-		the_lnet.ln_routers_version++;
-	}
-}
-
-static void
-lnet_rtr_decref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	LASSERT(lp->lp_rtr_refcount > 0);
-
-	/* lnet_net_lock must be exclusively locked */
-	lp->lp_rtr_refcount--;
-	if (!lp->lp_rtr_refcount) {
-		LASSERT(list_empty(&lp->lp_routes));
-
-		if (lp->lp_rcd) {
-			list_add(&lp->lp_rcd->rcd_list,
-				 &the_lnet.ln_rcd_deathrow);
-			lp->lp_rcd = NULL;
-		}
-
-		list_del(&lp->lp_rtr_list);
-		/* decref for the_lnet.ln_routers */
-		lnet_peer_decref_locked(lp);
-		the_lnet.ln_routers_version++;
-	}
-}
-
-struct lnet_remotenet *
-lnet_find_net_locked(__u32 net)
-{
-	struct lnet_remotenet *rnet;
-	struct list_head *rn_list;
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	rn_list = lnet_net2rnethash(net);
-	list_for_each_entry(rnet, rn_list, lrn_list) {
-		if (rnet->lrn_net == net)
-			return rnet;
-	}
-	return NULL;
-}
-
-static void lnet_shuffle_seed(void)
-{
-	static int seeded;
-	struct lnet_ni *ni;
-
-	if (seeded)
-		return;
-
-	/*
-	 * Nodes with small feet have little entropy
-	 * the NID for this node gives the most entropy in the low bits
-	 */
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		__u32 lnd_type, seed;
-
-		lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-		if (lnd_type != LOLND) {
-			seed = (LNET_NIDADDR(ni->ni_nid) | lnd_type);
-			add_device_randomness(&seed, sizeof(seed));
-		}
-	}
-
-	seeded = 1;
-}
-
-/* NB expects LNET_LOCK held */
-static void
-lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
-{
-	unsigned int len = 0;
-	unsigned int offset = 0;
-	struct list_head *e;
-
-	lnet_shuffle_seed();
-
-	list_for_each(e, &rnet->lrn_routes) {
-		len++;
-	}
-
-	/* len+1 positions to add a new entry */
-	offset = prandom_u32_max(len + 1);
-	list_for_each(e, &rnet->lrn_routes) {
-		if (!offset)
-			break;
-		offset--;
-	}
-	list_add(&route->lr_list, e);
-	list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
-
-	the_lnet.ln_remote_nets_version++;
-	lnet_rtr_addref_locked(route->lr_gateway);
-}
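Rather than appending and shuffling later, lnet_add_route_to_rnet() inserts each route at a uniformly random one of the len + 1 possible positions, so equal-priority routes end up in a different order on every node; per the lnet_shuffle_seed() comment above, the per-node NID entropy is meant to make that ordering differ across nodes. The same idea in isolation, as a generic sketch:

    /* Insert @item at a uniformly random position in @head, where
     * @len is the current list length (len + 1 candidate slots). */
    static void list_add_random(struct list_head *item,
                                struct list_head *head, unsigned int len)
    {
            struct list_head *e;
            unsigned int offset = prandom_u32_max(len + 1);

            list_for_each(e, head) {
                    if (!offset)
                            break;
                    offset--;
            }
            list_add(item, e);
    }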
-
-int
-lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
-	       unsigned int priority)
-{
-	struct list_head *e;
-	struct lnet_remotenet *rnet;
-	struct lnet_remotenet *rnet2;
-	struct lnet_route *route;
-	struct lnet_ni *ni;
-	int add_route;
-	int rc;
-
-	CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n",
-	       libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));
-
-	if (gateway == LNET_NID_ANY ||
-	    LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
-	    net == LNET_NIDNET(LNET_NID_ANY) ||
-	    LNET_NETTYP(net) == LOLND ||
-	    LNET_NIDNET(gateway) == net ||
-	    (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255)))
-		return -EINVAL;
-
-	if (lnet_islocalnet(net))	       /* it's a local network */
-		return -EEXIST;
-
-	/* Assume net, route, all new */
-	route = kzalloc(sizeof(*route), GFP_NOFS);
-	rnet = kzalloc(sizeof(*rnet), GFP_NOFS);
-	if (!route || !rnet) {
-		CERROR("Out of memory creating route %s %d %s\n",
-		       libcfs_net2str(net), hops, libcfs_nid2str(gateway));
-		kfree(route);
-		kfree(rnet);
-		return -ENOMEM;
-	}
-
-	INIT_LIST_HEAD(&rnet->lrn_routes);
-	rnet->lrn_net = net;
-	route->lr_hops = hops;
-	route->lr_net = net;
-	route->lr_priority = priority;
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
-	if (rc) {
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		kfree(route);
-		kfree(rnet);
-
-		if (rc == -EHOSTUNREACH) /* gateway is not on a local net */
-			return rc;	/* ignore the route entry */
-		CERROR("Error %d creating route %s %d %s\n", rc,
-		       libcfs_net2str(net), hops,
-		       libcfs_nid2str(gateway));
-		return rc;
-	}
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	rnet2 = lnet_find_net_locked(net);
-	if (!rnet2) {
-		/* new network */
-		list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
-		rnet2 = rnet;
-	}
-
-	/* Search for a duplicate route (adding a duplicate is a NOOP) */
-	add_route = 1;
-	list_for_each(e, &rnet2->lrn_routes) {
-		struct lnet_route *route2;
-
-		route2 = list_entry(e, struct lnet_route, lr_list);
-		if (route2->lr_gateway == route->lr_gateway) {
-			add_route = 0;
-			break;
-		}
-
-		/* our lookups must be true */
-		LASSERT(route2->lr_gateway->lp_nid != gateway);
-	}
-
-	if (add_route) {
-		lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
-		lnet_add_route_to_rnet(rnet2, route);
-
-		ni = route->lr_gateway->lp_ni;
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		/* XXX Assume alive */
-		if (ni->ni_lnd->lnd_notify)
-			ni->ni_lnd->lnd_notify(ni, gateway, 1);
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-
-	/* -1 for notify or !add_route */
-	lnet_peer_decref_locked(route->lr_gateway);
-	lnet_net_unlock(LNET_LOCK_EX);
-	rc = 0;
-
-	if (!add_route) {
-		rc = -EEXIST;
-		kfree(route);
-	}
-
-	if (rnet != rnet2)
-		kfree(rnet);
-
-	/* indicate to start up the router checker if configured */
-	wake_up(&the_lnet.ln_rc_waitq);
-
-	return rc;
-}
-
-int
-lnet_check_routes(void)
-{
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	struct lnet_route *route2;
-	struct list_head *e1;
-	struct list_head *e2;
-	int cpt;
-	struct list_head *rn_list;
-	int i;
-
-	cpt = lnet_net_lock_current();
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
-		rn_list = &the_lnet.ln_remote_nets_hash[i];
-		list_for_each(e1, rn_list) {
-			rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-			route2 = NULL;
-			list_for_each(e2, &rnet->lrn_routes) {
-				lnet_nid_t nid1;
-				lnet_nid_t nid2;
-				int net;
-
-				route = list_entry(e2, struct lnet_route, lr_list);
-
-				if (!route2) {
-					route2 = route;
-					continue;
-				}
-
-				if (route->lr_gateway->lp_ni ==
-				    route2->lr_gateway->lp_ni)
-					continue;
-
-				nid1 = route->lr_gateway->lp_nid;
-				nid2 = route2->lr_gateway->lp_nid;
-				net = rnet->lrn_net;
-
-				lnet_net_unlock(cpt);
-
-				CERROR("Routes to %s via %s and %s not supported\n",
-				       libcfs_net2str(net),
-				       libcfs_nid2str(nid1),
-				       libcfs_nid2str(nid2));
-				return -EINVAL;
-			}
-		}
-	}
-
-	lnet_net_unlock(cpt);
-	return 0;
-}
-
-int
-lnet_del_route(__u32 net, lnet_nid_t gw_nid)
-{
-	struct lnet_peer *gateway;
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	struct list_head *e1;
-	struct list_head *e2;
-	int rc = -ENOENT;
-	struct list_head *rn_list;
-	int idx = 0;
-
-	CDEBUG(D_NET, "Del route: net %s : gw %s\n",
-	       libcfs_net2str(net), libcfs_nid2str(gw_nid));
-
-	/*
-	 * NB Caller may specify either all routes via the given gateway
-	 * or a specific route entry (actual NIDs)
-	 */
-	lnet_net_lock(LNET_LOCK_EX);
-	if (net == LNET_NIDNET(LNET_NID_ANY))
-		rn_list = &the_lnet.ln_remote_nets_hash[0];
-	else
-		rn_list = lnet_net2rnethash(net);
-
- again:
-	list_for_each(e1, rn_list) {
-		rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-		if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
-		      net == rnet->lrn_net))
-			continue;
-
-		list_for_each(e2, &rnet->lrn_routes) {
-			route = list_entry(e2, struct lnet_route, lr_list);
-
-			gateway = route->lr_gateway;
-			if (!(gw_nid == LNET_NID_ANY ||
-			      gw_nid == gateway->lp_nid))
-				continue;
-
-			list_del(&route->lr_list);
-			list_del(&route->lr_gwlist);
-			the_lnet.ln_remote_nets_version++;
-
-			if (list_empty(&rnet->lrn_routes))
-				list_del(&rnet->lrn_list);
-			else
-				rnet = NULL;
-
-			lnet_rtr_decref_locked(gateway);
-			lnet_peer_decref_locked(gateway);
-
-			lnet_net_unlock(LNET_LOCK_EX);
-
-			kfree(route);
-			kfree(rnet);
-
-			rc = 0;
-			lnet_net_lock(LNET_LOCK_EX);
-			goto again;
-		}
-	}
-
-	if (net == LNET_NIDNET(LNET_NID_ANY) &&
-	    ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
-		rn_list = &the_lnet.ln_remote_nets_hash[idx];
-		goto again;
-	}
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return rc;
-}
-
-void
-lnet_destroy_routes(void)
-{
-	lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
-}
-
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg)
-{
-	int i, rc = -ENOENT, j;
-
-	if (!the_lnet.ln_rtrpools)
-		return rc;
-
-	for (i = 0; i < LNET_NRBPOOLS; i++) {
-		struct lnet_rtrbufpool *rbp;
-
-		lnet_net_lock(LNET_LOCK_EX);
-		cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) {
-			if (i++ != idx)
-				continue;
-
-			pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages;
-			pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers;
-			pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits;
-			pool_cfg->pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits;
-			rc = 0;
-			break;
-		}
-		lnet_net_unlock(LNET_LOCK_EX);
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	pool_cfg->pl_routing = the_lnet.ln_routing;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return rc;
-}
-
-int
-lnet_get_route(int idx, __u32 *net, __u32 *hops,
-	       lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
-{
-	struct list_head *e1;
-	struct list_head *e2;
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	int cpt;
-	int i;
-	struct list_head *rn_list;
-
-	cpt = lnet_net_lock_current();
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
-		rn_list = &the_lnet.ln_remote_nets_hash[i];
-		list_for_each(e1, rn_list) {
-			rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-			list_for_each(e2, &rnet->lrn_routes) {
-				route = list_entry(e2, struct lnet_route,
-						   lr_list);
-
-				if (!idx--) {
-					*net      = rnet->lrn_net;
-					*hops     = route->lr_hops;
-					*priority = route->lr_priority;
-					*gateway  = route->lr_gateway->lp_nid;
-					*alive = lnet_is_route_alive(route);
-					lnet_net_unlock(cpt);
-					return 0;
-				}
-			}
-		}
-	}
-
-	lnet_net_unlock(cpt);
-	return -ENOENT;
-}
-
-void
-lnet_swap_pinginfo(struct lnet_ping_info *info)
-{
-	int i;
-	struct lnet_ni_status *stat;
-
-	__swab32s(&info->pi_magic);
-	__swab32s(&info->pi_features);
-	__swab32s(&info->pi_pid);
-	__swab32s(&info->pi_nnis);
-	for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
-		stat = &info->pi_ni[i];
-		__swab64s(&stat->ns_nid);
-		__swab32s(&stat->ns_status);
-	}
-}
-
-/**
- * Parse router-checker ping info and record the number of down NIs for
- * remote networks on that router.
- */
-static void
-lnet_parse_rc_info(struct lnet_rc_data *rcd)
-{
-	struct lnet_ping_info *info = rcd->rcd_pinginfo;
-	struct lnet_peer *gw = rcd->rcd_gateway;
-	struct lnet_route *rte;
-
-	if (!gw->lp_alive)
-		return;
-
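-	/* a byte-swapped magic means the peer is opposite-endian: fix up in place */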
-	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
-		lnet_swap_pinginfo(info);
-
-	/* NB always racing with network! */
-	if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
-		CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
-		       libcfs_nid2str(gw->lp_nid), info->pi_magic);
-		gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
-		return;
-	}
-
-	gw->lp_ping_feats = info->pi_features;
-	if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) {
-		CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
-		       libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
-		return; /* nothing I can understand */
-	}
-
-	if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS))
-		return; /* can't carry NI status info */
-
-	list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
-		int down = 0;
-		int up = 0;
-		int i;
-
-		if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) {
-			rte->lr_downis = 1;
-			continue;
-		}
-
-		for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
-			struct lnet_ni_status *stat = &info->pi_ni[i];
-			lnet_nid_t nid = stat->ns_nid;
-
-			if (nid == LNET_NID_ANY) {
-				CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
-				       libcfs_nid2str(gw->lp_nid));
-				gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
-				return;
-			}
-
-			if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
-				continue;
-
-			if (stat->ns_status == LNET_NI_STATUS_DOWN) {
-				down++;
-				continue;
-			}
-
-			if (stat->ns_status == LNET_NI_STATUS_UP) {
-				if (LNET_NIDNET(nid) == rte->lr_net) {
-					up = 1;
-					break;
-				}
-				continue;
-			}
-
-			CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
-			       libcfs_nid2str(gw->lp_nid), stat->ns_status);
-			gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
-			return;
-		}
-
-		if (up) { /* ignore downed NIs if NI for dest network is up */
-			rte->lr_downis = 0;
-			continue;
-		}
-		/*
-		 * if @down is zero and this route is single-hop, it means
-		 * we couldn't find an NI for the target network
-		 */
-		if (!down && rte->lr_hops == 1)
-			down = 1;
-
-		rte->lr_downis = down;
-	}
-}
-
-static void
-lnet_router_checker_event(struct lnet_event *event)
-{
-	struct lnet_rc_data *rcd = event->md.user_ptr;
-	struct lnet_peer *lp;
-
-	LASSERT(rcd);
-
-	if (event->unlinked) {
-		LNetInvalidateMDHandle(&rcd->rcd_mdh);
-		return;
-	}
-
-	LASSERT(event->type == LNET_EVENT_SEND ||
-		event->type == LNET_EVENT_REPLY);
-
-	lp = rcd->rcd_gateway;
-	LASSERT(lp);
-
-	/*
-	 * NB: this is called while holding lnet_res_lock; a few places
-	 * need to hold both locks at the same time, so take care with
-	 * lock ordering
-	 */
-	lnet_net_lock(lp->lp_cpt);
-	if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
-		/* ignore if no longer a router or rcd is replaced */
-		goto out;
-	}
-
-	if (event->type == LNET_EVENT_SEND) {
-		lp->lp_ping_notsent = 0;
-		if (!event->status)
-			goto out;
-	}
-
-	/* LNET_EVENT_REPLY */
-	/*
-	 * A successful REPLY means the router is up.  If _any_ comms
-	 * to the router fail I assume it's down (this will happen if
-	 * we ping alive routers to try to detect router death before
-	 * apps get burned).
-	 */
-	lnet_notify_locked(lp, 1, !event->status, jiffies);
-
-	/*
-	 * The router checker will wake up very shortly and do the
-	 * actual notification.
-	 * XXX If 'lp' stops being a router before then, it will still
-	 * have the notification pending!!!
-	 */
-	if (avoid_asym_router_failure && !event->status)
-		lnet_parse_rc_info(rcd);
-
- out:
-	lnet_net_unlock(lp->lp_cpt);
-}
-
-static void
-lnet_wait_known_routerstate(void)
-{
-	struct lnet_peer *rtr;
-	struct list_head *entry;
-	int all_known;
-
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-
-	for (;;) {
-		int cpt = lnet_net_lock_current();
-
-		all_known = 1;
-		list_for_each(entry, &the_lnet.ln_routers) {
-			rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
-			if (!rtr->lp_alive_count) {
-				all_known = 0;
-				break;
-			}
-		}
-
-		lnet_net_unlock(cpt);
-
-		if (all_known)
-			return;
-
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-	}
-}
-
-void
-lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net)
-{
-	struct lnet_route *rte;
-
-	if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) {
-		list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
-			if (rte->lr_net == net) {
-				rte->lr_downis = 0;
-				break;
-			}
-		}
-	}
-}
-
-static void
-lnet_update_ni_status_locked(void)
-{
-	struct lnet_ni *ni;
-	time64_t now;
-	int timeout;
-
-	LASSERT(the_lnet.ln_routing);
-
-	timeout = router_ping_timeout +
-		  max(live_router_check_interval, dead_router_check_interval);
-
-	now = ktime_get_real_seconds();
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		if (ni->ni_lnd->lnd_type == LOLND)
-			continue;
-
-		if (now < ni->ni_last_alive + timeout)
-			continue;
-
-		lnet_ni_lock(ni);
-		/* re-check with lock */
-		if (now < ni->ni_last_alive + timeout) {
-			lnet_ni_unlock(ni);
-			continue;
-		}
-
-		LASSERT(ni->ni_status);
-
-		if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
-			CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
-			       libcfs_nid2str(ni->ni_nid), timeout);
-			/*
-			 * NB: so far, this is the only place to set
-			 * NI status to "down"
-			 */
-			ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
-		}
-		lnet_ni_unlock(ni);
-	}
-}
-
-static void
-lnet_destroy_rc_data(struct lnet_rc_data *rcd)
-{
-	LASSERT(list_empty(&rcd->rcd_list));
-	/* detached from network */
-	LASSERT(LNetMDHandleIsInvalid(rcd->rcd_mdh));
-
-	if (rcd->rcd_gateway) {
-		int cpt = rcd->rcd_gateway->lp_cpt;
-
-		lnet_net_lock(cpt);
-		lnet_peer_decref_locked(rcd->rcd_gateway);
-		lnet_net_unlock(cpt);
-	}
-
-	kfree(rcd->rcd_pinginfo);
-
-	kfree(rcd);
-}
-
-static struct lnet_rc_data *
-lnet_create_rc_data_locked(struct lnet_peer *gateway)
-{
-	struct lnet_rc_data *rcd = NULL;
-	struct lnet_ping_info *pi;
-	struct lnet_md md;
-	int rc;
-	int i;
-
-	lnet_net_unlock(gateway->lp_cpt);
-
-	rcd = kzalloc(sizeof(*rcd), GFP_NOFS);
-	if (!rcd)
-		goto out;
-
-	LNetInvalidateMDHandle(&rcd->rcd_mdh);
-	INIT_LIST_HEAD(&rcd->rcd_list);
-
-	pi = kzalloc(LNET_PINGINFO_SIZE, GFP_NOFS);
-	if (!pi)
-		goto out;
-
-	for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
-		pi->pi_ni[i].ns_nid = LNET_NID_ANY;
-		pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
-	}
-	rcd->rcd_pinginfo = pi;
-
-	md.start = pi;
-	md.user_ptr = rcd;
-	md.length = LNET_PINGINFO_SIZE;
-	md.threshold = LNET_MD_THRESH_INF;
-	md.options = LNET_MD_TRUNCATE;
-	md.eq_handle = the_lnet.ln_rc_eqh;
-
-	LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh));
-	rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh);
-	if (rc < 0) {
-		CERROR("Can't bind MD: %d\n", rc);
-		goto out;
-	}
-	LASSERT(!rc);
-
-	lnet_net_lock(gateway->lp_cpt);
-	/* router table changed or someone has created rcd for this gateway */
-	if (!lnet_isrouter(gateway) || gateway->lp_rcd) {
-		lnet_net_unlock(gateway->lp_cpt);
-		goto out;
-	}
-
-	lnet_peer_addref_locked(gateway);
-	rcd->rcd_gateway = gateway;
-	gateway->lp_rcd = rcd;
-	gateway->lp_ping_notsent = 0;
-
-	return rcd;
-
- out:
-	if (rcd) {
-		if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) {
-			rc = LNetMDUnlink(rcd->rcd_mdh);
-			LASSERT(!rc);
-		}
-		lnet_destroy_rc_data(rcd);
-	}
-
-	lnet_net_lock(gateway->lp_cpt);
-	return gateway->lp_rcd;
-}
-
-static int
-lnet_router_check_interval(struct lnet_peer *rtr)
-{
-	int secs;
-
-	secs = rtr->lp_alive ? live_router_check_interval :
-			       dead_router_check_interval;
-	if (secs < 0)
-		secs = 0;
-
-	return secs;
-}
-
-static void
-lnet_ping_router_locked(struct lnet_peer *rtr)
-{
-	struct lnet_rc_data *rcd = NULL;
-	unsigned long now = jiffies;
-	int secs;
-
-	lnet_peer_addref_locked(rtr);
-
-	if (rtr->lp_ping_deadline && /* ping timed out? */
-	    time_after(now, rtr->lp_ping_deadline))
-		lnet_notify_locked(rtr, 1, 0, now);
-
-	/* Run any outstanding notifications */
-	lnet_ni_notify_locked(rtr->lp_ni, rtr);
-
-	if (!lnet_isrouter(rtr) ||
-	    the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
-		/* router table changed or router checker is shutting down */
-		lnet_peer_decref_locked(rtr);
-		return;
-	}
-
-	rcd = rtr->lp_rcd ?
-	      rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
-
-	if (!rcd)
-		return;
-
-	secs = lnet_router_check_interval(rtr);
-
-	CDEBUG(D_NET,
-	       "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n",
-	       libcfs_nid2str(rtr->lp_nid), secs,
-	       rtr->lp_ping_deadline, rtr->lp_ping_notsent,
-	       rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
-
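-	/* secs is in seconds but lp_ping_timestamp is in jiffies, hence the HZ scaling below */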
-	if (secs && !rtr->lp_ping_notsent &&
-	    time_after(now, rtr->lp_ping_timestamp + secs * HZ)) {
-		int rc;
-		struct lnet_process_id id;
-		struct lnet_handle_md mdh;
-
-		id.nid = rtr->lp_nid;
-		id.pid = LNET_PID_LUSTRE;
-		CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
-
-		rtr->lp_ping_notsent   = 1;
-		rtr->lp_ping_timestamp = now;
-
-		mdh = rcd->rcd_mdh;
-
-		if (!rtr->lp_ping_deadline) {
-			rtr->lp_ping_deadline =
-				jiffies + router_ping_timeout * HZ;
-		}
-
-		lnet_net_unlock(rtr->lp_cpt);
-
-		rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
-			     LNET_PROTO_PING_MATCHBITS, 0);
-
-		lnet_net_lock(rtr->lp_cpt);
-		if (rc)
-			rtr->lp_ping_notsent = 0; /* no event pending */
-	}
-
-	lnet_peer_decref_locked(rtr);
-}
-
-int
-lnet_router_checker_start(void)
-{
-	struct task_struct *task;
-	int rc;
-	int eqsz = 0;
-
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
-	if (check_routers_before_use &&
-	    dead_router_check_interval <= 0) {
-		LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
-		return -EINVAL;
-	}
-
-	init_completion(&the_lnet.ln_rc_signal);
-
-	rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh);
-	if (rc) {
-		CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
-		return -ENOMEM;
-	}
-
-	the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
-	task = kthread_run(lnet_router_checker, NULL, "router_checker");
-	if (IS_ERR(task)) {
-		rc = PTR_ERR(task);
-		CERROR("Can't start router checker thread: %d\n", rc);
-		/* block until event callback signals exit */
-		wait_for_completion(&the_lnet.ln_rc_signal);
-		rc = LNetEQFree(the_lnet.ln_rc_eqh);
-		LASSERT(!rc);
-		the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
-		return -ENOMEM;
-	}
-
-	if (check_routers_before_use) {
-		/*
-		 * Note that a helpful side-effect of pinging all known routers
-		 * at startup is that it makes them drop stale connections they
-		 * may have to a previous instance of me.
-		 */
-		lnet_wait_known_routerstate();
-	}
-
-	return 0;
-}
-
-void
-lnet_router_checker_stop(void)
-{
-	int rc;
-
-	if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
-		return;
-
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-	the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
-	/* wakeup the RC thread if it's sleeping */
-	wake_up(&the_lnet.ln_rc_waitq);
-
-	/* block until event callback signals exit */
-	wait_for_completion(&the_lnet.ln_rc_signal);
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
-	rc = LNetEQFree(the_lnet.ln_rc_eqh);
-	LASSERT(!rc);
-}
-
-static void
-lnet_prune_rc_data(int wait_unlink)
-{
-	struct lnet_rc_data *rcd;
-	struct lnet_rc_data *tmp;
-	struct lnet_peer *lp;
-	struct list_head head;
-	int i = 2;
-
-	if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
-		   list_empty(&the_lnet.ln_rcd_deathrow) &&
-		   list_empty(&the_lnet.ln_rcd_zombie)))
-		return;
-
-	INIT_LIST_HEAD(&head);
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
-		/* router checker is stopping, prune all */
-		list_for_each_entry(lp, &the_lnet.ln_routers,
-				    lp_rtr_list) {
-			if (!lp->lp_rcd)
-				continue;
-
-			LASSERT(list_empty(&lp->lp_rcd->rcd_list));
-			list_add(&lp->lp_rcd->rcd_list,
-				 &the_lnet.ln_rcd_deathrow);
-			lp->lp_rcd = NULL;
-		}
-	}
-
-	/* unlink all RCDs on deathrow list */
-	list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
-
-	if (!list_empty(&head)) {
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		list_for_each_entry(rcd, &head, rcd_list)
-			LNetMDUnlink(rcd->rcd_mdh);
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-
-	list_splice_init(&head, &the_lnet.ln_rcd_zombie);
-
-	/* release all zombie RCDs */
-	while (!list_empty(&the_lnet.ln_rcd_zombie)) {
-		list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
-					 rcd_list) {
-			if (LNetMDHandleIsInvalid(rcd->rcd_mdh))
-				list_move(&rcd->rcd_list, &head);
-		}
-
-		wait_unlink = wait_unlink &&
-			      !list_empty(&the_lnet.ln_rcd_zombie);
-
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		while (!list_empty(&head)) {
-			rcd = list_entry(head.next,
-					 struct lnet_rc_data, rcd_list);
-			list_del_init(&rcd->rcd_list);
-			lnet_destroy_rc_data(rcd);
-		}
-
-		if (!wait_unlink)
-			return;
-
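-		/*
-		 * (i & -i) == i only when i is a power of two, so the
-		 * warning is logged with exponential backoff
-		 */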
-		i++;
-		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-		       "Waiting for rc buffers to unlink\n");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ / 4);
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/*
- * This function is called to check if the RC should block indefinitely.
- * It's called from lnet_router_checker() as well as being passed to
- * wait_event_interruptible() to avoid the lost wake_up problem.
- *
- * When called from wait_event_interruptible() it must also not sleep
- * while the RC state is not running, to avoid a deadlock when the
- * system is shutting down
- */
-static inline bool
-lnet_router_checker_active(void)
-{
-	if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING)
-		return true;
-
-	/*
-	 * Router Checker thread needs to run when routing is enabled in
-	 * order to call lnet_update_ni_status_locked()
-	 */
-	if (the_lnet.ln_routing)
-		return true;
-
-	return !list_empty(&the_lnet.ln_routers) &&
-		(live_router_check_interval > 0 ||
-		 dead_router_check_interval > 0);
-}
-
-static int
-lnet_router_checker(void *arg)
-{
-	struct lnet_peer *rtr;
-	struct list_head *entry;
-
-	while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
-		__u64 version;
-		int cpt;
-		int cpt2;
-
-		cpt = lnet_net_lock_current();
-rescan:
-		version = the_lnet.ln_routers_version;
-
-		list_for_each(entry, &the_lnet.ln_routers) {
-			rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
-			cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
-			if (cpt != cpt2) {
-				lnet_net_unlock(cpt);
-				cpt = cpt2;
-				lnet_net_lock(cpt);
-				/* the routers list has changed */
-				if (version != the_lnet.ln_routers_version)
-					goto rescan;
-			}
-
-			lnet_ping_router_locked(rtr);
-
-			/* NB dropped lock */
-			if (version != the_lnet.ln_routers_version) {
-				/* the routers list has changed */
-				goto rescan;
-			}
-		}
-
-		if (the_lnet.ln_routing)
-			lnet_update_ni_status_locked();
-
-		lnet_net_unlock(cpt);
-
-		lnet_prune_rc_data(0); /* don't wait for UNLINK */
-
-		/*
-		 * Calling schedule_timeout() here always adds 1 to the load
-		 * average because the kernel counts # active tasks as
-		 * nr_running + nr_uninterruptible.
-		 */
-		/*
-		 * if there are any routes then wake up every second.  If
-		 * there are no routes then sleep indefinitely until woken
-		 * up by a user adding a route
-		 */
-		if (!lnet_router_checker_active())
-			wait_event_interruptible(the_lnet.ln_rc_waitq,
-						 lnet_router_checker_active());
-		else
-			wait_event_interruptible_timeout(the_lnet.ln_rc_waitq,
-							 false,
-							 HZ);
-	}
-
-	lnet_prune_rc_data(1); /* wait for UNLINK */
-
-	the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
-	complete(&the_lnet.ln_rc_signal);
-	/* The unlink event callback will signal final completion */
-	return 0;
-}
-
-void
-lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages)
-{
-	while (--npages >= 0)
-		__free_page(rb->rb_kiov[npages].bv_page);
-
-	kfree(rb);
-}
-
-static struct lnet_rtrbuf *
-lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt)
-{
-	int npages = rbp->rbp_npages;
-	int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]);
-	struct page *page;
-	struct lnet_rtrbuf *rb;
-	int i;
-
-	rb = kzalloc_cpt(sz, GFP_NOFS, cpt);
-	if (!rb)
-		return NULL;
-
-	rb->rb_pool = rbp;
-
-	for (i = 0; i < npages; i++) {
-		page = alloc_pages_node(
-				cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				GFP_KERNEL | __GFP_ZERO, 0);
-		if (!page) {
-			while (--i >= 0)
-				__free_page(rb->rb_kiov[i].bv_page);
-
-			kfree(rb);
-			return NULL;
-		}
-
-		rb->rb_kiov[i].bv_len = PAGE_SIZE;
-		rb->rb_kiov[i].bv_offset = 0;
-		rb->rb_kiov[i].bv_page = page;
-	}
-
-	return rb;
-}
-
-static void
-lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
-{
-	int npages = rbp->rbp_npages;
-	struct list_head tmp;
-	struct lnet_rtrbuf *rb;
-	struct lnet_rtrbuf *temp;
-
-	if (!rbp->rbp_nbuffers) /* not initialized or already freed */
-		return;
-
-	INIT_LIST_HEAD(&tmp);
-
-	lnet_net_lock(cpt);
-	lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt);
-	list_splice_init(&rbp->rbp_bufs, &tmp);
-	rbp->rbp_req_nbuffers = 0;
-	rbp->rbp_nbuffers = 0;
-	rbp->rbp_credits = 0;
-	rbp->rbp_mincredits = 0;
-	lnet_net_unlock(cpt);
-
-	/* Free buffers on the free list. */
-	list_for_each_entry_safe(rb, temp, &tmp, rb_list) {
-		list_del(&rb->rb_list);
-		lnet_destroy_rtrbuf(rb, npages);
-	}
-}
-
-static int
-lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
-{
-	struct list_head rb_list;
-	struct lnet_rtrbuf *rb;
-	int num_rb;
-	int num_buffers = 0;
-	int old_req_nbufs;
-	int npages = rbp->rbp_npages;
-
-	lnet_net_lock(cpt);
-	/*
-	 * If we are called for fewer buffers than are already in the pool, we
-	 * just lower the req_nbuffers number and excess buffers will be
-	 * thrown away as they are returned to the free list.  Credits
-	 * then get adjusted as well.
-	 * If we already have enough buffers allocated to serve the
-	 * increase requested, then we can treat that the same way as we
-	 * do the decrease.
-	 */
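-	/*
-	 * E.g. with 512 buffers in the pool, a request for 256 just sets
-	 * rbp_req_nbuffers = 256 here and lets the excess drain away.
-	 */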
-	num_rb = nbufs - rbp->rbp_nbuffers;
-	if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) {
-		rbp->rbp_req_nbuffers = nbufs;
-		lnet_net_unlock(cpt);
-		return 0;
-	}
-	/*
-	 * store the old value of rbp_req_nbuffers and then set it to
-	 * the new request to prevent lnet_return_rx_credits_locked() from
-	 * freeing buffers that we need to keep around
-	 */
-	old_req_nbufs = rbp->rbp_req_nbuffers;
-	rbp->rbp_req_nbuffers = nbufs;
-	lnet_net_unlock(cpt);
-
-	INIT_LIST_HEAD(&rb_list);
-
-	/*
-	 * allocate the buffers on a local list first.  If all buffers are
-	 * allocated successfully then join this list to the rbp buffer
-	 * list. If not then free all allocated buffers.
-	 */
-	while (num_rb-- > 0) {
-		rb = lnet_new_rtrbuf(rbp, cpt);
-		if (!rb) {
-			CERROR("Failed to allocate %d route bufs of %d pages\n",
-			       nbufs, npages);
-
-			lnet_net_lock(cpt);
-			rbp->rbp_req_nbuffers = old_req_nbufs;
-			lnet_net_unlock(cpt);
-
-			goto failed;
-		}
-
-		list_add(&rb->rb_list, &rb_list);
-		num_buffers++;
-	}
-
-	lnet_net_lock(cpt);
-
-	list_splice_tail(&rb_list, &rbp->rbp_bufs);
-	rbp->rbp_nbuffers += num_buffers;
-	rbp->rbp_credits += num_buffers;
-	rbp->rbp_mincredits = rbp->rbp_credits;
-	/*
-	 * We need to schedule any blocked messages using the newly
-	 * added buffers.
-	 */
-	while (!list_empty(&rbp->rbp_bufs) &&
-	       !list_empty(&rbp->rbp_msgs))
-		lnet_schedule_blocked_locked(rbp);
-
-	lnet_net_unlock(cpt);
-
-	return 0;
-
-failed:
-	while (!list_empty(&rb_list)) {
-		rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list);
-		list_del(&rb->rb_list);
-		lnet_destroy_rtrbuf(rb, npages);
-	}
-
-	return -ENOMEM;
-}
-
-static void
-lnet_rtrpool_init(struct lnet_rtrbufpool *rbp, int npages)
-{
-	INIT_LIST_HEAD(&rbp->rbp_msgs);
-	INIT_LIST_HEAD(&rbp->rbp_bufs);
-
-	rbp->rbp_npages = npages;
-	rbp->rbp_credits = 0;
-	rbp->rbp_mincredits = 0;
-}
-
-void
-lnet_rtrpools_free(int keep_pools)
-{
-	struct lnet_rtrbufpool *rtrp;
-	int i;
-
-	if (!the_lnet.ln_rtrpools) /* uninitialized or freed */
-		return;
-
-	cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-		lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i);
-		lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i);
-		lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i);
-	}
-
-	if (!keep_pools) {
-		cfs_percpt_free(the_lnet.ln_rtrpools);
-		the_lnet.ln_rtrpools = NULL;
-	}
-}
-
-static int
-lnet_nrb_tiny_calculate(void)
-{
-	int nrbs = LNET_NRB_TINY;
-
-	if (tiny_router_buffers < 0) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "tiny_router_buffers=%d invalid when routing enabled\n",
-				   tiny_router_buffers);
-		return -EINVAL;
-	}
-
-	if (tiny_router_buffers > 0)
-		nrbs = tiny_router_buffers;
-
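-	/* split the configured total evenly across CPTs, with a per-CPT floor */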
-	nrbs /= LNET_CPT_NUMBER;
-	return max(nrbs, LNET_NRB_TINY_MIN);
-}
-
-static int
-lnet_nrb_small_calculate(void)
-{
-	int nrbs = LNET_NRB_SMALL;
-
-	if (small_router_buffers < 0) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "small_router_buffers=%d invalid when routing enabled\n",
-				   small_router_buffers);
-		return -EINVAL;
-	}
-
-	if (small_router_buffers > 0)
-		nrbs = small_router_buffers;
-
-	nrbs /= LNET_CPT_NUMBER;
-	return max(nrbs, LNET_NRB_SMALL_MIN);
-}
-
-static int
-lnet_nrb_large_calculate(void)
-{
-	int nrbs = LNET_NRB_LARGE;
-
-	if (large_router_buffers < 0) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "large_router_buffers=%d invalid when routing enabled\n",
-				   large_router_buffers);
-		return -EINVAL;
-	}
-
-	if (large_router_buffers > 0)
-		nrbs = large_router_buffers;
-
-	nrbs /= LNET_CPT_NUMBER;
-	return max(nrbs, LNET_NRB_LARGE_MIN);
-}
-
-int
-lnet_rtrpools_alloc(int im_a_router)
-{
-	struct lnet_rtrbufpool *rtrp;
-	int nrb_tiny;
-	int nrb_small;
-	int nrb_large;
-	int rc;
-	int i;
-
-	if (!strcmp(forwarding, "")) {
-		/* not set either way */
-		if (!im_a_router)
-			return 0;
-	} else if (!strcmp(forwarding, "disabled")) {
-		/* explicitly disabled */
-		return 0;
-	} else if (!strcmp(forwarding, "enabled")) {
-		/* explicitly enabled */
-	} else {
-		LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
-		return -EINVAL;
-	}
-
-	nrb_tiny = lnet_nrb_tiny_calculate();
-	if (nrb_tiny < 0)
-		return -EINVAL;
-
-	nrb_small = lnet_nrb_small_calculate();
-	if (nrb_small < 0)
-		return -EINVAL;
-
-	nrb_large = lnet_nrb_large_calculate();
-	if (nrb_large < 0)
-		return -EINVAL;
-
-	the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
-						LNET_NRBPOOLS *
-						sizeof(struct lnet_rtrbufpool));
-	if (!the_lnet.ln_rtrpools) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "Failed to initialize router buffer pool\n");
-		return -ENOMEM;
-	}
-
-	cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-		lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0);
-		rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
-					      nrb_tiny, i);
-		if (rc)
-			goto failed;
-
-		lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX],
-				  LNET_NRB_SMALL_PAGES);
-		rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
-					      nrb_small, i);
-		if (rc)
-			goto failed;
-
-		lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX],
-				  LNET_NRB_LARGE_PAGES);
-		rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
-					      nrb_large, i);
-		if (rc)
-			goto failed;
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_routing = 1;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return 0;
-
- failed:
-	lnet_rtrpools_free(0);
-	return rc;
-}
-
-static int
-lnet_rtrpools_adjust_helper(int tiny, int small, int large)
-{
-	int nrb = 0;
-	int rc = 0;
-	int i;
-	struct lnet_rtrbufpool *rtrp;
-
-	/*
-	 * If the provided values for each buffer pool differ from the
-	 * configured values, we need to take action.
-	 */
-	if (tiny >= 0) {
-		tiny_router_buffers = tiny;
-		nrb = lnet_nrb_tiny_calculate();
-		cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-			rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
-						      nrb, i);
-			if (rc)
-				return rc;
-		}
-	}
-	if (small >= 0) {
-		small_router_buffers = small;
-		nrb = lnet_nrb_small_calculate();
-		cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-			rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
-						      nrb, i);
-			if (rc)
-				return rc;
-		}
-	}
-	if (large >= 0) {
-		large_router_buffers = large;
-		nrb = lnet_nrb_large_calculate();
-		cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-			rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
-						      nrb, i);
-			if (rc)
-				return rc;
-		}
-	}
-
-	return 0;
-}
-
-int
-lnet_rtrpools_adjust(int tiny, int small, int large)
-{
-	/*
-	 * this function doesn't revert the changes if adding new buffers
-	 * fails.  It's up to the user space caller to revert the
-	 * changes.
-	 */
-	if (!the_lnet.ln_routing)
-		return 0;
-
-	return lnet_rtrpools_adjust_helper(tiny, small, large);
-}
-
-int
-lnet_rtrpools_enable(void)
-{
-	int rc = 0;
-
-	if (the_lnet.ln_routing)
-		return 0;
-
-	if (!the_lnet.ln_rtrpools)
-		/*
-		 * If routing is turned off, and we have never
-		 * initialized the pools before, just call the
-		 * standard buffer pool allocation routine as
-		 * if we are just configuring this for the first
-		 * time.
-		 */
-		rc = lnet_rtrpools_alloc(1);
-	else
-		rc = lnet_rtrpools_adjust_helper(0, 0, 0);
-	if (rc)
-		return rc;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_routing = 1;
-
-	the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return rc;
-}
-
-void
-lnet_rtrpools_disable(void)
-{
-	if (!the_lnet.ln_routing)
-		return;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_routing = 0;
-	the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
-
-	tiny_router_buffers = 0;
-	small_router_buffers = 0;
-	large_router_buffers = 0;
-	lnet_net_unlock(LNET_LOCK_EX);
-	lnet_rtrpools_free(1);
-}
-
-int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, unsigned long when)
-{
-	struct lnet_peer *lp = NULL;
-	unsigned long now = jiffies;
-	int cpt = lnet_cpt_of_nid(nid);
-
-	LASSERT(!in_interrupt());
-
-	CDEBUG(D_NET, "%s notifying %s: %s\n",
-	       !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
-	       libcfs_nid2str(nid),
-	       alive ? "up" : "down");
-
-	if (ni &&
-	    LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
-		CWARN("Ignoring notification of %s %s by %s (different net)\n",
-		      libcfs_nid2str(nid), alive ? "birth" : "death",
-		      libcfs_nid2str(ni->ni_nid));
-		return -EINVAL;
-	}
-
-	/* can't do predictions... */
-	if (time_after(when, now)) {
-		CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n",
-		      !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
-		      libcfs_nid2str(nid), alive ? "up" : "down",
-		      (when - now) / HZ);
-		return -EINVAL;
-	}
-
-	if (ni && !alive &&	     /* LND telling me she's down */
-	    !auto_down) {		       /* auto-down disabled */
-		CDEBUG(D_NET, "Auto-down disabled\n");
-		return 0;
-	}
-
-	lnet_net_lock(cpt);
-
-	if (the_lnet.ln_shutdown) {
-		lnet_net_unlock(cpt);
-		return -ESHUTDOWN;
-	}
-
-	lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
-	if (!lp) {
-		/* nid not found */
-		lnet_net_unlock(cpt);
-		CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
-		return 0;
-	}
-
-	/*
-	 * We can't fully trust the LND to report an exact peer last_alive
-	 * when it notifies us about a dead peer. For example, ksocklnd can
-	 * call us with when == _time_when_the_node_was_booted_ if
-	 * no connections were ever successfully established
-	 */
-	if (ni && !alive && when < lp->lp_last_alive)
-		when = lp->lp_last_alive;
-
-	lnet_notify_locked(lp, !ni, alive, when);
-
-	if (ni)
-		lnet_ni_notify_locked(ni, lp);
-
-	lnet_peer_decref_locked(lp);
-
-	lnet_net_unlock(cpt);
-	return 0;
-}
-EXPORT_SYMBOL(lnet_notify);

+ 0 - 907
drivers/staging/lustre/lnet/lnet/router_proc.c

@@ -1,907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   This file is part of Portals
- *   http://sourceforge.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/*
- * This is really lnet_proc.c. You might need to update sanity test 215
- * if any file format is changed.
- */
-
-#define LNET_LOFFT_BITS		(sizeof(loff_t) * 8)
-/*
- * NB: the max allowed LNET_CPT_BITS is 8 on a 64-bit system and 2 on a 32-bit one
- */
-#define LNET_PROC_CPT_BITS	(LNET_CPT_BITS + 1)
-/* change version, 16 bits or 8 bits */
-#define LNET_PROC_VER_BITS	max_t(size_t, min_t(size_t, LNET_LOFFT_BITS, 64) / 4, 8)
-
-#define LNET_PROC_HASH_BITS	LNET_PEER_HASH_BITS
-/*
- * bits for peer hash offset
- * NB: we don't use the highest bit of *ppos because it's signed
- */
-#define LNET_PROC_HOFF_BITS	(LNET_LOFFT_BITS -       \
-				 LNET_PROC_CPT_BITS -    \
-				 LNET_PROC_VER_BITS -    \
-				 LNET_PROC_HASH_BITS - 1)
-/* bits for hash index + position */
-#define LNET_PROC_HPOS_BITS	(LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
-/* bits for peer hash table + hash version */
-#define LNET_PROC_VPOS_BITS	(LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
-
-#define LNET_PROC_CPT_MASK	((1ULL << LNET_PROC_CPT_BITS) - 1)
-#define LNET_PROC_VER_MASK	((1ULL << LNET_PROC_VER_BITS) - 1)
-#define LNET_PROC_HASH_MASK	((1ULL << LNET_PROC_HASH_BITS) - 1)
-#define LNET_PROC_HOFF_MASK	((1ULL << LNET_PROC_HOFF_BITS) - 1)
-
-#define LNET_PROC_CPT_GET(pos)				\
-	(int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
-
-#define LNET_PROC_VER_GET(pos)				\
-	(int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
-
-#define LNET_PROC_HASH_GET(pos)				\
-	(int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
-
-#define LNET_PROC_HOFF_GET(pos)				\
-	(int)((pos) & LNET_PROC_HOFF_MASK)
-
-#define LNET_PROC_POS_MAKE(cpt, ver, hash, off)		\
-	(((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) |   \
-	((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) |   \
-	((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
-	((off) & LNET_PROC_HOFF_MASK))
-
-#define LNET_PROC_VERSION(v)	((unsigned int)((v) & LNET_PROC_VER_MASK))
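-
-/*
- * A sketch of the resulting *ppos layout, derived from the macros above
- * (assuming a 64-bit loff_t), from most- to least-significant bits:
- *
- *   [ sign (unused) ][ cpt ][ change version ][ hash index ][ hash offset ]
- */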
-
-static int __proc_lnet_stats(void *data, int write,
-			     loff_t pos, void __user *buffer, int nob)
-{
-	int rc;
-	struct lnet_counters *ctrs;
-	int len;
-	char *tmpstr;
-	const int tmpsiz = 256; /* 7 %u and 4 %llu */
-
-	if (write) {
-		lnet_counters_reset();
-		return 0;
-	}
-
-	/* read */
-
-	ctrs = kzalloc(sizeof(*ctrs), GFP_NOFS);
-	if (!ctrs)
-		return -ENOMEM;
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr) {
-		kfree(ctrs);
-		return -ENOMEM;
-	}
-
-	lnet_counters_get(ctrs);
-
-	len = snprintf(tmpstr, tmpsiz,
-		       "%u %u %u %u %u %u %u %llu %llu %llu %llu",
-		       ctrs->msgs_alloc, ctrs->msgs_max,
-		       ctrs->errors,
-		       ctrs->send_count, ctrs->recv_count,
-		       ctrs->route_count, ctrs->drop_count,
-		       ctrs->send_length, ctrs->recv_length,
-		       ctrs->route_length, ctrs->drop_length);
-
-	if (pos >= min_t(int, len, strlen(tmpstr)))
-		rc = 0;
-	else
-		rc = cfs_trace_copyout_string(buffer, nob,
-					      tmpstr + pos, "\n");
-
-	kfree(tmpstr);
-	kfree(ctrs);
-	return rc;
-}
-
-static int proc_lnet_stats(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_lnet_stats);
-}
-
-static int proc_lnet_routes(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	const int tmpsiz = 256;
-	char *tmpstr;
-	char *s;
-	int rc = 0;
-	int len;
-	int ver;
-	int off;
-
-	BUILD_BUG_ON(sizeof(loff_t) < 4);
-
-	off = LNET_PROC_HOFF_GET(*ppos);
-	ver = LNET_PROC_VER_GET(*ppos);
-
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
-			      the_lnet.ln_routing ? "enabled" : "disabled");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %8s %7s %s\n",
-			      "net", "hops", "priority", "state", "router");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		lnet_net_lock(0);
-		ver = (unsigned int)the_lnet.ln_remote_nets_version;
-		lnet_net_unlock(0);
-		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-	} else {
-		struct list_head *n;
-		struct list_head *r;
-		struct lnet_route *route = NULL;
-		struct lnet_remotenet *rnet  = NULL;
-		int skip  = off - 1;
-		struct list_head *rn_list;
-		int i;
-
-		lnet_net_lock(0);
-
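-		/*
-		 * The change version stamped into *ppos on the first read
-		 * must still match; if the route table changed in between,
-		 * fail with -ESTALE so the reader starts over.
-		 */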
-		if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
-			lnet_net_unlock(0);
-			kfree(tmpstr);
-			return -ESTALE;
-		}
-
-		for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) {
-			rn_list = &the_lnet.ln_remote_nets_hash[i];
-
-			n = rn_list->next;
-
-			while (n != rn_list && !route) {
-				rnet = list_entry(n, struct lnet_remotenet,
-						  lrn_list);
-
-				r = rnet->lrn_routes.next;
-
-				while (r != &rnet->lrn_routes) {
-					struct lnet_route *re;
-
-					re = list_entry(r, struct lnet_route,
-							lr_list);
-					if (!skip) {
-						route = re;
-						break;
-					}
-
-					skip--;
-					r = r->next;
-				}
-
-				n = n->next;
-			}
-		}
-
-		if (route) {
-			__u32 net = rnet->lrn_net;
-			__u32 hops = route->lr_hops;
-			unsigned int priority = route->lr_priority;
-			lnet_nid_t nid = route->lr_gateway->lp_nid;
-			int alive = lnet_is_route_alive(route);
-
-			s += snprintf(s, tmpstr + tmpsiz - s,
-				      "%-8s %4u %8u %7s %s\n",
-				      libcfs_net2str(net), hops,
-				      priority,
-				      alive ? "up" : "down",
-				      libcfs_nid2str(nid));
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-
-		lnet_net_unlock(0);
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len)) {
-			rc = -EFAULT;
-		} else {
-			off += 1;
-			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-		}
-	}
-
-	kfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-static int proc_lnet_routers(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int rc = 0;
-	char *tmpstr;
-	char *s;
-	const int tmpsiz = 256;
-	int len;
-	int ver;
-	int off;
-
-	off = LNET_PROC_HOFF_GET(*ppos);
-	ver = LNET_PROC_VER_GET(*ppos);
-
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s,
-			      "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
-			      "ref", "rtr_ref", "alive_cnt", "state",
-			      "last_ping", "ping_sent", "deadline",
-			      "down_ni", "router");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		lnet_net_lock(0);
-		ver = (unsigned int)the_lnet.ln_routers_version;
-		lnet_net_unlock(0);
-		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-	} else {
-		struct list_head *r;
-		struct lnet_peer *peer = NULL;
-		int skip = off - 1;
-
-		lnet_net_lock(0);
-
-		if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
-			lnet_net_unlock(0);
-
-			kfree(tmpstr);
-			return -ESTALE;
-		}
-
-		r = the_lnet.ln_routers.next;
-
-		while (r != &the_lnet.ln_routers) {
-			struct lnet_peer *lp;
-
-			lp = list_entry(r, struct lnet_peer, lp_rtr_list);
-			if (!skip) {
-				peer = lp;
-				break;
-			}
-
-			skip--;
-			r = r->next;
-		}
-
-		if (peer) {
-			lnet_nid_t nid = peer->lp_nid;
-			unsigned long now = jiffies;
-			unsigned long deadline = peer->lp_ping_deadline;
-			int nrefs = peer->lp_refcount;
-			int nrtrrefs = peer->lp_rtr_refcount;
-			int alive_cnt = peer->lp_alive_count;
-			int alive = peer->lp_alive;
-			int pingsent = !peer->lp_ping_notsent;
-			int last_ping = (now - peer->lp_ping_timestamp) / HZ;
-			int down_ni = 0;
-			struct lnet_route *rtr;
-
-			if ((peer->lp_ping_feats &
-			     LNET_PING_FEAT_NI_STATUS)) {
-				list_for_each_entry(rtr, &peer->lp_routes,
-						    lr_gwlist) {
-					/*
-					 * the downis count on any route equals
-					 * the number of down NIs on the gateway
-					 */
-					if (rtr->lr_downis) {
-						down_ni = rtr->lr_downis;
-						break;
-					}
-				}
-			}
-
-			if (!deadline)
-				s += snprintf(s, tmpstr + tmpsiz - s,
-					      "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
-					      nrefs, nrtrrefs, alive_cnt,
-					      alive ? "up" : "down", last_ping,
-					      pingsent, "NA", down_ni,
-					      libcfs_nid2str(nid));
-			else
-				s += snprintf(s, tmpstr + tmpsiz - s,
-					      "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
-					      nrefs, nrtrrefs, alive_cnt,
-					      alive ? "up" : "down", last_ping,
-					      pingsent,
-					      (deadline - now) / HZ,
-					      down_ni, libcfs_nid2str(nid));
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-
-		lnet_net_unlock(0);
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len)) {
-			rc = -EFAULT;
-		} else {
-			off += 1;
-			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-		}
-	}
-
-	kfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-static int proc_lnet_peers(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	const int tmpsiz  = 256;
-	struct lnet_peer_table *ptable;
-	char *tmpstr;
-	char *s;
-	int cpt  = LNET_PROC_CPT_GET(*ppos);
-	int ver  = LNET_PROC_VER_GET(*ppos);
-	int hash = LNET_PROC_HASH_GET(*ppos);
-	int hoff = LNET_PROC_HOFF_GET(*ppos);
-	int rc = 0;
-	int len;
-
-	BUILD_BUG_ON(LNET_PROC_HASH_BITS < LNET_PEER_HASH_BITS);
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	if (cpt >= LNET_CPT_NUMBER) {
-		*lenp = 0;
-		return 0;
-	}
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s,
-			      "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
-			      "nid", "refs", "state", "last", "max",
-			      "rtr", "min", "tx", "min", "queue");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		hoff++;
-	} else {
-		struct lnet_peer *peer;
-		struct list_head *p;
-		int skip;
- again:
-		p = NULL;
-		peer = NULL;
-		skip = hoff - 1;
-
-		lnet_net_lock(cpt);
-		ptable = the_lnet.ln_peer_tables[cpt];
-		if (hoff == 1)
-			ver = LNET_PROC_VERSION(ptable->pt_version);
-
-		if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
-			lnet_net_unlock(cpt);
-			kfree(tmpstr);
-			return -ESTALE;
-		}
-
-		while (hash < LNET_PEER_HASH_SIZE) {
-			if (!p)
-				p = ptable->pt_hash[hash].next;
-
-			while (p != &ptable->pt_hash[hash]) {
-				struct lnet_peer *lp;
-
-				lp = list_entry(p, struct lnet_peer,
-						lp_hashlist);
-				if (!skip) {
-					peer = lp;
-
-					/*
-					 * minor optimization: start from idx+1
-					 * on next iteration if we've just
-					 * drained lp_hashlist
-					 */
-					if (lp->lp_hashlist.next ==
-					    &ptable->pt_hash[hash]) {
-						hoff = 1;
-						hash++;
-					} else {
-						hoff++;
-					}
-
-					break;
-				}
-
-				skip--;
-				p = lp->lp_hashlist.next;
-			}
-
-			if (peer)
-				break;
-
-			p = NULL;
-			hoff = 1;
-			hash++;
-		}
-
-		if (peer) {
-			lnet_nid_t nid = peer->lp_nid;
-			int nrefs = peer->lp_refcount;
-			int lastalive = -1;
-			char *aliveness = "NA";
-			int maxcr = peer->lp_ni->ni_peertxcredits;
-			int txcr = peer->lp_txcredits;
-			int mintxcr = peer->lp_mintxcredits;
-			int rtrcr = peer->lp_rtrcredits;
-			int minrtrcr = peer->lp_minrtrcredits;
-			int txqnob = peer->lp_txqnob;
-
-			if (lnet_isrouter(peer) ||
-			    lnet_peer_aliveness_enabled(peer))
-				aliveness = peer->lp_alive ? "up" : "down";
-
-			if (lnet_peer_aliveness_enabled(peer)) {
-				unsigned long now = jiffies;
-				long delta;
-
-				delta = now - peer->lp_last_alive;
-				lastalive = (delta) / HZ;
-
-				/* No need to clutter the peers output with
-				 * arbitrarily large integers - it suffices to
-				 * know that lastalive is more than 10000s old
-				 */
-				if (lastalive >= 10000)
-					lastalive = 9999;
-			}
-
-			lnet_net_unlock(cpt);
-
-			s += snprintf(s, tmpstr + tmpsiz - s,
-				      "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
-				      libcfs_nid2str(nid), nrefs, aliveness,
-				      lastalive, maxcr, rtrcr, minrtrcr, txcr,
-				      mintxcr, txqnob);
-			LASSERT(tmpstr + tmpsiz - s > 0);
-
-		} else { /* peer is NULL */
-			lnet_net_unlock(cpt);
-		}
-
-		if (hash == LNET_PEER_HASH_SIZE) {
-			cpt++;
-			hash = 0;
-			hoff = 1;
-			if (!peer && cpt < LNET_CPT_NUMBER)
-				goto again;
-		}
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len))
-			rc = -EFAULT;
-		else
-			*ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
-	}
-
-	kfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-static int __proc_lnet_buffers(void *data, int write,
-			       loff_t pos, void __user *buffer, int nob)
-{
-	char *s;
-	char *tmpstr;
-	int tmpsiz;
-	int idx;
-	int len;
-	int rc;
-	int i;
-
-	LASSERT(!write);
-
-	/* 64 bytes per line of 4 %d fields, (LNET_NRBPOOLS + 1) lines per CPT */
-	tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
-	tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	s += snprintf(s, tmpstr + tmpsiz - s,
-		      "%5s %5s %7s %7s\n",
-		      "pages", "count", "credits", "min");
-	LASSERT(tmpstr + tmpsiz - s > 0);
-
-	if (!the_lnet.ln_rtrpools)
-		goto out; /* I'm not a router */
-
-	for (idx = 0; idx < LNET_NRBPOOLS; idx++) {
-		struct lnet_rtrbufpool *rbp;
-
-		lnet_net_lock(LNET_LOCK_EX);
-		cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) {
-			s += snprintf(s, tmpstr + tmpsiz - s,
-				      "%5d %5d %7d %7d\n",
-				      rbp[idx].rbp_npages,
-				      rbp[idx].rbp_nbuffers,
-				      rbp[idx].rbp_credits,
-				      rbp[idx].rbp_mincredits);
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-		lnet_net_unlock(LNET_LOCK_EX);
-	}
-
- out:
-	len = s - tmpstr;
-
-	if (pos >= min_t(int, len, strlen(tmpstr)))
-		rc = 0;
-	else
-		rc = cfs_trace_copyout_string(buffer, nob,
-					      tmpstr + pos, NULL);
-
-	kvfree(tmpstr);
-	return rc;
-}
-
-static int proc_lnet_buffers(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_lnet_buffers);
-}
-
-static int proc_lnet_nis(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int tmpsiz = 128 * LNET_CPT_NUMBER;
-	int rc = 0;
-	char *tmpstr;
-	char *s;
-	int len;
-
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s,
-			      "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
-			      "nid", "status", "alive", "refs", "peer",
-			      "rtr", "max", "tx", "min");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-	} else {
-		struct list_head *n;
-		struct lnet_ni *ni = NULL;
-		int skip = *ppos - 1;
-
-		lnet_net_lock(0);
-
-		n = the_lnet.ln_nis.next;
-
-		while (n != &the_lnet.ln_nis) {
-			struct lnet_ni *a_ni;
-
-			a_ni = list_entry(n, struct lnet_ni, ni_list);
-			if (!skip) {
-				ni = a_ni;
-				break;
-			}
-
-			skip--;
-			n = n->next;
-		}
-
-		if (ni) {
-			struct lnet_tx_queue *tq;
-			char *stat;
-			time64_t now = ktime_get_real_seconds();
-			int last_alive = -1;
-			int i;
-			int j;
-
-			if (the_lnet.ln_routing)
-				last_alive = now - ni->ni_last_alive;
-
-			/* @lo forever alive */
-			if (ni->ni_lnd->lnd_type == LOLND)
-				last_alive = 0;
-
-			lnet_ni_lock(ni);
-			LASSERT(ni->ni_status);
-			stat = (ni->ni_status->ns_status ==
-				LNET_NI_STATUS_UP) ? "up" : "down";
-			lnet_ni_unlock(ni);
-
-			/*
-			 * we actually output credits information for the
-			 * TX queue of each partition
-			 */
-			cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
-				for (j = 0; ni->ni_cpts &&
-				     j < ni->ni_ncpts; j++) {
-					if (i == ni->ni_cpts[j])
-						break;
-				}
-
-				if (j == ni->ni_ncpts)
-					continue;
-
-				if (i)
-					lnet_net_lock(i);
-
-				s += snprintf(s, tmpstr + tmpsiz - s,
-					      "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
-					      libcfs_nid2str(ni->ni_nid), stat,
-					      last_alive, *ni->ni_refs[i],
-					      ni->ni_peertxcredits,
-					      ni->ni_peerrtrcredits,
-					      tq->tq_credits_max,
-					      tq->tq_credits,
-					      tq->tq_credits_min);
-				if (i)
-					lnet_net_unlock(i);
-			}
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-
-		lnet_net_unlock(0);
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len))
-			rc = -EFAULT;
-		else
-			*ppos += 1;
-	}
-
-	kvfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-struct lnet_portal_rotors {
-	int pr_value;
-	const char *pr_name;
-	const char *pr_desc;
-};
-
-static struct lnet_portal_rotors	portal_rotors[] = {
-	{
-		.pr_value = LNET_PTL_ROTOR_OFF,
-		.pr_name  = "OFF",
-		.pr_desc  = "Turn off message rotor for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_ON,
-		.pr_name  = "ON",
-		.pr_desc  = "round-robin dispatch all PUT messages for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_RR_RT,
-		.pr_name  = "RR_RT",
-		.pr_desc  = "round-robin dispatch routed PUT message for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_HASH_RT,
-		.pr_name  = "HASH_RT",
-		.pr_desc  = "dispatch routed PUT message by hashing source NID for wildcard portals"
-	},
-	{
-		.pr_value = -1,
-		.pr_name  = NULL,
-		.pr_desc  = NULL
-	},
-};
-
-static int __proc_lnet_portal_rotor(void *data, int write,
-				    loff_t pos, void __user *buffer, int nob)
-{
-	const int buf_len = 128;
-	char *buf;
-	char *tmp;
-	int rc;
-	int i;
-
-	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	if (!write) {
-		lnet_res_lock(0);
-
-		for (i = 0; portal_rotors[i].pr_value >= 0; i++) {
-			if (portal_rotors[i].pr_value == portal_rotor)
-				break;
-		}
-
-		LASSERT(portal_rotors[i].pr_value == portal_rotor);
-		lnet_res_unlock(0);
-
-		rc = snprintf(buf, buf_len,
-			      "{\n\tportals: all\n"
-			      "\trotor: %s\n\tdescription: %s\n}",
-			      portal_rotors[i].pr_name,
-			      portal_rotors[i].pr_desc);
-
-		if (pos >= min_t(int, rc, buf_len)) {
-			rc = 0;
-		} else {
-			rc = cfs_trace_copyout_string(buffer, nob,
-						      buf + pos, "\n");
-		}
-		goto out;
-	}
-
-	rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
-	if (rc < 0)
-		goto out;
-
-	tmp = strim(buf);
-
-	rc = -EINVAL;
-	lnet_res_lock(0);
-	for (i = 0; portal_rotors[i].pr_name; i++) {
-		if (!strncasecmp(portal_rotors[i].pr_name, tmp,
-				 strlen(portal_rotors[i].pr_name))) {
-			portal_rotor = portal_rotors[i].pr_value;
-			rc = 0;
-			break;
-		}
-	}
-	lnet_res_unlock(0);
-out:
-	kfree(buf);
-	return rc;
-}
-
-static int proc_lnet_portal_rotor(struct ctl_table *table, int write,
-				  void __user *buffer, size_t *lenp,
-				  loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_lnet_portal_rotor);
-}
-
-static struct ctl_table lnet_table[] = {
-	/*
-	 * NB No .strategy entries have been provided since sysctl(8) prefers
-	 * to go via /proc for portability.
-	 */
-	{
-		.procname     = "stats",
-		.mode         = 0644,
-		.proc_handler = &proc_lnet_stats,
-	},
-	{
-		.procname     = "routes",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_routes,
-	},
-	{
-		.procname     = "routers",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_routers,
-	},
-	{
-		.procname     = "peers",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_peers,
-	},
-	{
-		.procname     = "buffers",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_buffers,
-	},
-	{
-		.procname     = "nis",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_nis,
-	},
-	{
-		.procname     = "portal_rotor",
-		.mode         = 0644,
-		.proc_handler = &proc_lnet_portal_rotor,
-	},
-	{
-	}
-};
-
-void lnet_router_debugfs_init(void)
-{
-	lustre_insert_debugfs(lnet_table);
-}
-
-void lnet_router_debugfs_fini(void)
-{
-}

+ 0 - 7
drivers/staging/lustre/lnet/selftest/Makefile

@@ -1,7 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o
-
-lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \
-		   module.o ping_test.o brw_test.o

+ 0 - 526
drivers/staging/lustre/lnet/selftest/brw_test.c

@@ -1,526 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/brw_test.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#include "selftest.h"
-
-static int brw_srv_workitems = SFW_TEST_WI_MAX;
-module_param(brw_srv_workitems, int, 0644);
-MODULE_PARM_DESC(brw_srv_workitems, "# BRW server workitems");
-
-static int brw_inject_errors;
-module_param(brw_inject_errors, int, 0644);
-MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default");
-
-#define BRW_POISON	0xbeefbeefbeefbeefULL
-#define BRW_MAGIC	0xeeb0eeb1eeb2eeb3ULL
-#define BRW_MSIZE	sizeof(u64)
-
-static void
-brw_client_fini(struct sfw_test_instance *tsi)
-{
-	struct srpc_bulk *bulk;
-	struct sfw_test_unit	*tsu;
-
-	LASSERT(tsi->tsi_is_client);
-
-	list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
-		bulk = tsu->tsu_private;
-		if (!bulk)
-			continue;
-
-		srpc_free_bulk(bulk);
-		tsu->tsu_private = NULL;
-	}
-}
-
-static int
-brw_client_init(struct sfw_test_instance *tsi)
-{
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	int flags;
-	int off;
-	int npg;
-	int len;
-	int opc;
-	struct srpc_bulk *bulk;
-	struct sfw_test_unit *tsu;
-
-	LASSERT(sn);
-	LASSERT(tsi->tsi_is_client);
-
-	if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-		struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		npg = breq->blk_npg;
-		/*
-		 * NB: this is not going to work for variable page size,
-		 * but we have to keep it for compatibility
-		 */
-		len = npg * PAGE_SIZE;
-		off = 0;
-	} else {
-		struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
-
-		/*
-		 * We should never get this far with an unknown feature,
-		 * because make_session will reject unknown features
-		 */
-		LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		len = breq->blk_len;
-		off = breq->blk_offset & ~PAGE_MASK;
-		npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	}
-
-	if (off % BRW_MSIZE)
-		return -EINVAL;
-
-	if (npg > LNET_MAX_IOV || npg <= 0)
-		return -EINVAL;
-
-	if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
-		return -EINVAL;
-
-	if (flags != LST_BRW_CHECK_NONE &&
-	    flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
-		return -EINVAL;
-
-	list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
-		bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
-				       off, npg, len, opc == LST_BRW_READ);
-		if (!bulk) {
-			brw_client_fini(tsi);
-			return -ENOMEM;
-		}
-
-		tsu->tsu_private = bulk;
-	}
-
-	return 0;
-}
-
-static int brw_inject_one_error(void)
-{
-	struct timespec64 ts;
-
-	if (brw_inject_errors <= 0)
-		return 0;
-
-	ktime_get_ts64(&ts);
-
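-	/* use the parity of the current microsecond as a cheap coin flip */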
-	if (!((ts.tv_nsec / NSEC_PER_USEC) & 1))
-		return 0;
-
-	return brw_inject_errors--;
-}
-
-static void
-brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
-	char *addr = page_address(pg) + off;
-	int i;
-
-	LASSERT(addr);
-	LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
-	if (pattern == LST_BRW_CHECK_NONE)
-		return;
-
-	if (magic == BRW_MAGIC)
-		magic += brw_inject_one_error();
-
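-	/*
-	 * NB: the SIMPLE pattern stamps the first word of the region and the
-	 * word PAGE_SIZE - BRW_MSIZE bytes past it; brw_check_page() mirrors
-	 * this exactly, so fill and verify always agree.
-	 */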
-	if (pattern == LST_BRW_CHECK_SIMPLE) {
-		memcpy(addr, &magic, BRW_MSIZE);
-		if (len > BRW_MSIZE) {
-			addr += PAGE_SIZE - BRW_MSIZE;
-			memcpy(addr, &magic, BRW_MSIZE);
-		}
-		return;
-	}
-
-	if (pattern == LST_BRW_CHECK_FULL) {
-		for (i = 0; i < len; i += BRW_MSIZE)
-			memcpy(addr + i, &magic, BRW_MSIZE);
-		return;
-	}
-
-	LBUG();
-}
-
-static int
-brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
-	char *addr = page_address(pg) + off;
-	__u64 data = 0; /* make compiler happy */
-	int i;
-
-	LASSERT(addr);
-	LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
-	if (pattern == LST_BRW_CHECK_NONE)
-		return 0;
-
-	if (pattern == LST_BRW_CHECK_SIMPLE) {
-		data = *((__u64 *)addr);
-		if (data != magic)
-			goto bad_data;
-
-		if (len > BRW_MSIZE) {
-			addr += PAGE_SIZE - BRW_MSIZE;
-			data = *((__u64 *)addr);
-			if (data != magic)
-				goto bad_data;
-		}
-		return 0;
-	}
-
-	if (pattern == LST_BRW_CHECK_FULL) {
-		for (i = 0; i < len; i += BRW_MSIZE) {
-			data = *(u64 *)(addr + i);
-			if (data != magic)
-				goto bad_data;
-		}
-		return 0;
-	}
-
-	LBUG();
-
-bad_data:
-	CERROR("Bad data in page %p: %#llx, %#llx expected\n",
-	       pg, data, magic);
-	return 1;
-}
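
brw_fill_page() and brw_check_page() form a fill-then-verify pair: the writer stamps each 8-byte slot with a magic value, and the reader compares every slot against the value it expects. A minimal userspace sketch of the LST_BRW_CHECK_FULL pattern (names hypothetical):

#include <stdint.h>
#include <string.h>

#define MSIZE	sizeof(uint64_t)

/* Stamp every 8-byte slot in [buf, buf + len) with magic. */
static void fill_full(char *buf, int len, uint64_t magic)
{
	int i;

	for (i = 0; i < len; i += MSIZE)
		memcpy(buf + i, &magic, MSIZE);
}

/* Return 0 when every slot still holds magic, 1 on the first mismatch. */
static int check_full(const char *buf, int len, uint64_t magic)
{
	uint64_t data;
	int i;

	for (i = 0; i < len; i += MSIZE) {
		memcpy(&data, buf + i, MSIZE);
		if (data != magic)
			return 1;
	}

	return 0;
}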
-
-static void
-brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
-	int i;
-	struct page *pg;
-
-	for (i = 0; i < bk->bk_niov; i++) {
-		int off, len;
-
-		pg = bk->bk_iovs[i].bv_page;
-		off = bk->bk_iovs[i].bv_offset;
-		len = bk->bk_iovs[i].bv_len;
-		brw_fill_page(pg, off, len, pattern, magic);
-	}
-}
-
-static int
-brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
-	int i;
-	struct page *pg;
-
-	for (i = 0; i < bk->bk_niov; i++) {
-		int off, len;
-
-		pg = bk->bk_iovs[i].bv_page;
-		off = bk->bk_iovs[i].bv_offset;
-		len = bk->bk_iovs[i].bv_len;
-		if (brw_check_page(pg, off, len, pattern, magic)) {
-			CERROR("Bulk page %p (%d/%d) is corrupted!\n",
-			       pg, i, bk->bk_niov);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int
-brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
-		    struct srpc_client_rpc **rpcpp)
-{
-	struct srpc_bulk *bulk = tsu->tsu_private;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct srpc_client_rpc *rpc;
-	struct srpc_brw_reqst *req;
-	int flags;
-	int npg;
-	int len;
-	int opc;
-	int rc;
-
-	LASSERT(sn);
-	LASSERT(bulk);
-
-	if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-		struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		npg = breq->blk_npg;
-		len = npg * PAGE_SIZE;
-	} else {
-		struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
-		int off;
-
-		/*
-		 * We should never reach this step with an unknown feature,
-		 * because make_session rejects unknown features.
-		 */
-		LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		len = breq->blk_len;
-		off = breq->blk_offset;
-		npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	}
-
-	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
-	if (rc)
-		return rc;
-
-	memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg]));
-	if (opc == LST_BRW_WRITE)
-		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
-	else
-		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);
-
-	req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
-	req->brw_flags = flags;
-	req->brw_rw = opc;
-	req->brw_len = len;
-
-	*rpcpp = rpc;
-	return 0;
-}
-
-static void
-brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
-	__u64 magic = BRW_MAGIC;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct srpc_msg *msg = &rpc->crpc_replymsg;
-	struct srpc_brw_reply *reply = &msg->msg_body.brw_reply;
-	struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
-
-	LASSERT(sn);
-
-	if (rpc->crpc_status) {
-		CERROR("BRW RPC to %s failed with %d\n",
-		       libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
-		if (!tsi->tsi_stopping)	/* rpc could have been aborted */
-			atomic_inc(&sn->sn_brw_errors);
-		return;
-	}
-
-	if (msg->msg_magic != SRPC_MSG_MAGIC) {
-		__swab64s(&magic);
-		__swab32s(&reply->brw_status);
-	}
-
-	CDEBUG(reply->brw_status ? D_WARNING : D_NET,
-	       "BRW RPC to %s finished with brw_status: %d\n",
-	       libcfs_id2str(rpc->crpc_dest), reply->brw_status);
-
-	if (reply->brw_status) {
-		atomic_inc(&sn->sn_brw_errors);
-		rpc->crpc_status = -(int)reply->brw_status;
-		return;
-	}
-
-	if (reqst->brw_rw == LST_BRW_WRITE)
-		return;
-
-	if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic)) {
-		CERROR("Bulk data from %s is corrupted!\n",
-		       libcfs_id2str(rpc->crpc_dest));
-		atomic_inc(&sn->sn_brw_errors);
-		rpc->crpc_status = -EBADMSG;
-	}
-}
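
A note on the byte-swapping above: when msg_magic does not equal SRPC_MSG_MAGIC, the reply came from a peer of the opposite endianness, so the locally expected magic is swapped to match what that peer actually wrote:

/*
 * __swab64s() reverses the byte order in place; for BRW_MAGIC:
 *
 *   0xeeb0eeb1eeb2eeb3ULL  <->  0xb3eeb2eeb1eeb0eeULL
 *
 * so brw_check_bulk() compares the received bulk pages against the
 * value in the sender's byte order.
 */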
-
-static void
-brw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
-	struct srpc_bulk *blk = rpc->srpc_bulk;
-
-	if (!blk)
-		return;
-
-	if (rpc->srpc_status)
-		CERROR("Bulk transfer %s %s has failed: %d\n",
-		       blk->bk_sink ? "from" : "to",
-		       libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
-	else
-		CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n",
-		       blk->bk_niov, blk->bk_sink ? "from" : "to",
-		       libcfs_id2str(rpc->srpc_peer));
-
-	sfw_free_pages(rpc);
-}
-
-static int
-brw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
-	__u64 magic = BRW_MAGIC;
-	struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
-	struct srpc_brw_reqst *reqst;
-	struct srpc_msg *reqstmsg;
-
-	LASSERT(rpc->srpc_bulk);
-	LASSERT(rpc->srpc_reqstbuf);
-
-	reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	reqst = &reqstmsg->msg_body.brw_reqst;
-
-	if (status) {
-		CERROR("BRW bulk %s failed for RPC from %s: %d\n",
-		       reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
-		       libcfs_id2str(rpc->srpc_peer), status);
-		return -EIO;
-	}
-
-	if (reqst->brw_rw == LST_BRW_READ)
-		return 0;
-
-	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
-		__swab64s(&magic);
-
-	if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic)) {
-		CERROR("Bulk data from %s is corrupted!\n",
-		       libcfs_id2str(rpc->srpc_peer));
-		reply->brw_status = EBADMSG;
-	}
-
-	return 0;
-}
-
-static int
-brw_server_handle(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	struct srpc_msg *replymsg = &rpc->srpc_replymsg;
-	struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
-	struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
-	int npg;
-	int rc;
-
-	LASSERT(sv->sv_id == SRPC_SERVICE_BRW);
-
-	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
-		LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-		__swab32s(&reqst->brw_rw);
-		__swab32s(&reqst->brw_len);
-		__swab32s(&reqst->brw_flags);
-		__swab64s(&reqst->brw_rpyid);
-		__swab64s(&reqst->brw_bulkid);
-	}
-	LASSERT(reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id));
-
-	reply->brw_status = 0;
-	rpc->srpc_done = brw_server_rpc_done;
-
-	if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
-	    (reqst->brw_flags != LST_BRW_CHECK_NONE &&
-	     reqst->brw_flags != LST_BRW_CHECK_FULL &&
-	     reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
-		reply->brw_status = EINVAL;
-		return 0;
-	}
-
-	if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
-		replymsg->msg_ses_feats = LST_FEATS_MASK;
-		reply->brw_status = EPROTO;
-		return 0;
-	}
-
-	if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
-		/* compat with old version */
-		if (reqst->brw_len & ~PAGE_MASK) {
-			reply->brw_status = EINVAL;
-			return 0;
-		}
-		npg = reqst->brw_len >> PAGE_SHIFT;
-
-	} else {
-		npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	}
-
-	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
-	if (!reqst->brw_len || npg > LNET_MAX_IOV) {
-		reply->brw_status = EINVAL;
-		return 0;
-	}
-
-	rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
-			     reqst->brw_len,
-			     reqst->brw_rw == LST_BRW_WRITE);
-	if (rc)
-		return rc;
-
-	if (reqst->brw_rw == LST_BRW_READ)
-		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
-	else
-		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
-
-	return 0;
-}
-
-struct sfw_test_client_ops brw_test_client;
-
-void brw_init_test_client(void)
-{
-	brw_test_client.tso_init = brw_client_init;
-	brw_test_client.tso_fini = brw_client_fini;
-	brw_test_client.tso_prep_rpc = brw_client_prep_rpc;
-	brw_test_client.tso_done_rpc = brw_client_done_rpc;
-}
-
-struct srpc_service brw_test_service;
-
-void brw_init_test_service(void)
-{
-	brw_test_service.sv_id = SRPC_SERVICE_BRW;
-	brw_test_service.sv_name = "brw_test";
-	brw_test_service.sv_handler = brw_server_handle;
-	brw_test_service.sv_bulk_ready = brw_bulk_ready;
-	brw_test_service.sv_wi_total = brw_srv_workitems;
-}

+ 0 - 801
drivers/staging/lustre/lnet/selftest/conctl.c

@@ -1,801 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conctl.c
- *
- * IOCTL handling in the kernel
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "console.h"
-
-static int
-lst_session_new_ioctl(struct lstio_session_new_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-
-	if (!args->lstio_ses_idp ||	/* address for output sid */
-	    !args->lstio_ses_key ||	/* no key is specified */
-	    !args->lstio_ses_namep ||	/* session name */
-	    args->lstio_ses_nmlen <= 0 ||
-	    args->lstio_ses_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_ses_namep,
-			   args->lstio_ses_nmlen)) {
-		return -EFAULT;
-	}
-
-	name[args->lstio_ses_nmlen] = 0;
-
-	rc = lstcon_session_new(name,
-				args->lstio_ses_key,
-				args->lstio_ses_feats,
-				args->lstio_ses_timeout,
-				args->lstio_ses_force,
-				args->lstio_ses_idp);
-
-	return rc;
-}
-
-static int
-lst_session_end_ioctl(struct lstio_session_end_args *args)
-{
-	if (args->lstio_ses_key != console_session.ses_key)
-		return -EACCES;
-
-	return lstcon_session_end();
-}
-
-static int
-lst_session_info_ioctl(struct lstio_session_info_args *args)
-{
-	/* no checking of key */
-
-	if (!args->lstio_ses_idp ||	/* address for output sid */
-	    !args->lstio_ses_keyp ||	/* address for output key */
-	    !args->lstio_ses_featp ||	/* address for output features */
-	    !args->lstio_ses_ndinfo ||	/* address for output ndinfo */
-	    !args->lstio_ses_namep ||	/* address for output name */
-	    args->lstio_ses_nmlen <= 0 ||
-	    args->lstio_ses_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	return lstcon_session_info(args->lstio_ses_idp,
-				   args->lstio_ses_keyp,
-				   args->lstio_ses_featp,
-				   args->lstio_ses_ndinfo,
-				   args->lstio_ses_namep,
-				   args->lstio_ses_nmlen);
-}
-
-static int
-lst_debug_ioctl(struct lstio_debug_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int client = 1;
-	int rc;
-
-	if (args->lstio_dbg_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_dbg_resultp)
-		return -EINVAL;
-
-	if (args->lstio_dbg_namep &&	/* name of batch/group */
-	    (args->lstio_dbg_nmlen <= 0 ||
-	     args->lstio_dbg_nmlen > LST_NAME_SIZE))
-		return -EINVAL;
-
-	if (args->lstio_dbg_namep) {
-		if (copy_from_user(name, args->lstio_dbg_namep,
-				   args->lstio_dbg_nmlen))
-			return -EFAULT;
-
-		name[args->lstio_dbg_nmlen] = 0;
-	}
-
-	rc = -EINVAL;
-
-	switch (args->lstio_dbg_type) {
-	case LST_OPC_SESSION:
-		rc = lstcon_session_debug(args->lstio_dbg_timeout,
-					  args->lstio_dbg_resultp);
-		break;
-
-	case LST_OPC_BATCHSRV:
-		client = 0;
-		/* fall through */
-	case LST_OPC_BATCHCLI:
-		if (!args->lstio_dbg_namep)
-			goto out;
-
-		rc = lstcon_batch_debug(args->lstio_dbg_timeout,
-					name, client, args->lstio_dbg_resultp);
-		break;
-
-	case LST_OPC_GROUP:
-		if (!args->lstio_dbg_namep)
-			goto out;
-
-		rc = lstcon_group_debug(args->lstio_dbg_timeout,
-					name, args->lstio_dbg_resultp);
-		break;
-
-	case LST_OPC_NODES:
-		if (args->lstio_dbg_count <= 0 ||
-		    !args->lstio_dbg_idsp)
-			goto out;
-
-		rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
-					args->lstio_dbg_count,
-					args->lstio_dbg_idsp,
-					args->lstio_dbg_resultp);
-		break;
-
-	default:
-		break;
-	}
-
-out:
-	return rc;
-}
-
-static int
-lst_group_add_ioctl(struct lstio_group_add_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_group_add(name);
-
-	return rc;
-}
-
-static int
-lst_group_del_ioctl(struct lstio_group_del_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_group_del(name);
-
-	return rc;
-}
-
-static int
-lst_group_update_ioctl(struct lstio_group_update_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_resultp ||
-	    !args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	switch (args->lstio_grp_opc) {
-	case LST_GROUP_CLEAN:
-		rc = lstcon_group_clean(name, args->lstio_grp_args);
-		break;
-
-	case LST_GROUP_REFRESH:
-		rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
-		break;
-
-	case LST_GROUP_RMND:
-		if (args->lstio_grp_count <= 0 ||
-		    !args->lstio_grp_idsp) {
-			rc = -EINVAL;
-			break;
-		}
-		rc = lstcon_nodes_remove(name, args->lstio_grp_count,
-					 args->lstio_grp_idsp,
-					 args->lstio_grp_resultp);
-		break;
-
-	default:
-		rc = -EINVAL;
-		break;
-	}
-
-	return rc;
-}
-
-static int
-lst_nodes_add_ioctl(struct lstio_group_nodes_args *args)
-{
-	unsigned int feats;
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_idsp ||	/* array of ids */
-	    args->lstio_grp_count <= 0 ||
-	    !args->lstio_grp_resultp ||
-	    !args->lstio_grp_featp ||
-	    !args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_nodes_add(name, args->lstio_grp_count,
-			      args->lstio_grp_idsp, &feats,
-			      args->lstio_grp_resultp);
-
-	if (!rc &&
-	    copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) {
-		return -EINVAL;
-	}
-
-	return rc;
-}
-
-static int
-lst_group_list_ioctl(struct lstio_group_list_args *args)
-{
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (args->lstio_grp_idx < 0 ||
-	    !args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	return lstcon_group_list(args->lstio_grp_idx,
-				 args->lstio_grp_nmlen,
-				 args->lstio_grp_namep);
-}
-
-static int
-lst_group_info_ioctl(struct lstio_group_info_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int ndent;
-	int index;
-	int rc;
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (!args->lstio_grp_entp &&	/* output: group entry */
-	    !args->lstio_grp_dentsp)	/* output: node entry */
-		return -EINVAL;
-
-	if (args->lstio_grp_dentsp) {		/* have node entry */
-		if (!args->lstio_grp_idxp ||	/* node index */
-		    !args->lstio_grp_ndentp)	/* # of node entry */
-			return -EINVAL;
-
-		if (copy_from_user(&ndent, args->lstio_grp_ndentp,
-				   sizeof(ndent)) ||
-		    copy_from_user(&index, args->lstio_grp_idxp,
-				   sizeof(index)))
-			return -EFAULT;
-
-		if (ndent <= 0 || index < 0)
-			return -EINVAL;
-	}
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_group_info(name, args->lstio_grp_entp,
-			       &index, &ndent, args->lstio_grp_dentsp);
-
-	if (rc)
-		return rc;
-
-	if (args->lstio_grp_dentsp &&
-	    (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
-	     copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-lst_batch_add_ioctl(struct lstio_batch_add_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_add(name);
-
-	return rc;
-}
-
-static int
-lst_batch_run_ioctl(struct lstio_batch_run_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_run(name, args->lstio_bat_timeout,
-			      args->lstio_bat_resultp);
-
-	return rc;
-}
-
-static int
-lst_batch_stop_ioctl(struct lstio_batch_stop_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_resultp ||
-	    !args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_stop(name, args->lstio_bat_force,
-			       args->lstio_bat_resultp);
-
-	return rc;
-}
-
-static int
-lst_batch_query_ioctl(struct lstio_batch_query_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_resultp ||
-	    !args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (args->lstio_bat_testidx < 0)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_test_batch_query(name,
-				     args->lstio_bat_testidx,
-				     args->lstio_bat_client,
-				     args->lstio_bat_timeout,
-				     args->lstio_bat_resultp);
-
-	return rc;
-}
-
-static int
-lst_batch_list_ioctl(struct lstio_batch_list_args *args)
-{
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (args->lstio_bat_idx < 0 ||
-	    !args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	return lstcon_batch_list(args->lstio_bat_idx,
-				 args->lstio_bat_nmlen,
-				 args->lstio_bat_namep);
-}
-
-static int
-lst_batch_info_ioctl(struct lstio_batch_info_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-	int index;
-	int ndent;
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_namep ||	/* batch name */
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (!args->lstio_bat_entp &&	/* output: batch entry */
-	    !args->lstio_bat_dentsp)	/* output: node entry */
-		return -EINVAL;
-
-	if (args->lstio_bat_dentsp) {		/* have node entry */
-		if (!args->lstio_bat_idxp ||	/* node index */
-		    !args->lstio_bat_ndentp)	/* # of node entry */
-			return -EINVAL;
-
-		if (copy_from_user(&index, args->lstio_bat_idxp,
-				   sizeof(index)) ||
-		    copy_from_user(&ndent, args->lstio_bat_ndentp,
-				   sizeof(ndent)))
-			return -EFAULT;
-
-		if (ndent <= 0 || index < 0)
-			return -EINVAL;
-	}
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_info(name, args->lstio_bat_entp,
-			       args->lstio_bat_server, args->lstio_bat_testidx,
-			       &index, &ndent, args->lstio_bat_dentsp);
-
-	if (rc)
-		return rc;
-
-	if (args->lstio_bat_dentsp &&
-	    (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
-	     copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
-		rc = -EFAULT;
-
-	return rc;
-}
-
-static int
-lst_stat_query_ioctl(struct lstio_stat_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	/* TODO: not finished */
-	if (args->lstio_sta_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_sta_resultp)
-		return -EINVAL;
-
-	if (args->lstio_sta_idsp) {
-		if (args->lstio_sta_count <= 0)
-			return -EINVAL;
-
-		rc = lstcon_nodes_stat(args->lstio_sta_count,
-				       args->lstio_sta_idsp,
-				       args->lstio_sta_timeout,
-				       args->lstio_sta_resultp);
-	} else if (args->lstio_sta_namep) {
-		if (args->lstio_sta_nmlen <= 0 ||
-		    args->lstio_sta_nmlen > LST_NAME_SIZE)
-			return -EINVAL;
-
-		if (copy_from_user(name, args->lstio_sta_namep,
-				   args->lstio_sta_nmlen))
-			return -EFAULT;
-
-		name[args->lstio_sta_nmlen] = 0;
-
-		rc = lstcon_group_stat(name, args->lstio_sta_timeout,
-				       args->lstio_sta_resultp);
-	} else {
-		rc = -EINVAL;
-	}
-
-	return rc;
-}
-
-static int lst_test_add_ioctl(struct lstio_test_args *args)
-{
-	char batch_name[LST_NAME_SIZE + 1];
-	char src_name[LST_NAME_SIZE + 1];
-	char dst_name[LST_NAME_SIZE + 1];
-	void *param = NULL;
-	int ret = 0;
-	int rc = -ENOMEM;
-
-	if (!args->lstio_tes_resultp ||
-	    !args->lstio_tes_retp ||
-	    !args->lstio_tes_bat_name ||	/* no specified batch */
-	    args->lstio_tes_bat_nmlen <= 0 ||
-	    args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
-	    !args->lstio_tes_sgrp_name ||	/* no source group */
-	    args->lstio_tes_sgrp_nmlen <= 0 ||
-	    args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
-	    !args->lstio_tes_dgrp_name ||	/* no target group */
-	    args->lstio_tes_dgrp_nmlen <= 0 ||
-	    args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (!args->lstio_tes_loop ||		/* negative is infinite */
-	    args->lstio_tes_concur <= 0 ||
-	    args->lstio_tes_dist <= 0 ||
-	    args->lstio_tes_span <= 0)
-		return -EINVAL;
-
-	/* have parameter, check if parameter length is valid */
-	if (args->lstio_tes_param &&
-	    (args->lstio_tes_param_len <= 0 ||
-	     args->lstio_tes_param_len >
-	     PAGE_SIZE - sizeof(struct lstcon_test)))
-		return -EINVAL;
-
-	/* Enforce zero parameter length if there's no parameter */
-	if (!args->lstio_tes_param && args->lstio_tes_param_len)
-		return -EINVAL;
-
-	if (args->lstio_tes_param) {
-		param = memdup_user(args->lstio_tes_param,
-				    args->lstio_tes_param_len);
-		if (IS_ERR(param))
-			return PTR_ERR(param);
-	}
-
-	rc = -EFAULT;
-	if (copy_from_user(batch_name, args->lstio_tes_bat_name,
-			   args->lstio_tes_bat_nmlen) ||
-	    copy_from_user(src_name, args->lstio_tes_sgrp_name,
-			   args->lstio_tes_sgrp_nmlen) ||
-	    copy_from_user(dst_name, args->lstio_tes_dgrp_name,
-			   args->lstio_tes_dgrp_nmlen))
-		goto out;
-
-	/* NUL-terminate the copied names, as the other handlers do */
-	batch_name[args->lstio_tes_bat_nmlen] = 0;
-	src_name[args->lstio_tes_sgrp_nmlen] = 0;
-	dst_name[args->lstio_tes_dgrp_nmlen] = 0;
-
-	rc = lstcon_test_add(batch_name, args->lstio_tes_type,
-			     args->lstio_tes_loop, args->lstio_tes_concur,
-			     args->lstio_tes_dist, args->lstio_tes_span,
-			     src_name, dst_name, param,
-			     args->lstio_tes_param_len,
-			     &ret, args->lstio_tes_resultp);
-
-	if (!rc && ret)
-		rc = (copy_to_user(args->lstio_tes_retp, &ret,
-				   sizeof(ret))) ? -EFAULT : 0;
-out:
-	kfree(param);
-
-	return rc;
-}
-
-int
-lstcon_ioctl_entry(struct notifier_block *nb,
-		   unsigned long cmd, void *vdata)
-{
-	struct libcfs_ioctl_hdr *hdr = vdata;
-	char *buf = NULL;
-	struct libcfs_ioctl_data *data;
-	int opc;
-	int rc = -EINVAL;
-
-	if (cmd != IOC_LIBCFS_LNETST)
-		goto err;
-
-	data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
-
-	opc = data->ioc_u32[0];
-
-	if (data->ioc_plen1 > PAGE_SIZE)
-		goto err;
-
-	buf = kmalloc(data->ioc_plen1, GFP_KERNEL);
-	rc = -ENOMEM;
-	if (!buf)
-		goto err;
-
-	/* copy in parameter */
-	rc = -EFAULT;
-	if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1))
-		goto err;
-
-	mutex_lock(&console_session.ses_mutex);
-
-	console_session.ses_laststamp = ktime_get_real_seconds();
-
-	if (console_session.ses_shutdown) {
-		rc = -ESHUTDOWN;
-		goto out;
-	}
-
-	if (console_session.ses_expired)
-		lstcon_session_end();
-
-	if (opc != LSTIO_SESSION_NEW &&
-	    console_session.ses_state == LST_SESSION_NONE) {
-		CDEBUG(D_NET, "LST no active session\n");
-		rc = -ESRCH;
-		goto out;
-	}
-
-	memset(&console_session.ses_trans_stat, 0, sizeof(struct lstcon_trans_stat));
-
-	switch (opc) {
-	case LSTIO_SESSION_NEW:
-		rc = lst_session_new_ioctl((struct lstio_session_new_args *)buf);
-		break;
-	case LSTIO_SESSION_END:
-		rc = lst_session_end_ioctl((struct lstio_session_end_args *)buf);
-		break;
-	case LSTIO_SESSION_INFO:
-		rc = lst_session_info_ioctl((struct lstio_session_info_args *)buf);
-		break;
-	case LSTIO_DEBUG:
-		rc = lst_debug_ioctl((struct lstio_debug_args *)buf);
-		break;
-	case LSTIO_GROUP_ADD:
-		rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf);
-		break;
-	case LSTIO_GROUP_DEL:
-		rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf);
-		break;
-	case LSTIO_GROUP_UPDATE:
-		rc = lst_group_update_ioctl((struct lstio_group_update_args *)buf);
-		break;
-	case LSTIO_NODES_ADD:
-		rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf);
-		break;
-	case LSTIO_GROUP_LIST:
-		rc = lst_group_list_ioctl((struct lstio_group_list_args *)buf);
-		break;
-	case LSTIO_GROUP_INFO:
-		rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf);
-		break;
-	case LSTIO_BATCH_ADD:
-		rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf);
-		break;
-	case LSTIO_BATCH_START:
-		rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf);
-		break;
-	case LSTIO_BATCH_STOP:
-		rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf);
-		break;
-	case LSTIO_BATCH_QUERY:
-		rc = lst_batch_query_ioctl((struct lstio_batch_query_args *)buf);
-		break;
-	case LSTIO_BATCH_LIST:
-		rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf);
-		break;
-	case LSTIO_BATCH_INFO:
-		rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf);
-		break;
-	case LSTIO_TEST_ADD:
-		rc = lst_test_add_ioctl((struct lstio_test_args *)buf);
-		break;
-	case LSTIO_STAT_QUERY:
-		rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf);
-		break;
-	default:
-		rc = -EINVAL;
-		goto out;
-	}
-
-	if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
-			 sizeof(struct lstcon_trans_stat)))
-		rc = -EFAULT;
-out:
-	mutex_unlock(&console_session.ses_mutex);
-err:
-	kfree(buf);
-
-	return notifier_from_ioctl_errno(rc);
-}
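
lstcon_ioctl_entry() above expects the LSTIO_* opcode in ioc_u32[0], an opcode-specific argument block of at most PAGE_SIZE bytes behind ioc_pbuf1, and copies the per-transaction statistics back out through ioc_pbuf2. A minimal sketch of how a userspace caller might shape such a request; the helper name and the omitted libcfs_ioctl_hdr setup are assumptions for illustration, not taken from this file:

#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical wrapper: fd is an open descriptor on the LNet control
 * device and opc is one of the LSTIO_* opcodes dispatched above. The
 * libcfs_ioctl_hdr version/length fields must also be filled in,
 * which is elided here. */
static int lst_ioctl(int fd, unsigned int opc, void *args, size_t len,
		     struct lstcon_trans_stat *stat)
{
	struct libcfs_ioctl_data data;

	memset(&data, 0, sizeof(data));
	data.ioc_u32[0] = opc;		/* opcode read by the dispatcher */
	data.ioc_plen1 = len;		/* must not exceed PAGE_SIZE */
	data.ioc_pbuf1 = args;		/* copied in with copy_from_user() */
	data.ioc_pbuf2 = (char *)stat;	/* trans stats copied back out */

	return ioctl(fd, IOC_LIBCFS_LNETST, &data);
}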

+ 0 - 1396
drivers/staging/lustre/lnet/selftest/conrpc.c

@@ -1,1396 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.c
- *
- * Console framework rpcs
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include "timer.h"
-#include "conrpc.h"
-#include "console.h"
-
-void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *,
-			   struct lstcon_node *, struct lstcon_trans_stat *);
-
-static void
-lstcon_rpc_done(struct srpc_client_rpc *rpc)
-{
-	struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv;
-
-	LASSERT(crpc && rpc == crpc->crp_rpc);
-	LASSERT(crpc->crp_posted && !crpc->crp_finished);
-
-	spin_lock(&rpc->crpc_lock);
-
-	if (!crpc->crp_trans) {
-		/*
-		 * An orphan RPC belongs to no transaction, so
-		 * nothing is waiting on it; just release it.
-		 */
-		spin_unlock(&rpc->crpc_lock);
-
-		/* release it */
-		lstcon_rpc_put(crpc);
-		return;
-	}
-
-	/* not an orphan RPC */
-	crpc->crp_finished = 1;
-
-	if (!crpc->crp_stamp) {
-		/* not aborted */
-		LASSERT(!crpc->crp_status);
-
-		crpc->crp_stamp = jiffies;
-		crpc->crp_status = rpc->crpc_status;
-	}
-
-	/* wake up the transaction thread if this is the last RPC in the transaction */
-	if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining))
-		wake_up(&crpc->crp_trans->tas_waitq);
-
-	spin_unlock(&rpc->crpc_lock);
-}
-
-static int
-lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats,
-		int bulk_npg, int bulk_len, int embedded,
-		struct lstcon_rpc *crpc)
-{
-	crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
-				       feats, bulk_npg, bulk_len,
-				       lstcon_rpc_done, (void *)crpc);
-	if (!crpc->crp_rpc)
-		return -ENOMEM;
-
-	crpc->crp_trans = NULL;
-	crpc->crp_node = nd;
-	crpc->crp_posted = 0;
-	crpc->crp_finished = 0;
-	crpc->crp_unpacked = 0;
-	crpc->crp_status = 0;
-	crpc->crp_stamp = 0;
-	crpc->crp_embedded = embedded;
-	INIT_LIST_HEAD(&crpc->crp_link);
-
-	atomic_inc(&console_session.ses_rpc_counter);
-
-	return 0;
-}
-
-static int
-lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats,
-		int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp)
-{
-	struct lstcon_rpc *crpc = NULL;
-	int rc;
-
-	spin_lock(&console_session.ses_rpc_lock);
-
-	crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist,
-					struct lstcon_rpc, crp_link);
-	if (crpc)
-		list_del_init(&crpc->crp_link);
-
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	if (!crpc) {
-		crpc = kzalloc(sizeof(*crpc), GFP_NOFS);
-		if (!crpc)
-			return -ENOMEM;
-	}
-
-	rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc);
-	if (!rc) {
-		*crpcpp = crpc;
-		return 0;
-	}
-
-	kfree(crpc);
-
-	return rc;
-}
-
-void
-lstcon_rpc_put(struct lstcon_rpc *crpc)
-{
-	struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk;
-	int i;
-
-	LASSERT(list_empty(&crpc->crp_link));
-
-	for (i = 0; i < bulk->bk_niov; i++) {
-		if (!bulk->bk_iovs[i].bv_page)
-			continue;
-
-		__free_page(bulk->bk_iovs[i].bv_page);
-	}
-
-	srpc_client_rpc_decref(crpc->crp_rpc);
-
-	if (crpc->crp_embedded) {
-		/* embedded RPC, don't recycle it */
-		memset(crpc, 0, sizeof(*crpc));
-		crpc->crp_embedded = 1;
-
-	} else {
-		spin_lock(&console_session.ses_rpc_lock);
-
-		list_add(&crpc->crp_link,
-			 &console_session.ses_rpc_freelist);
-
-		spin_unlock(&console_session.ses_rpc_lock);
-	}
-
-	/* RPC is not alive now */
-	atomic_dec(&console_session.ses_rpc_counter);
-}
-
-static void
-lstcon_rpc_post(struct lstcon_rpc *crpc)
-{
-	struct lstcon_rpc_trans *trans = crpc->crp_trans;
-
-	LASSERT(trans);
-
-	atomic_inc(&trans->tas_remaining);
-	crpc->crp_posted = 1;
-
-	sfw_post_rpc(crpc->crp_rpc);
-}
-
-static char *
-lstcon_rpc_trans_name(int transop)
-{
-	if (transop == LST_TRANS_SESNEW)
-		return "SESNEW";
-
-	if (transop == LST_TRANS_SESEND)
-		return "SESEND";
-
-	if (transop == LST_TRANS_SESQRY)
-		return "SESQRY";
-
-	if (transop == LST_TRANS_SESPING)
-		return "SESPING";
-
-	if (transop == LST_TRANS_TSBCLIADD)
-		return "TSBCLIADD";
-
-	if (transop == LST_TRANS_TSBSRVADD)
-		return "TSBSRVADD";
-
-	if (transop == LST_TRANS_TSBRUN)
-		return "TSBRUN";
-
-	if (transop == LST_TRANS_TSBSTOP)
-		return "TSBSTOP";
-
-	if (transop == LST_TRANS_TSBCLIQRY)
-		return "TSBCLIQRY";
-
-	if (transop == LST_TRANS_TSBSRVQRY)
-		return "TSBSRVQRY";
-
-	if (transop == LST_TRANS_STATQRY)
-		return "STATQRY";
-
-	return "Unknown";
-}
-
-int
-lstcon_rpc_trans_prep(struct list_head *translist, int transop,
-		      struct lstcon_rpc_trans **transpp)
-{
-	struct lstcon_rpc_trans *trans;
-
-	if (translist) {
-		list_for_each_entry(trans, translist, tas_link) {
-			/*
-			 * Can't enqueue two private transactions on
-			 * the same object.
-			 */
-			if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE)
-				return -EPERM;
-		}
-	}
-
-	/* create a trans group */
-	trans = kzalloc(sizeof(*trans), GFP_NOFS);
-	if (!trans)
-		return -ENOMEM;
-
-	trans->tas_opc = transop;
-
-	if (!translist)
-		INIT_LIST_HEAD(&trans->tas_olink);
-	else
-		list_add_tail(&trans->tas_olink, translist);
-
-	list_add_tail(&trans->tas_link, &console_session.ses_trans_list);
-
-	INIT_LIST_HEAD(&trans->tas_rpcs_list);
-	atomic_set(&trans->tas_remaining, 0);
-	init_waitqueue_head(&trans->tas_waitq);
-
-	spin_lock(&console_session.ses_rpc_lock);
-	trans->tas_features = console_session.ses_features;
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	*transpp = trans;
-	return 0;
-}
-
-void
-lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc)
-{
-	list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
-	crpc->crp_trans = trans;
-}
-
-void
-lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error)
-{
-	struct srpc_client_rpc *rpc;
-	struct lstcon_rpc *crpc;
-	struct lstcon_node *nd;
-
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		rpc = crpc->crp_rpc;
-
-		spin_lock(&rpc->crpc_lock);
-
-		if (!crpc->crp_posted || /* not posted */
-		    crpc->crp_stamp) {	 /* rpc done or aborted already */
-			if (!crpc->crp_stamp) {
-				crpc->crp_stamp = jiffies;
-				crpc->crp_status = -EINTR;
-			}
-			spin_unlock(&rpc->crpc_lock);
-			continue;
-		}
-
-		crpc->crp_stamp = jiffies;
-		crpc->crp_status = error;
-
-		spin_unlock(&rpc->crpc_lock);
-
-		sfw_abort_rpc(rpc);
-
-		if (error != -ETIMEDOUT)
-			continue;
-
-		nd = crpc->crp_node;
-		if (time_after(nd->nd_stamp, crpc->crp_stamp))
-			continue;
-
-		nd->nd_stamp = crpc->crp_stamp;
-		nd->nd_state = LST_NODE_DOWN;
-	}
-}
-
-static int
-lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans)
-{
-	if (console_session.ses_shutdown &&
-	    !list_empty(&trans->tas_olink)) /* Not an end session RPC */
-		return 1;
-
-	return !atomic_read(&trans->tas_remaining) ? 1 : 0;
-}
-
-int
-lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout)
-{
-	struct lstcon_rpc *crpc;
-	int rc;
-
-	if (list_empty(&trans->tas_rpcs_list))
-		return 0;
-
-	if (timeout < LST_TRANS_MIN_TIMEOUT)
-		timeout = LST_TRANS_MIN_TIMEOUT;
-
-	CDEBUG(D_NET, "Transaction %s started\n",
-	       lstcon_rpc_trans_name(trans->tas_opc));
-
-	/* post all requests */
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		LASSERT(!crpc->crp_posted);
-
-		lstcon_rpc_post(crpc);
-	}
-
-	mutex_unlock(&console_session.ses_mutex);
-
-	rc = wait_event_interruptible_timeout(trans->tas_waitq,
-					      lstcon_rpc_trans_check(trans),
-					      timeout * HZ);
-	rc = (rc > 0) ? 0 : ((rc < 0) ? -EINTR : -ETIMEDOUT);
-
-	mutex_lock(&console_session.ses_mutex);
-
-	if (console_session.ses_shutdown)
-		rc = -ESHUTDOWN;
-
-	if (rc || atomic_read(&trans->tas_remaining)) {
-		/* treat short timeout as canceled */
-		if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2)
-			rc = -EINTR;
-
-		lstcon_rpc_trans_abort(trans, rc);
-	}
-
-	CDEBUG(D_NET, "Transaction %s stopped: %d\n",
-	       lstcon_rpc_trans_name(trans->tas_opc), rc);
-
-	lstcon_rpc_trans_stat(trans, lstcon_trans_stat());
-
-	return rc;
-}
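
The return-code folding in lstcon_rpc_trans_postwait() follows the wait_event_interruptible_timeout() contract:

/*
 * wait_event_interruptible_timeout() result mapping used above:
 *
 *   > 0   condition met before the timeout  ->  0 (success)
 *  == 0   timed out                         ->  -ETIMEDOUT
 *   < 0   interrupted by a signal           ->  -EINTR
 */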
-
-static int
-lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp)
-{
-	struct lstcon_node *nd = crpc->crp_node;
-	struct srpc_client_rpc *rpc = crpc->crp_rpc;
-	struct srpc_generic_reply *rep;
-
-	LASSERT(nd && rpc);
-	LASSERT(crpc->crp_stamp);
-
-	if (crpc->crp_status) {
-		*msgpp = NULL;
-		return crpc->crp_status;
-	}
-
-	*msgpp = &rpc->crpc_replymsg;
-	if (!crpc->crp_unpacked) {
-		sfw_unpack_message(*msgpp);
-		crpc->crp_unpacked = 1;
-	}
-
-	if (time_after(nd->nd_stamp, crpc->crp_stamp))
-		return 0;
-
-	nd->nd_stamp = crpc->crp_stamp;
-	rep = &(*msgpp)->msg_body.reply;
-
-	if (rep->sid.ses_nid == LNET_NID_ANY)
-		nd->nd_state = LST_NODE_UNKNOWN;
-	else if (lstcon_session_match(rep->sid))
-		nd->nd_state = LST_NODE_ACTIVE;
-	else
-		nd->nd_state = LST_NODE_BUSY;
-
-	return 0;
-}
-
-void
-lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, struct lstcon_trans_stat *stat)
-{
-	struct lstcon_rpc *crpc;
-	struct srpc_msg *rep;
-	int error;
-
-	LASSERT(stat);
-
-	memset(stat, 0, sizeof(*stat));
-
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		lstcon_rpc_stat_total(stat, 1);
-
-		LASSERT(crpc->crp_stamp);
-
-		error = lstcon_rpc_get_reply(crpc, &rep);
-		if (error) {
-			lstcon_rpc_stat_failure(stat, 1);
-			if (!stat->trs_rpc_errno)
-				stat->trs_rpc_errno = -error;
-
-			continue;
-		}
-
-		lstcon_rpc_stat_success(stat, 1);
-
-		lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat);
-	}
-
-	if (trans->tas_opc == LST_TRANS_SESNEW && !stat->trs_fwk_errno) {
-		stat->trs_fwk_errno =
-		      lstcon_session_feats_check(trans->tas_features);
-	}
-
-	CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, RPC error(%d), Framework error(%d)\n",
-	       lstcon_rpc_trans_name(trans->tas_opc),
-	       lstcon_rpc_stat_success(stat, 0),
-	       lstcon_rpc_stat_failure(stat, 0),
-	       lstcon_rpc_stat_total(stat, 0),
-	       stat->trs_rpc_errno, stat->trs_fwk_errno);
-}
-
-int
-lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
-			     struct list_head __user *head_up,
-			     lstcon_rpc_readent_func_t readent)
-{
-	struct list_head tmp;
-	struct list_head __user *next;
-	struct lstcon_rpc_ent *ent;
-	struct srpc_generic_reply *rep;
-	struct lstcon_rpc *crpc;
-	struct srpc_msg *msg;
-	struct lstcon_node *nd;
-	long dur;
-	struct timeval tv;
-	int error;
-
-	LASSERT(head_up);
-
-	next = head_up;
-
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		if (copy_from_user(&tmp, next,
-				   sizeof(struct list_head)))
-			return -EFAULT;
-
-		next = tmp.next;
-		if (next == head_up)
-			return 0;
-
-		ent = list_entry(next, struct lstcon_rpc_ent, rpe_link);
-
-		LASSERT(crpc->crp_stamp);
-
-		error = lstcon_rpc_get_reply(crpc, &msg);
-
-		nd = crpc->crp_node;
-
-		dur = (long)(crpc->crp_stamp -
-			     (unsigned long)console_session.ses_id.ses_stamp);
-		jiffies_to_timeval(dur, &tv);
-
-		if (copy_to_user(&ent->rpe_peer, &nd->nd_id,
-				 sizeof(struct lnet_process_id)) ||
-		    copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) ||
-		    copy_to_user(&ent->rpe_state, &nd->nd_state,
-				 sizeof(nd->nd_state)) ||
-		    copy_to_user(&ent->rpe_rpc_errno, &error,
-				 sizeof(error)))
-			return -EFAULT;
-
-		if (error)
-			continue;
-
-		/* RPC is done */
-		rep = (struct srpc_generic_reply *)&msg->msg_body.reply;
-
-		if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(rep->sid)) ||
-		    copy_to_user(&ent->rpe_fwk_errno, &rep->status,
-				 sizeof(rep->status)))
-			return -EFAULT;
-
-		if (!readent)
-			continue;
-
-		error = readent(trans->tas_opc, msg, ent);
-		if (error)
-			return error;
-	}
-
-	return 0;
-}
-
-void
-lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans)
-{
-	struct srpc_client_rpc *rpc;
-	struct lstcon_rpc *crpc;
-	struct lstcon_rpc *tmp;
-	int count = 0;
-
-	list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) {
-		rpc = crpc->crp_rpc;
-
-		spin_lock(&rpc->crpc_lock);
-
-		/* free it if it was never posted or has already finished */
-		if (!crpc->crp_posted || crpc->crp_finished) {
-			spin_unlock(&rpc->crpc_lock);
-
-			list_del_init(&crpc->crp_link);
-			lstcon_rpc_put(crpc);
-
-			continue;
-		}
-
-		/*
-		 * RPCs may still not have been called back (even after
-		 * LNetMDUnlink) because of the huge timeout for an
-		 * inaccessible network; don't make the user wait for
-		 * them, just abandon them and they will be recycled in
-		 * the completion callback.
-		 */
-		LASSERT(crpc->crp_status);
-
-		crpc->crp_node = NULL;
-		crpc->crp_trans = NULL;
-		list_del_init(&crpc->crp_link);
-		count++;
-
-		spin_unlock(&rpc->crpc_lock);
-
-		atomic_dec(&trans->tas_remaining);
-	}
-
-	LASSERT(!atomic_read(&trans->tas_remaining));
-
-	list_del(&trans->tas_link);
-	if (!list_empty(&trans->tas_olink))
-		list_del(&trans->tas_olink);
-
-	CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n",
-	       lstcon_rpc_trans_name(trans->tas_opc), count);
-
-	kfree(trans);
-}
-
-int
-lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
-		   unsigned int feats, struct lstcon_rpc **crpc)
-{
-	struct srpc_mksn_reqst *msrq;
-	struct srpc_rmsn_reqst *rsrq;
-	int rc;
-
-	switch (transop) {
-	case LST_TRANS_SESNEW:
-		rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION,
-				     feats, 0, 0, crpc);
-		if (rc)
-			return rc;
-
-		msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst;
-		msrq->mksn_sid = console_session.ses_id;
-		msrq->mksn_force = console_session.ses_force;
-		strlcpy(msrq->mksn_name, console_session.ses_name,
-			sizeof(msrq->mksn_name));
-		break;
-
-	case LST_TRANS_SESEND:
-		rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION,
-				     feats, 0, 0, crpc);
-		if (rc)
-			return rc;
-
-		rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst;
-		rsrq->rmsn_sid = console_session.ses_id;
-		break;
-
-	default:
-		LBUG();
-	}
-
-	return 0;
-}
-
-int
-lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats,
-		   struct lstcon_rpc **crpc)
-{
-	struct srpc_debug_reqst *drq;
-	int rc;
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc);
-	if (rc)
-		return rc;
-
-	drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
-	drq->dbg_sid = console_session.ses_id;
-	drq->dbg_flags = 0;
-
-	return rc;
-}
-
-int
-lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
-		   struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc)
-{
-	struct lstcon_batch *batch;
-	struct srpc_batch_reqst *brq;
-	int rc;
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc);
-	if (rc)
-		return rc;
-
-	brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst;
-
-	brq->bar_sid = console_session.ses_id;
-	brq->bar_bid = tsb->tsb_id;
-	brq->bar_testidx = tsb->tsb_index;
-	brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN :
-		       (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP :
-		       SRPC_BATCH_OPC_QUERY);
-
-	if (transop != LST_TRANS_TSBRUN &&
-	    transop != LST_TRANS_TSBSTOP)
-		return 0;
-
-	LASSERT(!tsb->tsb_index);
-
-	batch = (struct lstcon_batch *)tsb;
-	brq->bar_arg = batch->bat_arg;
-
-	return 0;
-}
-
-int
-lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats,
-		    struct lstcon_rpc **crpc)
-{
-	struct srpc_stat_reqst *srq;
-	int rc;
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc);
-	if (rc)
-		return rc;
-
-	srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst;
-
-	srq->str_sid = console_session.ses_id;
-	srq->str_type = 0; /* XXX remove it */
-
-	return 0;
-}
-
-static struct lnet_process_id_packed *
-lstcon_next_id(int idx, int nkiov, struct bio_vec *kiov)
-{
-	struct lnet_process_id_packed *pid;
-	int i;
-
-	i = idx / SFW_ID_PER_PAGE;
-
-	LASSERT(i < nkiov);
-
-	pid = (struct lnet_process_id_packed *)page_address(kiov[i].bv_page);
-
-	return &pid[idx % SFW_ID_PER_PAGE];
-}
-
-static int
-lstcon_dstnodes_prep(struct lstcon_group *grp, int idx,
-		     int dist, int span, int nkiov, struct bio_vec *kiov)
-{
-	struct lnet_process_id_packed *pid;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int start;
-	int end;
-	int i = 0;
-
-	LASSERT(dist >= 1);
-	LASSERT(span >= 1);
-	LASSERT(grp->grp_nnode >= 1);
-
-	if (span > grp->grp_nnode)
-		return -EINVAL;
-
-	start = ((idx / dist) * span) % grp->grp_nnode;
-	end = ((idx / dist) * span + span - 1) % grp->grp_nnode;
-
-	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
-		nd = ndl->ndl_node;
-		if (i < start) {
-			i++;
-			continue;
-		}
-
-		if (i > (end >= start ? end : grp->grp_nnode))
-			break;
-
-		pid = lstcon_next_id((i - start), nkiov, kiov);
-		pid->nid = nd->nd_id.nid;
-		pid->pid = nd->nd_id.pid;
-		i++;
-	}
-
-	if (start <= end) /* done */
-		return 0;
-
-	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
-		if (i > grp->grp_nnode + end)
-			break;
-
-		nd = ndl->ndl_node;
-		pid = lstcon_next_id((i - start), nkiov, kiov);
-		pid->nid = nd->nd_id.nid;
-		pid->pid = nd->nd_id.pid;
-		i++;
-	}
-
-	return 0;
-}
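
The dist/span arithmetic above maps each client index onto a window of destination nodes that may wrap around the group; the second list walk handles the wrapped tail. A worked example:

/*
 * Assuming grp_nnode = 4, dist = 1, span = 2:
 *
 *   idx   start = ((idx / dist) * span) % 4   end = (start + span - 1) % 4
 *    0                    0                                1
 *    1                    2                                3
 *    2                    0                                1
 *
 * With span = 3 and idx = 1, start = 3 and end = 1, so end < start
 * and the window wraps: nodes 3, 0 and 1 are selected by the two loops.
 */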
-
-static int
-lstcon_pingrpc_prep(struct lst_test_ping_param *param, struct srpc_test_reqst *req)
-{
-	struct test_ping_req *prq = &req->tsr_u.ping;
-
-	prq->png_size = param->png_size;
-	prq->png_flags = param->png_flags;
-	/* TODO dest */
-	return 0;
-}
-
-static int
-lstcon_bulkrpc_v0_prep(struct lst_test_bulk_param *param,
-		       struct srpc_test_reqst *req)
-{
-	struct test_bulk_req *brq = &req->tsr_u.bulk_v0;
-
-	brq->blk_opc = param->blk_opc;
-	brq->blk_npg = DIV_ROUND_UP(param->blk_size, PAGE_SIZE);
-	brq->blk_flags = param->blk_flags;
-
-	return 0;
-}
-
-static int
-lstcon_bulkrpc_v1_prep(struct lst_test_bulk_param *param, bool is_client,
-		       struct srpc_test_reqst *req)
-{
-	struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1;
-
-	brq->blk_opc = param->blk_opc;
-	brq->blk_flags = param->blk_flags;
-	brq->blk_len = param->blk_size;
-	brq->blk_offset	= is_client ? param->blk_cli_off : param->blk_srv_off;
-
-	return 0;
-}
-
-int
-lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
-		    struct lstcon_test *test, struct lstcon_rpc **crpc)
-{
-	struct lstcon_group *sgrp = test->tes_src_grp;
-	struct lstcon_group *dgrp = test->tes_dst_grp;
-	struct srpc_test_reqst *trq;
-	struct srpc_bulk *bulk;
-	int i;
-	int npg = 0;
-	int nob = 0;
-	int rc = 0;
-
-	if (transop == LST_TRANS_TSBCLIADD) {
-		npg = sfw_id_pages(test->tes_span);
-		nob = !(feats & LST_FEAT_BULK_LEN) ?
-		      npg * PAGE_SIZE :
-		      sizeof(struct lnet_process_id_packed) * test->tes_span;
-	}
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc);
-	if (rc)
-		return rc;
-
-	trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst;
-
-	if (transop == LST_TRANS_TSBSRVADD) {
-		int ndist = DIV_ROUND_UP(sgrp->grp_nnode, test->tes_dist);
-		int nspan = DIV_ROUND_UP(dgrp->grp_nnode, test->tes_span);
-		int nmax = DIV_ROUND_UP(ndist, nspan);
-
-		trq->tsr_ndest = 0;
-		trq->tsr_loop = nmax * test->tes_dist * test->tes_concur;
-	} else {
-		bulk = &(*crpc)->crp_rpc->crpc_bulk;
-
-		for (i = 0; i < npg; i++) {
-			int len;
-
-			LASSERT(nob > 0);
-
-			len = !(feats & LST_FEAT_BULK_LEN) ?
-			      PAGE_SIZE :
-			      min_t(int, nob, PAGE_SIZE);
-			nob -= len;
-
-			bulk->bk_iovs[i].bv_offset = 0;
-			bulk->bk_iovs[i].bv_len = len;
-			bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL);
-
-			if (!bulk->bk_iovs[i].bv_page) {
-				lstcon_rpc_put(*crpc);
-				return -ENOMEM;
-			}
-		}
-
-		bulk->bk_sink = 0;
-
-		LASSERT(transop == LST_TRANS_TSBCLIADD);
-
-		rc = lstcon_dstnodes_prep(test->tes_dst_grp,
-					  test->tes_cliidx++,
-					  test->tes_dist,
-					  test->tes_span,
-					  npg, &bulk->bk_iovs[0]);
-		if (rc) {
-			lstcon_rpc_put(*crpc);
-			return rc;
-		}
-
-		trq->tsr_ndest = test->tes_span;
-		trq->tsr_loop = test->tes_loop;
-	}
-
-	trq->tsr_sid = console_session.ses_id;
-	trq->tsr_bid = test->tes_hdr.tsb_id;
-	trq->tsr_concur = test->tes_concur;
-	trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0;
-	trq->tsr_stop_onerr = !!test->tes_stop_onerr;
-
-	switch (test->tes_type) {
-	case LST_TEST_PING:
-		trq->tsr_service = SRPC_SERVICE_PING;
-		rc = lstcon_pingrpc_prep((struct lst_test_ping_param *)
-					 &test->tes_param[0], trq);
-		break;
-
-	case LST_TEST_BULK:
-		trq->tsr_service = SRPC_SERVICE_BRW;
-		if (!(feats & LST_FEAT_BULK_LEN)) {
-			rc = lstcon_bulkrpc_v0_prep((struct lst_test_bulk_param *)
-						    &test->tes_param[0], trq);
-		} else {
-			rc = lstcon_bulkrpc_v1_prep((struct lst_test_bulk_param *)
-						    &test->tes_param[0],
-						    trq->tsr_is_client, trq);
-		}
-
-		break;
-	default:
-		LBUG();
-		break;
-	}
-
-	return rc;
-}
-
-static int
-lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans,
-			 struct lstcon_node *nd, struct srpc_msg *reply)
-{
-	struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply;
-	int status = mksn_rep->mksn_status;
-
-	if (!status &&
-	    (reply->msg_ses_feats & ~LST_FEATS_MASK)) {
-		mksn_rep->mksn_status = EPROTO;
-		status = EPROTO;
-	}
-
-	if (status == EPROTO) {
-		CNETERR("session protocol error from %s: %u\n",
-			libcfs_nid2str(nd->nd_id.nid),
-			reply->msg_ses_feats);
-	}
-
-	if (status)
-		return status;
-
-	if (!trans->tas_feats_updated) {
-		spin_lock(&console_session.ses_rpc_lock);
-		if (!trans->tas_feats_updated) {	/* recheck with lock */
-			trans->tas_feats_updated = 1;
-			trans->tas_features = reply->msg_ses_feats;
-		}
-		spin_unlock(&console_session.ses_rpc_lock);
-	}
-
-	if (reply->msg_ses_feats != trans->tas_features) {
-		CNETERR("Framework features %x from %s differ from the features on this transaction: %x\n",
-			reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid),
-			trans->tas_features);
-		mksn_rep->mksn_status = EPROTO;
-		status = EPROTO;
-	}
-
-	if (!status) {
-		/* session timeout on remote node */
-		nd->nd_timeout = mksn_rep->mksn_timeout;
-	}
-
-	return status;
-}
-
-void
-lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg,
-		      struct lstcon_node *nd, struct lstcon_trans_stat *stat)
-{
-	struct srpc_rmsn_reply *rmsn_rep;
-	struct srpc_debug_reply *dbg_rep;
-	struct srpc_batch_reply *bat_rep;
-	struct srpc_test_reply *test_rep;
-	struct srpc_stat_reply *stat_rep;
-	int rc = 0;
-
-	switch (trans->tas_opc) {
-	case LST_TRANS_SESNEW:
-		rc = lstcon_sesnew_stat_reply(trans, nd, msg);
-		if (!rc) {
-			lstcon_sesop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_sesop_stat_failure(stat, 1);
-		break;
-
-	case LST_TRANS_SESEND:
-		rmsn_rep = &msg->msg_body.rmsn_reply;
-		/* ESRCH is not an error for end session */
-		if (!rmsn_rep->rmsn_status ||
-		    rmsn_rep->rmsn_status == ESRCH) {
-			lstcon_sesop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_sesop_stat_failure(stat, 1);
-		rc = rmsn_rep->rmsn_status;
-		break;
-
-	case LST_TRANS_SESQRY:
-	case LST_TRANS_SESPING:
-		dbg_rep = &msg->msg_body.dbg_reply;
-
-		if (dbg_rep->dbg_status == ESRCH) {
-			lstcon_sesqry_stat_unknown(stat, 1);
-			return;
-		}
-
-		if (lstcon_session_match(dbg_rep->dbg_sid))
-			lstcon_sesqry_stat_active(stat, 1);
-		else
-			lstcon_sesqry_stat_busy(stat, 1);
-		return;
-
-	case LST_TRANS_TSBRUN:
-	case LST_TRANS_TSBSTOP:
-		bat_rep = &msg->msg_body.bat_reply;
-
-		if (!bat_rep->bar_status) {
-			lstcon_tsbop_stat_success(stat, 1);
-			return;
-		}
-
-		if (bat_rep->bar_status == EPERM &&
-		    trans->tas_opc == LST_TRANS_TSBSTOP) {
-			lstcon_tsbop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_tsbop_stat_failure(stat, 1);
-		rc = bat_rep->bar_status;
-		break;
-
-	case LST_TRANS_TSBCLIQRY:
-	case LST_TRANS_TSBSRVQRY:
-		bat_rep = &msg->msg_body.bat_reply;
-
-		if (bat_rep->bar_active)
-			lstcon_tsbqry_stat_run(stat, 1);
-		else
-			lstcon_tsbqry_stat_idle(stat, 1);
-
-		if (!bat_rep->bar_status)
-			return;
-
-		lstcon_tsbqry_stat_failure(stat, 1);
-		rc = bat_rep->bar_status;
-		break;
-
-	case LST_TRANS_TSBCLIADD:
-	case LST_TRANS_TSBSRVADD:
-		test_rep = &msg->msg_body.tes_reply;
-
-		if (!test_rep->tsr_status) {
-			lstcon_tsbop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_tsbop_stat_failure(stat, 1);
-		rc = test_rep->tsr_status;
-		break;
-
-	case LST_TRANS_STATQRY:
-		stat_rep = &msg->msg_body.stat_reply;
-
-		if (!stat_rep->str_status) {
-			lstcon_statqry_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_statqry_stat_failure(stat, 1);
-		rc = stat_rep->str_status;
-		break;
-
-	default:
-		LBUG();
-	}
-
-	if (!stat->trs_fwk_errno)
-		stat->trs_fwk_errno = rc;
-}
-
-int
-lstcon_rpc_trans_ndlist(struct list_head *ndlist,
-			struct list_head *translist, int transop,
-			void *arg, lstcon_rpc_cond_func_t condition,
-			struct lstcon_rpc_trans **transpp)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	struct lstcon_rpc *rpc;
-	unsigned int feats;
-	int rc;
-
-	/* Creating session RPC for a list of nodes */
-
-	rc = lstcon_rpc_trans_prep(translist, transop, &trans);
-	if (rc) {
-		CERROR("Can't create transaction %d: %d\n", transop, rc);
-		return rc;
-	}
-
-	feats = trans->tas_features;
-	list_for_each_entry(ndl, ndlist, ndl_link) {
-		rc = !condition ? 1 :
-		     condition(transop, ndl->ndl_node, arg);
-
-		if (!rc)
-			continue;
-
-		if (rc < 0) {
-			CDEBUG(D_NET, "Condition error while creating RPC for transaction %d: %d\n",
-			       transop, rc);
-			break;
-		}
-
-		nd = ndl->ndl_node;
-
-		switch (transop) {
-		case LST_TRANS_SESNEW:
-		case LST_TRANS_SESEND:
-			rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc);
-			break;
-		case LST_TRANS_SESQRY:
-		case LST_TRANS_SESPING:
-			rc = lstcon_dbgrpc_prep(nd, feats, &rpc);
-			break;
-		case LST_TRANS_TSBCLIADD:
-		case LST_TRANS_TSBSRVADD:
-			rc = lstcon_testrpc_prep(nd, transop, feats,
-						 (struct lstcon_test *)arg,
-						 &rpc);
-			break;
-		case LST_TRANS_TSBRUN:
-		case LST_TRANS_TSBSTOP:
-		case LST_TRANS_TSBCLIQRY:
-		case LST_TRANS_TSBSRVQRY:
-			rc = lstcon_batrpc_prep(nd, transop, feats,
-						(struct lstcon_tsb_hdr *)arg,
-						&rpc);
-			break;
-		case LST_TRANS_STATQRY:
-			rc = lstcon_statrpc_prep(nd, feats, &rpc);
-			break;
-		default:
-			rc = -EINVAL;
-			break;
-		}
-
-		if (rc) {
-			CERROR("Failed to create RPC for transaction %s: %d\n",
-			       lstcon_rpc_trans_name(transop), rc);
-			break;
-		}
-
-		lstcon_rpc_trans_addreq(trans, rpc);
-	}
-
-	if (!rc) {
-		*transpp = trans;
-		return 0;
-	}
-
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-static void
-lstcon_rpc_pinger(void *arg)
-{
-	struct stt_timer *ptimer = (struct stt_timer *)arg;
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_rpc *crpc;
-	struct srpc_msg *rep;
-	struct srpc_debug_reqst *drq;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int intv;
-	int count = 0;
-	int rc;
-
-	/*
-	 * The RPC pinger is a special case of a transaction;
-	 * it is invoked by a timer at an 8-second interval.
-	 */
-	mutex_lock(&console_session.ses_mutex);
-
-	if (console_session.ses_shutdown || console_session.ses_expired) {
-		mutex_unlock(&console_session.ses_mutex);
-		return;
-	}
-
-	if (!console_session.ses_expired &&
-	    ktime_get_real_seconds() - console_session.ses_laststamp >
-	    (time64_t)console_session.ses_timeout)
-		console_session.ses_expired = 1;
-
-	trans = console_session.ses_ping;
-
-	LASSERT(trans);
-
-	list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) {
-		nd = ndl->ndl_node;
-
-		if (console_session.ses_expired) {
-			/* idle console, end session on all nodes */
-			if (nd->nd_state != LST_NODE_ACTIVE)
-				continue;
-
-			rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND,
-						trans->tas_features, &crpc);
-			if (rc) {
-				CERROR("Out of memory\n");
-				break;
-			}
-
-			lstcon_rpc_trans_addreq(trans, crpc);
-			lstcon_rpc_post(crpc);
-
-			continue;
-		}
-
-		crpc = &nd->nd_ping;
-
-		if (crpc->crp_rpc) {
-			LASSERT(crpc->crp_trans == trans);
-			LASSERT(!list_empty(&crpc->crp_link));
-
-			spin_lock(&crpc->crp_rpc->crpc_lock);
-
-			LASSERT(crpc->crp_posted);
-
-			if (!crpc->crp_finished) {
-				/* in flight */
-				spin_unlock(&crpc->crp_rpc->crpc_lock);
-				continue;
-			}
-
-			spin_unlock(&crpc->crp_rpc->crpc_lock);
-
-			lstcon_rpc_get_reply(crpc, &rep);
-
-			list_del_init(&crpc->crp_link);
-
-			lstcon_rpc_put(crpc);
-		}
-
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			continue;
-
-		intv = (jiffies - nd->nd_stamp) / msecs_to_jiffies(MSEC_PER_SEC);
-		if (intv < nd->nd_timeout / 2)
-			continue;
-
-		rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG,
-				     trans->tas_features, 0, 0, 1, crpc);
-		if (rc) {
-			CERROR("Out of memory\n");
-			break;
-		}
-
-		drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
-		drq->dbg_sid = console_session.ses_id;
-		drq->dbg_flags = 0;
-
-		lstcon_rpc_trans_addreq(trans, crpc);
-		lstcon_rpc_post(crpc);
-
-		count++;
-	}
-
-	if (console_session.ses_expired) {
-		mutex_unlock(&console_session.ses_mutex);
-		return;
-	}
-
-	CDEBUG(D_NET, "Ping %d nodes in session\n", count);
-
-	ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
-	stt_add_timer(ptimer);
-
-	mutex_unlock(&console_session.ses_mutex);
-}
-
-int
-lstcon_rpc_pinger_start(void)
-{
-	struct stt_timer *ptimer;
-	int rc;
-
-	LASSERT(list_empty(&console_session.ses_rpc_freelist));
-	LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-
-	rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING,
-				   &console_session.ses_ping);
-	if (rc) {
-		CERROR("Failed to create console pinger\n");
-		return rc;
-	}
-
-	ptimer = &console_session.ses_ping_timer;
-	ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
-
-	stt_add_timer(ptimer);
-
-	return 0;
-}
-
-void
-lstcon_rpc_pinger_stop(void)
-{
-	LASSERT(console_session.ses_shutdown);
-
-	stt_del_timer(&console_session.ses_ping_timer);
-
-	lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN);
-	lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat());
-	lstcon_rpc_trans_destroy(console_session.ses_ping);
-
-	memset(lstcon_trans_stat(), 0, sizeof(struct lstcon_trans_stat));
-
-	console_session.ses_ping = NULL;
-}
-
-void
-lstcon_rpc_cleanup_wait(void)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_rpc *crpc;
-	struct lstcon_rpc *temp;
-	struct list_head *pacer;
-	struct list_head zlist;
-
-	/* Called while holding the global mutex */
-
-	LASSERT(console_session.ses_shutdown);
-
-	while (!list_empty(&console_session.ses_trans_list)) {
-		list_for_each(pacer, &console_session.ses_trans_list) {
-			trans = list_entry(pacer, struct lstcon_rpc_trans,
-					   tas_link);
-
-			CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
-			       lstcon_rpc_trans_name(trans->tas_opc));
-
-			wake_up(&trans->tas_waitq);
-		}
-
-		mutex_unlock(&console_session.ses_mutex);
-
-		CWARN("Session is shutting down, waiting for termination of transactions\n");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-
-		mutex_lock(&console_session.ses_mutex);
-	}
-
-	spin_lock(&console_session.ses_rpc_lock);
-
-	lst_wait_until(!atomic_read(&console_session.ses_rpc_counter),
-		       console_session.ses_rpc_lock,
-		       "Network is not accessible or target is down, waiting for %d console RPCs to be recycled\n",
-		       atomic_read(&console_session.ses_rpc_counter));
-
-	list_add(&zlist, &console_session.ses_rpc_freelist);
-	list_del_init(&console_session.ses_rpc_freelist);
-
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	list_for_each_entry_safe(crpc, temp, &zlist, crp_link) {
-		list_del(&crpc->crp_link);
-		kfree(crpc);
-	}
-}
-
-int
-lstcon_rpc_module_init(void)
-{
-	INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list);
-	console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger;
-	console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer;
-
-	console_session.ses_ping = NULL;
-
-	spin_lock_init(&console_session.ses_rpc_lock);
-	atomic_set(&console_session.ses_rpc_counter, 0);
-	INIT_LIST_HEAD(&console_session.ses_rpc_freelist);
-
-	return 0;
-}
-
-void
-lstcon_rpc_module_fini(void)
-{
-	LASSERT(list_empty(&console_session.ses_rpc_freelist));
-	LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-}

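An aside on the pinger removed above: it is not a periodic timer in the usual sense. Each run of lstcon_rpc_pinger() does its work under ses_mutex and then re-arms its own one-shot stt_timer (stt_expires = now + LST_PING_INTERVAL; stt_add_timer()), and it deliberately returns early without re-arming once the session has expired, which is how the ping loop terminates. Below is a minimal userspace sketch of that self-re-arming pattern, using POSIX timers instead of the stt_timer service; all names are illustrative, not part of Lustre.

#include <signal.h>
#include <time.h>
#include <unistd.h>

/* build: cc -o pinger pinger.c -lrt (older glibc needs -lrt) */

#define PING_INTERVAL 8	/* seconds, mirroring LST_PING_INTERVAL */

static timer_t pinger;
static volatile sig_atomic_t expired;	/* would be set when the session times out */

static void rearm(void)
{
	struct itimerspec its = {
		/* one-shot: it_interval stays zero; the handler re-arms,
		 * just as lstcon_rpc_pinger() re-calls stt_add_timer()
		 */
		.it_value = { .tv_sec = PING_INTERVAL },
	};

	timer_settime(pinger, 0, &its, NULL);
}

static void on_tick(int sig)
{
	(void)sig;
	write(1, "tick\n", 5);	/* stand-in for pinging every node */

	if (expired)
		return;		/* session gone: don't re-arm, the loop ends */
	rearm();		/* otherwise schedule the next tick */
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_tick };

	sigaction(SIGALRM, &sa, NULL);	/* NULL sigevent defaults to SIGALRM */
	timer_create(CLOCK_MONOTONIC, NULL, &pinger);
	rearm();

	for (int i = 0; i < 3; i++)
		pause();	/* let a few ticks fire (~24 s) */
	return 0;
}
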
+ 0 - 142
drivers/staging/lustre/lnet/selftest/conrpc.h

@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.h
- *
- * Console rpc
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#ifndef __LST_CONRPC_H__
-#define __LST_CONRPC_H__
-
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "rpc.h"
-#include "selftest.h"
-
-/* Console rpc and rpc transaction */
-#define LST_TRANS_TIMEOUT	30
-#define LST_TRANS_MIN_TIMEOUT	3
-
-#define LST_VALIDATE_TIMEOUT(t) min(max(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT)
-
-#define LST_PING_INTERVAL	8
-
-struct lstcon_rpc_trans;
-struct lstcon_tsb_hdr;
-struct lstcon_test;
-struct lstcon_node;
-
-struct lstcon_rpc {
-	struct list_head	 crp_link;	/* chain on rpc transaction */
-	struct srpc_client_rpc	*crp_rpc;	/* client rpc */
-	struct lstcon_node	*crp_node;	/* destination node */
-	struct lstcon_rpc_trans *crp_trans;	/* conrpc transaction */
-
-	unsigned int		 crp_posted:1;	/* rpc is posted */
-	unsigned int		 crp_finished:1; /* rpc is finished */
-	unsigned int		 crp_unpacked:1; /* reply is unpacked */
-	/** RPC is embedded in another structure and can't be freed on its own */
-	unsigned int		 crp_embedded:1;
-	int			 crp_status;	/* console rpc errors */
-	unsigned long		 crp_stamp;	/* replied time stamp */
-};
-
-struct lstcon_rpc_trans {
-	struct list_head  tas_olink;	     /* link chain on owner list */
-	struct list_head  tas_link;	     /* link chain on global list */
-	int		  tas_opc;	     /* operation code of transaction */
-	unsigned int	  tas_feats_updated; /* features mask is uptodate */
-	unsigned int	  tas_features;      /* test features mask */
-	wait_queue_head_t tas_waitq;	     /* wait queue head */
-	atomic_t	  tas_remaining;     /* # of un-scheduled rpcs */
-	struct list_head  tas_rpcs_list;     /* queued requests */
-};
-
-#define LST_TRANS_PRIVATE	0x1000
-
-#define LST_TRANS_SESNEW	(LST_TRANS_PRIVATE | 0x01)
-#define LST_TRANS_SESEND	(LST_TRANS_PRIVATE | 0x02)
-#define LST_TRANS_SESQRY	0x03
-#define LST_TRANS_SESPING	0x04
-
-#define LST_TRANS_TSBCLIADD	(LST_TRANS_PRIVATE | 0x11)
-#define LST_TRANS_TSBSRVADD	(LST_TRANS_PRIVATE | 0x12)
-#define LST_TRANS_TSBRUN	(LST_TRANS_PRIVATE | 0x13)
-#define LST_TRANS_TSBSTOP	(LST_TRANS_PRIVATE | 0x14)
-#define LST_TRANS_TSBCLIQRY	0x15
-#define LST_TRANS_TSBSRVQRY	0x16
-
-#define LST_TRANS_STATQRY	0x21
-
-typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
-typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *,
-					 struct lstcon_rpc_ent __user *);
-
-int  lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
-			unsigned int version, struct lstcon_rpc **crpc);
-int  lstcon_dbgrpc_prep(struct lstcon_node *nd,
-			unsigned int version, struct lstcon_rpc **crpc);
-int  lstcon_batrpc_prep(struct lstcon_node *nd, int transop,
-			unsigned int version, struct lstcon_tsb_hdr *tsb,
-			struct lstcon_rpc **crpc);
-int  lstcon_testrpc_prep(struct lstcon_node *nd, int transop,
-			 unsigned int version, struct lstcon_test *test,
-			 struct lstcon_rpc **crpc);
-int  lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version,
-			 struct lstcon_rpc **crpc);
-void lstcon_rpc_put(struct lstcon_rpc *crpc);
-int  lstcon_rpc_trans_prep(struct list_head *translist,
-			   int transop, struct lstcon_rpc_trans **transpp);
-int  lstcon_rpc_trans_ndlist(struct list_head *ndlist,
-			     struct list_head *translist, int transop,
-			     void *arg, lstcon_rpc_cond_func_t condition,
-			     struct lstcon_rpc_trans **transpp);
-void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans,
-			   struct lstcon_trans_stat *stat);
-int  lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
-				  struct list_head __user *head_up,
-				  lstcon_rpc_readent_func_t readent);
-void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error);
-void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans);
-void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans,
-			     struct lstcon_rpc *req);
-int  lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout);
-int  lstcon_rpc_pinger_start(void);
-void lstcon_rpc_pinger_stop(void);
-void lstcon_rpc_cleanup_wait(void);
-int  lstcon_rpc_module_init(void);
-void lstcon_rpc_module_fini(void);
-
-#endif

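A note on the opcode table in the header above: the LST_TRANS_* values are partitioned by the LST_TRANS_PRIVATE bit. Operations that mutate console-side state (session new/end, test/batch add, run, stop) carry the flag, while plain queries (SESQRY, SESPING, TSBCLIQRY, TSBSRVQRY, STATQRY) do not, so a single bit test distinguishes the two classes. A self-contained sketch of that check follows; the helper name is hypothetical, not in the Lustre tree.

#include <stdio.h>

#define LST_TRANS_PRIVATE	0x1000

#define LST_TRANS_SESNEW	(LST_TRANS_PRIVATE | 0x01)
#define LST_TRANS_SESQRY	0x03
#define LST_TRANS_STATQRY	0x21

/* hypothetical helper: console-private ops carry the PRIVATE bit */
static int lst_trans_is_private(int transop)
{
	return (transop & LST_TRANS_PRIVATE) != 0;
}

int main(void)
{
	printf("SESNEW  -> %d\n", lst_trans_is_private(LST_TRANS_SESNEW));	/* 1 */
	printf("SESQRY  -> %d\n", lst_trans_is_private(LST_TRANS_SESQRY));	/* 0 */
	printf("STATQRY -> %d\n", lst_trans_is_private(LST_TRANS_STATQRY));	/* 0 */
	return 0;
}
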
+ 0 - 2104
drivers/staging/lustre/lnet/selftest/console.c

@@ -1,2104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.c
- *
- * Infrastructure of LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include "console.h"
-#include "conrpc.h"
-
-#define LST_NODE_STATE_COUNTER(nd, p)			\
-do {							\
-	if ((nd)->nd_state == LST_NODE_ACTIVE)		\
-		(p)->nle_nactive++;			\
-	else if ((nd)->nd_state == LST_NODE_BUSY)	\
-		(p)->nle_nbusy++;			\
-	else if ((nd)->nd_state == LST_NODE_DOWN)	\
-		(p)->nle_ndown++;			\
-	else						\
-		(p)->nle_nunknown++;			\
-	(p)->nle_nnode++;				\
-} while (0)
-
-struct lstcon_session console_session;
-
-static void
-lstcon_node_get(struct lstcon_node *nd)
-{
-	LASSERT(nd->nd_ref >= 1);
-
-	nd->nd_ref++;
-}
-
-static int
-lstcon_node_find(struct lnet_process_id id, struct lstcon_node **ndpp,
-		 int create)
-{
-	struct lstcon_ndlink	*ndl;
-	unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
-
-	LASSERT(id.nid != LNET_NID_ANY);
-
-	list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx],
-			    ndl_hlink) {
-		if (ndl->ndl_node->nd_id.nid != id.nid ||
-		    ndl->ndl_node->nd_id.pid != id.pid)
-			continue;
-
-		lstcon_node_get(ndl->ndl_node);
-		*ndpp = ndl->ndl_node;
-		return 0;
-	}
-
-	if (!create)
-		return -ENOENT;
-
-	*ndpp = kzalloc(sizeof(**ndpp) + sizeof(*ndl), GFP_KERNEL);
-	if (!*ndpp)
-		return -ENOMEM;
-
-	ndl = (struct lstcon_ndlink *)(*ndpp + 1);
-
-	ndl->ndl_node = *ndpp;
-
-	ndl->ndl_node->nd_ref = 1;
-	ndl->ndl_node->nd_id = id;
-	ndl->ndl_node->nd_stamp = jiffies;
-	ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
-	ndl->ndl_node->nd_timeout = 0;
-	memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc));
-
-	/*
-	 * queued in global hash & list; no refcount is taken by
-	 * the global hash & list, so when the caller releases its
-	 * refcount the node will be released
-	 */
-	list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]);
-	list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list);
-
-	return 0;
-}
-
-static void
-lstcon_node_put(struct lstcon_node *nd)
-{
-	struct lstcon_ndlink *ndl;
-
-	LASSERT(nd->nd_ref > 0);
-
-	if (--nd->nd_ref > 0)
-		return;
-
-	ndl = (struct lstcon_ndlink *)(nd + 1);
-
-	LASSERT(!list_empty(&ndl->ndl_link));
-	LASSERT(!list_empty(&ndl->ndl_hlink));
-
-	/* remove from session */
-	list_del(&ndl->ndl_link);
-	list_del(&ndl->ndl_hlink);
-
-	kfree(nd);
-}
-
-static int
-lstcon_ndlink_find(struct list_head *hash, struct lnet_process_id id,
-		   struct lstcon_ndlink **ndlpp, int create)
-{
-	unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int rc;
-
-	if (id.nid == LNET_NID_ANY)
-		return -EINVAL;
-
-	/* search in hash */
-	list_for_each_entry(ndl, &hash[idx], ndl_hlink) {
-		if (ndl->ndl_node->nd_id.nid != id.nid ||
-		    ndl->ndl_node->nd_id.pid != id.pid)
-			continue;
-
-		*ndlpp = ndl;
-		return 0;
-	}
-
-	if (!create)
-		return -ENOENT;
-
-	/* find or create in session hash */
-	rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0);
-	if (rc)
-		return rc;
-
-	ndl = kzalloc(sizeof(struct lstcon_ndlink), GFP_NOFS);
-	if (!ndl) {
-		lstcon_node_put(nd);
-		return -ENOMEM;
-	}
-
-	*ndlpp = ndl;
-
-	ndl->ndl_node = nd;
-	INIT_LIST_HEAD(&ndl->ndl_link);
-	list_add_tail(&ndl->ndl_hlink, &hash[idx]);
-
-	return 0;
-}
-
-static void
-lstcon_ndlink_release(struct lstcon_ndlink *ndl)
-{
-	LASSERT(list_empty(&ndl->ndl_link));
-	LASSERT(!list_empty(&ndl->ndl_hlink));
-
-	list_del(&ndl->ndl_hlink); /* delete from hash */
-	lstcon_node_put(ndl->ndl_node);
-
-	kfree(ndl);
-}
-
-static int
-lstcon_group_alloc(char *name, struct lstcon_group **grpp)
-{
-	struct lstcon_group *grp;
-	int i;
-
-	grp = kmalloc(offsetof(struct lstcon_group,
-			       grp_ndl_hash[LST_NODE_HASHSIZE]),
-		      GFP_KERNEL);
-	if (!grp)
-		return -ENOMEM;
-
-	grp->grp_ref = 1;
-	if (name) {
-		if (strlen(name) > sizeof(grp->grp_name) - 1) {
-			kfree(grp);
-			return -E2BIG;
-		}
-		strncpy(grp->grp_name, name, sizeof(grp->grp_name));
-	}
-
-	INIT_LIST_HEAD(&grp->grp_link);
-	INIT_LIST_HEAD(&grp->grp_ndl_list);
-	INIT_LIST_HEAD(&grp->grp_trans_list);
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++)
-		INIT_LIST_HEAD(&grp->grp_ndl_hash[i]);
-
-	*grpp = grp;
-
-	return 0;
-}
-
-static void
-lstcon_group_addref(struct lstcon_group *grp)
-{
-	grp->grp_ref++;
-}
-
-static void lstcon_group_ndlink_release(struct lstcon_group *,
-					struct lstcon_ndlink *);
-
-static void
-lstcon_group_drain(struct lstcon_group *grp, int keep)
-{
-	struct lstcon_ndlink *ndl;
-	struct lstcon_ndlink *tmp;
-
-	list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
-		if (!(ndl->ndl_node->nd_state & keep))
-			lstcon_group_ndlink_release(grp, ndl);
-	}
-}
-
-static void
-lstcon_group_decref(struct lstcon_group *grp)
-{
-	int i;
-
-	if (--grp->grp_ref > 0)
-		return;
-
-	if (!list_empty(&grp->grp_link))
-		list_del(&grp->grp_link);
-
-	lstcon_group_drain(grp, 0);
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++)
-		LASSERT(list_empty(&grp->grp_ndl_hash[i]));
-
-	kfree(grp);
-}
-
-static int
-lstcon_group_find(const char *name, struct lstcon_group **grpp)
-{
-	struct lstcon_group *grp;
-
-	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
-		if (strncmp(grp->grp_name, name, LST_NAME_SIZE))
-			continue;
-
-		lstcon_group_addref(grp); /* +1 ref for caller */
-		*grpp = grp;
-		return 0;
-	}
-
-	return -ENOENT;
-}
-
-static int
-lstcon_group_ndlink_find(struct lstcon_group *grp, struct lnet_process_id id,
-			 struct lstcon_ndlink **ndlpp, int create)
-{
-	int rc;
-
-	rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create);
-	if (rc)
-		return rc;
-
-	if (!list_empty(&(*ndlpp)->ndl_link))
-		return 0;
-
-	list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list);
-	grp->grp_nnode++;
-
-	return 0;
-}
-
-static void
-lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl)
-{
-	list_del_init(&ndl->ndl_link);
-	lstcon_ndlink_release(ndl);
-	grp->grp_nnode--;
-}
-
-static void
-lstcon_group_ndlink_move(struct lstcon_group *old,
-			 struct lstcon_group *new, struct lstcon_ndlink *ndl)
-{
-	unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
-					LST_NODE_HASHSIZE;
-
-	list_del(&ndl->ndl_hlink);
-	list_del(&ndl->ndl_link);
-	old->grp_nnode--;
-
-	list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]);
-	list_add_tail(&ndl->ndl_link, &new->grp_ndl_list);
-	new->grp_nnode++;
-}
-
-static void
-lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new)
-{
-	struct lstcon_ndlink *ndl;
-
-	while (!list_empty(&old->grp_ndl_list)) {
-		ndl = list_entry(old->grp_ndl_list.next,
-				 struct lstcon_ndlink, ndl_link);
-		lstcon_group_ndlink_move(old, new, ndl);
-	}
-}
-
-static int
-lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
-	struct lstcon_group *grp = (struct lstcon_group *)arg;
-
-	switch (transop) {
-	case LST_TRANS_SESNEW:
-		if (nd->nd_state == LST_NODE_ACTIVE)
-			return 0;
-		break;
-
-	case LST_TRANS_SESEND:
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			return 0;
-
-		if (grp && nd->nd_ref > 1)
-			return 0;
-		break;
-
-	case LST_TRANS_SESQRY:
-		break;
-
-	default:
-		LBUG();
-	}
-
-	return 1;
-}
-
-static int
-lstcon_sesrpc_readent(int transop, struct srpc_msg *msg,
-		      struct lstcon_rpc_ent __user *ent_up)
-{
-	struct srpc_debug_reply *rep;
-
-	switch (transop) {
-	case LST_TRANS_SESNEW:
-	case LST_TRANS_SESEND:
-		return 0;
-
-	case LST_TRANS_SESQRY:
-		rep = &msg->msg_body.dbg_reply;
-
-		if (copy_to_user(&ent_up->rpe_priv[0],
-				 &rep->dbg_timeout, sizeof(int)) ||
-		    copy_to_user(&ent_up->rpe_payload[0],
-				 &rep->dbg_name, LST_NAME_SIZE))
-			return -EFAULT;
-
-		return 0;
-
-	default:
-		LBUG();
-	}
-
-	return 0;
-}
-
-static int
-lstcon_group_nodes_add(struct lstcon_group *grp,
-		       int count, struct lnet_process_id __user *ids_up,
-		       unsigned int *featp,
-		       struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_ndlink	*ndl;
-	struct lstcon_group *tmp;
-	struct lnet_process_id id;
-	int i;
-	int rc;
-
-	rc = lstcon_group_alloc(NULL, &tmp);
-	if (rc) {
-		CERROR("Out of memory\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0 ; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			break;
-		}
-
-		/* skip if it's in this group already */
-		rc = lstcon_group_ndlink_find(grp, id, &ndl, 0);
-		if (!rc)
-			continue;
-
-		/* add to tmp group */
-		rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1);
-		if (rc) {
-			CERROR("Can't create ndlink, out of memory\n");
-			break;
-		}
-	}
-
-	if (rc) {
-		lstcon_group_decref(tmp);
-		return rc;
-	}
-
-	rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
-				     &tmp->grp_trans_list, LST_TRANS_SESNEW,
-				     tmp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		lstcon_group_decref(tmp);
-		return rc;
-	}
-
-	/* post all RPCs */
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_sesrpc_readent);
-	*featp = trans->tas_features;
-
-	/* destroy all RPCs */
-	lstcon_rpc_trans_destroy(trans);
-
-	lstcon_group_move(tmp, grp);
-	lstcon_group_decref(tmp);
-
-	return rc;
-}
-
-static int
-lstcon_group_nodes_remove(struct lstcon_group *grp,
-			  int count, struct lnet_process_id __user *ids_up,
-			  struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_group *tmp;
-	struct lnet_process_id id;
-	int rc;
-	int i;
-
-	/* End the session on the given nodes and remove them from the group */
-
-	rc = lstcon_group_alloc(NULL, &tmp);
-	if (rc) {
-		CERROR("Out of memory\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			goto error;
-		}
-
-		/* move node to tmp group */
-		if (!lstcon_group_ndlink_find(grp, id, &ndl, 0))
-			lstcon_group_ndlink_move(grp, tmp, ndl);
-	}
-
-	rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
-				     &tmp->grp_trans_list, LST_TRANS_SESEND,
-				     tmp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		goto error;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-	lstcon_rpc_trans_destroy(trans);
-	/* release nodes anyway, because we can't roll back their status */
-	lstcon_group_decref(tmp);
-
-	return rc;
-error:
-	lstcon_group_move(tmp, grp);
-	lstcon_group_decref(tmp);
-
-	return rc;
-}
-
-int
-lstcon_group_add(char *name)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST;
-	if (rc) {
-		/* found a group with the same name */
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	rc = lstcon_group_alloc(name, &grp);
-	if (rc) {
-		CERROR("Can't allocate descriptor for group %s\n", name);
-		return -ENOMEM;
-	}
-
-	list_add_tail(&grp->grp_link, &console_session.ses_grp_list);
-
-	return rc;
-}
-
-int
-lstcon_nodes_add(char *name, int count, struct lnet_process_id __user *ids_up,
-		 unsigned int *featp, struct list_head __user *result_up)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	LASSERT(count > 0);
-	LASSERT(ids_up);
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by other threads or a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-
-		return -EBUSY;
-	}
-
-	rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_del(char *name)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group: %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by other threads or a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
-				     &grp->grp_trans_list, LST_TRANS_SESEND,
-				     grp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	lstcon_rpc_trans_destroy(trans);
-
-	lstcon_group_decref(grp);
-	/*
-	 * -1 ref for the session; it has been destroyed and its
-	 * status can't be rolled back, so destroy the group anyway
-	 */
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_clean(char *name, int args)
-{
-	struct lstcon_group *grp = NULL;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	args = (LST_NODE_ACTIVE | LST_NODE_BUSY |
-		LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args;
-
-	lstcon_group_drain(grp, args);
-
-	lstcon_group_decref(grp);
-	/* release empty group */
-	if (list_empty(&grp->grp_ndl_list))
-		lstcon_group_decref(grp);
-
-	return 0;
-}
-
-int
-lstcon_nodes_remove(char *name, int count,
-		    struct lnet_process_id __user *ids_up,
-		    struct list_head __user *result_up)
-{
-	struct lstcon_group *grp = NULL;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group: %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up);
-
-	lstcon_group_decref(grp);
-	/* release empty group */
-	if (list_empty(&grp->grp_ndl_list))
-		lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_refresh(char *name, struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group: %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	/* re-invite all inactive nodes in the group */
-	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
-				     &grp->grp_trans_list, LST_TRANS_SESNEW,
-				     grp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		/* local error, return */
-		CDEBUG(D_NET, "Can't create transaction: %d\n", rc);
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-	lstcon_rpc_trans_destroy(trans);
-	/* -ref for me */
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_list(int index, int len, char __user *name_up)
-{
-	struct lstcon_group *grp;
-
-	LASSERT(index >= 0);
-	LASSERT(name_up);
-
-	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
-		if (!index--) {
-			return copy_to_user(name_up, grp->grp_name, len) ?
-					    -EFAULT : 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-static int
-lstcon_nodes_getent(struct list_head *head, int *index_p,
-		    int *count_p, struct lstcon_node_ent __user *dents_up)
-{
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int count = 0;
-	int index = 0;
-
-	LASSERT(index_p && count_p);
-	LASSERT(dents_up);
-	LASSERT(*index_p >= 0);
-	LASSERT(*count_p > 0);
-
-	list_for_each_entry(ndl, head, ndl_link) {
-		if (index++ < *index_p)
-			continue;
-
-		if (count >= *count_p)
-			break;
-
-		nd = ndl->ndl_node;
-		if (copy_to_user(&dents_up[count].nde_id,
-				 &nd->nd_id, sizeof(nd->nd_id)) ||
-		    copy_to_user(&dents_up[count].nde_state,
-				 &nd->nd_state, sizeof(nd->nd_state)))
-			return -EFAULT;
-
-		count++;
-	}
-
-	if (index <= *index_p)
-		return -ENOENT;
-
-	*count_p = count;
-	*index_p = index;
-
-	return 0;
-}
-
-int
-lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gents_p,
-		  int *index_p, int *count_p,
-		  struct lstcon_node_ent __user *dents_up)
-{
-	struct lstcon_ndlist_ent *gentp;
-	struct lstcon_group *grp;
-	struct lstcon_ndlink *ndl;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	if (dents_up) {
-		/* verbose query */
-		rc = lstcon_nodes_getent(&grp->grp_ndl_list,
-					 index_p, count_p, dents_up);
-		lstcon_group_decref(grp);
-
-		return rc;
-	}
-
-	/* non-verbose query */
-	gentp = kzalloc(sizeof(struct lstcon_ndlist_ent), GFP_NOFS);
-	if (!gentp) {
-		CERROR("Can't allocate ndlist_ent\n");
-		lstcon_group_decref(grp);
-
-		return -ENOMEM;
-	}
-
-	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp);
-
-	rc = copy_to_user(gents_p, gentp,
-			  sizeof(struct lstcon_ndlist_ent)) ? -EFAULT : 0;
-
-	kfree(gentp);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-static int
-lstcon_batch_find(const char *name, struct lstcon_batch **batpp)
-{
-	struct lstcon_batch *bat;
-
-	list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
-		if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) {
-			*batpp = bat;
-			return 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-int
-lstcon_batch_add(char *name)
-{
-	struct lstcon_batch *bat;
-	int i;
-	int rc;
-
-	rc = !lstcon_batch_find(name, &bat) ? -EEXIST : 0;
-	if (rc) {
-		CDEBUG(D_NET, "Batch %s already exists\n", name);
-		return rc;
-	}
-
-	bat = kzalloc(sizeof(struct lstcon_batch), GFP_NOFS);
-	if (!bat) {
-		CERROR("Can't allocate descriptor for batch %s\n", name);
-		return -ENOMEM;
-	}
-
-	bat->bat_cli_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
-				    GFP_KERNEL);
-	if (!bat->bat_cli_hash) {
-		CERROR("Can't allocate hash for batch %s\n", name);
-		kfree(bat);
-
-		return -ENOMEM;
-	}
-
-	bat->bat_srv_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
-				    GFP_KERNEL);
-	if (!bat->bat_srv_hash) {
-		CERROR("Can't allocate hash for batch %s\n", name);
-		kfree(bat->bat_cli_hash);
-		kfree(bat);
-
-		return -ENOMEM;
-	}
-
-	if (strlen(name) > sizeof(bat->bat_name) - 1) {
-		kfree(bat->bat_srv_hash);
-		kfree(bat->bat_cli_hash);
-		kfree(bat);
-		return -E2BIG;
-	}
-	strncpy(bat->bat_name, name, sizeof(bat->bat_name));
-	bat->bat_hdr.tsb_index = 0;
-	bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie;
-
-	bat->bat_ntest = 0;
-	bat->bat_state = LST_BATCH_IDLE;
-
-	INIT_LIST_HEAD(&bat->bat_cli_list);
-	INIT_LIST_HEAD(&bat->bat_srv_list);
-	INIT_LIST_HEAD(&bat->bat_test_list);
-	INIT_LIST_HEAD(&bat->bat_trans_list);
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++) {
-		INIT_LIST_HEAD(&bat->bat_cli_hash[i]);
-		INIT_LIST_HEAD(&bat->bat_srv_hash[i]);
-	}
-
-	list_add_tail(&bat->bat_link, &console_session.ses_bat_list);
-
-	return rc;
-}
-
-int
-lstcon_batch_list(int index, int len, char __user *name_up)
-{
-	struct lstcon_batch *bat;
-
-	LASSERT(name_up);
-	LASSERT(index >= 0);
-
-	list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
-		if (!index--) {
-			return copy_to_user(name_up, bat->bat_name, len) ?
-					    -EFAULT : 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-int
-lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
-		  int server, int testidx, int *index_p, int *ndent_p,
-		  struct lstcon_node_ent __user *dents_up)
-{
-	struct lstcon_test_batch_ent *entp;
-	struct list_head *clilst;
-	struct list_head *srvlst;
-	struct lstcon_test *test = NULL;
-	struct lstcon_batch *bat;
-	struct lstcon_ndlink	*ndl;
-	int rc;
-
-	rc = lstcon_batch_find(name, &bat);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return -ENOENT;
-	}
-
-	if (testidx > 0) {
-		/* query a test; test index starts from 1 */
-		list_for_each_entry(test, &bat->bat_test_list, tes_link) {
-			if (testidx-- == 1)
-				break;
-		}
-
-		if (testidx > 0) {
-			CDEBUG(D_NET, "Can't find specified test in batch\n");
-			return -ENOENT;
-		}
-	}
-
-	clilst = !test ? &bat->bat_cli_list :
-			 &test->tes_src_grp->grp_ndl_list;
-	srvlst = !test ? &bat->bat_srv_list :
-			 &test->tes_dst_grp->grp_ndl_list;
-
-	if (dents_up) {
-		rc = lstcon_nodes_getent((server ? srvlst : clilst),
-					 index_p, ndent_p, dents_up);
-		return rc;
-	}
-
-	/* non-verbose query */
-	entp = kzalloc(sizeof(struct lstcon_test_batch_ent), GFP_NOFS);
-	if (!entp)
-		return -ENOMEM;
-
-	if (!test) {
-		entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
-		entp->u.tbe_batch.bae_state = bat->bat_state;
-	} else {
-		entp->u.tbe_test.tse_type = test->tes_type;
-		entp->u.tbe_test.tse_loop = test->tes_loop;
-		entp->u.tbe_test.tse_concur = test->tes_concur;
-	}
-
-	list_for_each_entry(ndl, clilst, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle);
-
-	list_for_each_entry(ndl, srvlst, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle);
-
-	rc = copy_to_user(ent_up, entp,
-			  sizeof(struct lstcon_test_batch_ent)) ? -EFAULT : 0;
-
-	kfree(entp);
-
-	return rc;
-}
-
-static int
-lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
-	switch (transop) {
-	case LST_TRANS_TSBRUN:
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			return -ENETDOWN;
-		break;
-
-	case LST_TRANS_TSBSTOP:
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			return 0;
-		break;
-
-	case LST_TRANS_TSBCLIQRY:
-	case LST_TRANS_TSBSRVQRY:
-		break;
-	}
-
-	return 1;
-}
-
-static int
-lstcon_batch_op(struct lstcon_batch *bat, int transop,
-		struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	int rc;
-
-	rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
-				     &bat->bat_trans_list, transop,
-				     bat, lstcon_batrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-int
-lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_batch *bat;
-	int rc;
-
-	if (lstcon_batch_find(name, &bat)) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return -ENOENT;
-	}
-
-	bat->bat_arg = timeout;
-
-	rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up);
-
-	/* mark batch as running if it's started on any node */
-	if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0))
-		bat->bat_state = LST_BATCH_RUNNING;
-
-	return rc;
-}
-
-int
-lstcon_batch_stop(char *name, int force, struct list_head __user *result_up)
-{
-	struct lstcon_batch *bat;
-	int rc;
-
-	if (lstcon_batch_find(name, &bat)) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return -ENOENT;
-	}
-
-	bat->bat_arg = force;
-
-	rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up);
-
-	/* mark batch as stopped if all RPCs finished */
-	if (!lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0))
-		bat->bat_state = LST_BATCH_IDLE;
-
-	return rc;
-}
-
-static void
-lstcon_batch_destroy(struct lstcon_batch *bat)
-{
-	struct lstcon_ndlink *ndl;
-	struct lstcon_test *test;
-	int i;
-
-	list_del(&bat->bat_link);
-
-	while (!list_empty(&bat->bat_test_list)) {
-		test = list_entry(bat->bat_test_list.next,
-				  struct lstcon_test, tes_link);
-		LASSERT(list_empty(&test->tes_trans_list));
-
-		list_del(&test->tes_link);
-
-		lstcon_group_decref(test->tes_src_grp);
-		lstcon_group_decref(test->tes_dst_grp);
-
-		kfree(test);
-	}
-
-	LASSERT(list_empty(&bat->bat_trans_list));
-
-	while (!list_empty(&bat->bat_cli_list)) {
-		ndl = list_entry(bat->bat_cli_list.next,
-				 struct lstcon_ndlink, ndl_link);
-		list_del_init(&ndl->ndl_link);
-
-		lstcon_ndlink_release(ndl);
-	}
-
-	while (!list_empty(&bat->bat_srv_list)) {
-		ndl = list_entry(bat->bat_srv_list.next,
-				 struct lstcon_ndlink, ndl_link);
-		list_del_init(&ndl->ndl_link);
-
-		lstcon_ndlink_release(ndl);
-	}
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++) {
-		LASSERT(list_empty(&bat->bat_cli_hash[i]));
-		LASSERT(list_empty(&bat->bat_srv_hash[i]));
-	}
-
-	kfree(bat->bat_cli_hash);
-	kfree(bat->bat_srv_hash);
-	kfree(bat);
-}
-
-static int
-lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
-	struct lstcon_test *test;
-	struct lstcon_batch *batch;
-	struct lstcon_ndlink *ndl;
-	struct list_head *hash;
-	struct list_head *head;
-
-	test = (struct lstcon_test *)arg;
-	LASSERT(test);
-
-	batch = test->tes_batch;
-	LASSERT(batch);
-
-	if (test->tes_oneside &&
-	    transop == LST_TRANS_TSBSRVADD)
-		return 0;
-
-	if (nd->nd_state != LST_NODE_ACTIVE)
-		return -ENETDOWN;
-
-	if (transop == LST_TRANS_TSBCLIADD) {
-		hash = batch->bat_cli_hash;
-		head = &batch->bat_cli_list;
-
-	} else {
-		LASSERT(transop == LST_TRANS_TSBSRVADD);
-
-		hash = batch->bat_srv_hash;
-		head = &batch->bat_srv_list;
-	}
-
-	LASSERT(nd->nd_id.nid != LNET_NID_ANY);
-
-	if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1))
-		return -ENOMEM;
-
-	if (list_empty(&ndl->ndl_link))
-		list_add_tail(&ndl->ndl_link, head);
-
-	return 1;
-}
-
-static int
-lstcon_test_nodes_add(struct lstcon_test *test,
-		      struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	int transop;
-	int rc;
-
-	LASSERT(test->tes_src_grp);
-	LASSERT(test->tes_dst_grp);
-
-	transop = LST_TRANS_TSBSRVADD;
-	grp = test->tes_dst_grp;
-again:
-	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
-				     &test->tes_trans_list, transop,
-				     test, lstcon_testrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	if (lstcon_trans_stat()->trs_rpc_errno ||
-	    lstcon_trans_stat()->trs_fwk_errno) {
-		lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-		lstcon_rpc_trans_destroy(trans);
-		/* return if any error */
-		CDEBUG(D_NET, "Failed to add test %s, RPC error %d, framework error %d\n",
-		       transop == LST_TRANS_TSBCLIADD ? "client" : "server",
-		       lstcon_trans_stat()->trs_rpc_errno,
-		       lstcon_trans_stat()->trs_fwk_errno);
-
-		return rc;
-	}
-
-	lstcon_rpc_trans_destroy(trans);
-
-	if (transop == LST_TRANS_TSBCLIADD)
-		return rc;
-
-	transop = LST_TRANS_TSBCLIADD;
-	grp = test->tes_src_grp;
-	test->tes_cliidx = 0;
-
-	/* requests to test clients */
-	goto again;
-}
-
-static int
-lstcon_verify_batch(const char *name, struct lstcon_batch **batch)
-{
-	int rc;
-
-	rc = lstcon_batch_find(name, batch);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return rc;
-	}
-
-	if ((*batch)->bat_state != LST_BATCH_IDLE) {
-		CDEBUG(D_NET, "Can't change running batch %s\n", name);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int
-lstcon_verify_group(const char *name, struct lstcon_group **grp)
-{
-	int rc;
-	struct lstcon_ndlink	*ndl;
-
-	rc = lstcon_group_find(name, grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) {
-		if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE)
-			return 0;
-	}
-
-	CDEBUG(D_NET, "Group %s has no ACTIVE nodes\n", name);
-
-	return -EINVAL;
-}
-
-int
-lstcon_test_add(char *batch_name, int type, int loop,
-		int concur, int dist, int span,
-		char *src_name, char *dst_name,
-		void *param, int paramlen, int *retp,
-		struct list_head __user *result_up)
-{
-	struct lstcon_test *test = NULL;
-	int rc;
-	struct lstcon_group *src_grp = NULL;
-	struct lstcon_group *dst_grp = NULL;
-	struct lstcon_batch *batch = NULL;
-
-	/*
-	 * verify that a batch of the given name exists, and the groups
-	 * that will be part of the batch exist and have at least one
-	 * active node
-	 */
-	rc = lstcon_verify_batch(batch_name, &batch);
-	if (rc)
-		goto out;
-
-	rc = lstcon_verify_group(src_name, &src_grp);
-	if (rc)
-		goto out;
-
-	rc = lstcon_verify_group(dst_name, &dst_grp);
-	if (rc)
-		goto out;
-
-	if (dst_grp->grp_userland)
-		*retp = 1;
-
-	test = kzalloc(offsetof(struct lstcon_test, tes_param[paramlen]),
-		       GFP_KERNEL);
-	if (!test) {
-		CERROR("Can't allocate test descriptor\n");
-		rc = -ENOMEM;
-
-		goto out;
-	}
-
-	test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id;
-	test->tes_batch	= batch;
-	test->tes_type = type;
-	test->tes_oneside = 0; /* TODO */
-	test->tes_loop = loop;
-	test->tes_concur = concur;
-	test->tes_stop_onerr = 1; /* TODO */
-	test->tes_span = span;
-	test->tes_dist = dist;
-	test->tes_cliidx = 0; /* just used for creating RPC */
-	test->tes_src_grp = src_grp;
-	test->tes_dst_grp = dst_grp;
-	INIT_LIST_HEAD(&test->tes_trans_list);
-
-	if (param) {
-		test->tes_paramlen = paramlen;
-		memcpy(&test->tes_param[0], param, paramlen);
-	}
-
-	rc = lstcon_test_nodes_add(test, result_up);
-
-	if (rc)
-		goto out;
-
-	if (lstcon_trans_stat()->trs_rpc_errno ||
-	    lstcon_trans_stat()->trs_fwk_errno)
-		CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type,
-		       batch_name);
-
-	/* add to test list anyway, so user can check what's going on */
-	list_add_tail(&test->tes_link, &batch->bat_test_list);
-
-	batch->bat_ntest++;
-	test->tes_hdr.tsb_index = batch->bat_ntest;
-
-	/* hold groups so nobody can change them */
-	return rc;
-out:
-	kfree(test);
-
-	if (dst_grp)
-		lstcon_group_decref(dst_grp);
-
-	if (src_grp)
-		lstcon_group_decref(src_grp);
-
-	return rc;
-}
-
-static int
-lstcon_test_find(struct lstcon_batch *batch, int idx,
-		 struct lstcon_test **testpp)
-{
-	struct lstcon_test *test;
-
-	list_for_each_entry(test, &batch->bat_test_list, tes_link) {
-		if (idx == test->tes_hdr.tsb_index) {
-			*testpp = test;
-			return 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-static int
-lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg,
-		      struct lstcon_rpc_ent __user *ent_up)
-{
-	struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
-	LASSERT(transop == LST_TRANS_TSBCLIQRY ||
-		transop == LST_TRANS_TSBSRVQRY);
-
-	/* positive errno, framework error code */
-	if (copy_to_user(&ent_up->rpe_priv[0], &rep->bar_active,
-			 sizeof(rep->bar_active)))
-		return -EFAULT;
-
-	return 0;
-}
-
-int
-lstcon_test_batch_query(char *name, int testidx, int client,
-			int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct list_head *translist;
-	struct list_head *ndlist;
-	struct lstcon_tsb_hdr *hdr;
-	struct lstcon_batch *batch;
-	struct lstcon_test *test = NULL;
-	int transop;
-	int rc;
-
-	rc = lstcon_batch_find(name, &batch);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find batch: %s\n", name);
-		return rc;
-	}
-
-	if (!testidx) {
-		translist = &batch->bat_trans_list;
-		ndlist = &batch->bat_cli_list;
-		hdr = &batch->bat_hdr;
-	} else {
-		/* query specified test only */
-		rc = lstcon_test_find(batch, testidx, &test);
-		if (rc) {
-			CDEBUG(D_NET, "Can't find test: %d\n", testidx);
-			return rc;
-		}
-
-		translist = &test->tes_trans_list;
-		ndlist = &test->tes_src_grp->grp_ndl_list;
-		hdr = &test->tes_hdr;
-	}
-
-	transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY;
-
-	rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr,
-				     lstcon_batrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, timeout);
-
-	/* query a batch, not a test */
-	if (!testidx &&
-	    !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) &&
-	    !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) {
-		/* all RPCs finished, and no active test */
-		batch->bat_state = LST_BATCH_IDLE;
-	}
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_tsbrpc_readent);
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-static int
-lstcon_statrpc_readent(int transop, struct srpc_msg *msg,
-		       struct lstcon_rpc_ent __user *ent_up)
-{
-	struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
-	struct sfw_counters __user *sfwk_stat;
-	struct srpc_counters __user *srpc_stat;
-	struct lnet_counters __user *lnet_stat;
-
-	if (rep->str_status)
-		return 0;
-
-	sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0];
-	srpc_stat = (struct srpc_counters __user *)(sfwk_stat + 1);
-	lnet_stat = (struct lnet_counters __user *)(srpc_stat + 1);
-
-	if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
-	    copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) ||
-	    copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-lstcon_ndlist_stat(struct list_head *ndlist,
-		   int timeout, struct list_head __user *result_up)
-{
-	struct list_head head;
-	struct lstcon_rpc_trans *trans;
-	int rc;
-
-	INIT_LIST_HEAD(&head);
-
-	rc = lstcon_rpc_trans_ndlist(ndlist, &head,
-				     LST_TRANS_STATQRY, NULL, NULL, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_statrpc_readent);
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-int
-lstcon_group_stat(char *grp_name, int timeout,
-		  struct list_head __user *result_up)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(grp_name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", grp_name);
-		return rc;
-	}
-
-	rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
-		  int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_ndlink	*ndl;
-	struct lstcon_group *tmp;
-	struct lnet_process_id id;
-	int i;
-	int rc;
-
-	rc = lstcon_group_alloc(NULL, &tmp);
-	if (rc) {
-		CERROR("Out of memory\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0 ; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			break;
-		}
-
-		/* add to tmp group */
-		rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2);
-		if (rc) {
-			CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET,
-			       "Failed to find or create %s: %d\n",
-			       libcfs_id2str(id), rc);
-			break;
-		}
-	}
-
-	if (rc) {
-		lstcon_group_decref(tmp);
-		return rc;
-	}
-
-	rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up);
-
-	lstcon_group_decref(tmp);
-
-	return rc;
-}
-
-static int
-lstcon_debug_ndlist(struct list_head *ndlist,
-		    struct list_head *translist,
-		    int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	int rc;
-
-	rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
-				     NULL, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_sesrpc_readent);
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-int
-lstcon_session_debug(int timeout, struct list_head __user *result_up)
-{
-	return lstcon_debug_ndlist(&console_session.ses_ndl_list,
-				   NULL, timeout, result_up);
-}
-
-int
-lstcon_batch_debug(int timeout, char *name,
-		   int client, struct list_head __user *result_up)
-{
-	struct lstcon_batch *bat;
-	int rc;
-
-	rc = lstcon_batch_find(name, &bat);
-	if (rc)
-		return -ENOENT;
-
-	rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list :
-					  &bat->bat_srv_list,
-				 NULL, timeout, result_up);
-
-	return rc;
-}
-
-int
-lstcon_group_debug(int timeout, char *name,
-		   struct list_head __user *result_up)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc)
-		return -ENOENT;
-
-	rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
-				 timeout, result_up);
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_nodes_debug(int timeout, int count,
-		   struct lnet_process_id __user *ids_up,
-		   struct list_head __user *result_up)
-{
-	struct lnet_process_id id;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_group *grp;
-	int i;
-	int rc;
-
-	rc = lstcon_group_alloc(NULL, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Out of memory\n");
-		return rc;
-	}
-
-	for (i = 0; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			break;
-		}
-
-		/* node is added to tmp group */
-		rc = lstcon_group_ndlink_find(grp, id, &ndl, 1);
-		if (rc) {
-			CERROR("Can't create node link\n");
-			break;
-		}
-	}
-
-	if (rc) {
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
-				 timeout, result_up);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_session_match(struct lst_sid sid)
-{
-	return (console_session.ses_id.ses_nid == sid.ses_nid &&
-		console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1 : 0;
-}
-
-static void
-lstcon_new_session_id(struct lst_sid *sid)
-{
-	struct lnet_process_id id;
-
-	LASSERT(console_session.ses_state == LST_SESSION_NONE);
-
-	LNetGetId(1, &id);
-	sid->ses_nid = id.nid;
-	sid->ses_stamp = jiffies;
-}
-
-int
-lstcon_session_new(char *name, int key, unsigned int feats,
-		   int timeout, int force, struct lst_sid __user *sid_up)
-{
-	int rc = 0;
-	int i;
-
-	if (console_session.ses_state != LST_SESSION_NONE) {
-		/* session exists */
-		if (!force) {
-			CNETERR("Session %s already exists\n",
-				console_session.ses_name);
-			return -EEXIST;
-		}
-
-		rc = lstcon_session_end();
-
-		/* lstcon_session_end() only returns local errors */
-		if (rc)
-			return rc;
-	}
-
-	if (feats & ~LST_FEATS_MASK) {
-		CNETERR("Unknown session features %x\n",
-			(feats & ~LST_FEATS_MASK));
-		return -EINVAL;
-	}
-
-	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
-		LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
-	lstcon_new_session_id(&console_session.ses_id);
-
-	console_session.ses_key = key;
-	console_session.ses_state = LST_SESSION_ACTIVE;
-	console_session.ses_force = !!force;
-	console_session.ses_features = feats;
-	console_session.ses_feats_updated = 0;
-	console_session.ses_timeout = (timeout <= 0) ?
-				      LST_CONSOLE_TIMEOUT : timeout;
-
-	if (strlen(name) > sizeof(console_session.ses_name) - 1)
-		return -E2BIG;
-	strlcpy(console_session.ses_name, name,
-		sizeof(console_session.ses_name));
-
-	rc = lstcon_batch_add(LST_DEFAULT_BATCH);
-	if (rc)
-		return rc;
-
-	rc = lstcon_rpc_pinger_start();
-	if (rc) {
-		struct lstcon_batch *bat = NULL;
-
-		lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
-		lstcon_batch_destroy(bat);
-
-		return rc;
-	}
-
-	if (!copy_to_user(sid_up, &console_session.ses_id,
-			  sizeof(struct lst_sid)))
-		return rc;
-
-	lstcon_session_end();
-
-	return -EFAULT;
-}
-
-int
-lstcon_session_info(struct lst_sid __user *sid_up, int __user *key_up,
-		    unsigned __user *featp,
-		    struct lstcon_ndlist_ent __user *ndinfo_up,
-		    char __user *name_up, int len)
-{
-	struct lstcon_ndlist_ent *entp;
-	struct lstcon_ndlink *ndl;
-	int rc = 0;
-
-	if (console_session.ses_state != LST_SESSION_ACTIVE)
-		return -ESRCH;
-
-	entp = kzalloc(sizeof(*entp), GFP_NOFS);
-	if (!entp)
-		return -ENOMEM;
-
-	list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, entp);
-
-	if (copy_to_user(sid_up, &console_session.ses_id,
-			 sizeof(*sid_up)) ||
-	    copy_to_user(key_up, &console_session.ses_key,
-			 sizeof(*key_up)) ||
-	    copy_to_user(featp, &console_session.ses_features,
-			 sizeof(*featp)) ||
-	    copy_to_user(ndinfo_up, entp, sizeof(*entp)) ||
-	    copy_to_user(name_up, console_session.ses_name, len))
-		rc = -EFAULT;
-
-	kfree(entp);
-
-	return rc;
-}
-
-int
-lstcon_session_end(void)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	struct lstcon_batch *bat;
-	int rc = 0;
-
-	LASSERT(console_session.ses_state == LST_SESSION_ACTIVE);
-
-	rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list,
-				     NULL, LST_TRANS_SESEND, NULL,
-				     lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	console_session.ses_shutdown = 1;
-
-	lstcon_rpc_pinger_stop();
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	lstcon_rpc_trans_destroy(trans);
-	/* The user can do nothing even if an RPC failed, so carry on */
-
-	/* wait for orphan RPCs to die */
-	lstcon_rpc_cleanup_wait();
-
-	console_session.ses_id = LST_INVALID_SID;
-	console_session.ses_state = LST_SESSION_NONE;
-	console_session.ses_key = 0;
-	console_session.ses_force = 0;
-	console_session.ses_feats_updated = 0;
-
-	/* destroy all batches */
-	while (!list_empty(&console_session.ses_bat_list)) {
-		bat = list_entry(console_session.ses_bat_list.next,
-				 struct lstcon_batch, bat_link);
-
-		lstcon_batch_destroy(bat);
-	}
-
-	/* destroy all groups */
-	while (!list_empty(&console_session.ses_grp_list)) {
-		grp = list_entry(console_session.ses_grp_list.next,
-				 struct lstcon_group, grp_link);
-		LASSERT(grp->grp_ref == 1);
-
-		lstcon_group_decref(grp);
-	}
-
-	/* all nodes should be released */
-	LASSERT(list_empty(&console_session.ses_ndl_list));
-
-	console_session.ses_shutdown = 0;
-	console_session.ses_expired = 0;
-
-	return rc;
-}
-
-int
-lstcon_session_feats_check(unsigned int feats)
-{
-	int rc = 0;
-
-	if (feats & ~LST_FEATS_MASK) {
-		CERROR("Can't support these features: %x\n",
-		       (feats & ~LST_FEATS_MASK));
-		return -EPROTO;
-	}
-
-	spin_lock(&console_session.ses_rpc_lock);
-
-	if (!console_session.ses_feats_updated) {
-		console_session.ses_feats_updated = 1;
-		console_session.ses_features = feats;
-	}
-
-	if (console_session.ses_features != feats)
-		rc = -EPROTO;
-
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	if (rc) {
-		CERROR("Remote features %x do not match the console's session features %x\n",
-		       feats, console_session.ses_features);
-	}
-
-	return rc;
-}
-
-static int
-lstcon_acceptor_handle(struct srpc_server_rpc *rpc)
-{
-	struct srpc_msg *rep	= &rpc->srpc_replymsg;
-	struct srpc_msg *req	= &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_join_reqst *jreq = &req->msg_body.join_reqst;
-	struct srpc_join_reply *jrep = &rep->msg_body.join_reply;
-	struct lstcon_group *grp = NULL;
-	struct lstcon_ndlink *ndl;
-	int rc = 0;
-
-	sfw_unpack_message(req);
-
-	mutex_lock(&console_session.ses_mutex);
-
-	jrep->join_sid = console_session.ses_id;
-
-	if (console_session.ses_id.ses_nid == LNET_NID_ANY) {
-		jrep->join_status = ESRCH;
-		goto out;
-	}
-
-	if (lstcon_session_feats_check(req->msg_ses_feats)) {
-		jrep->join_status = EPROTO;
-		goto out;
-	}
-
-	if (jreq->join_sid.ses_nid != LNET_NID_ANY &&
-	    !lstcon_session_match(jreq->join_sid)) {
-		jrep->join_status = EBUSY;
-		goto out;
-	}
-
-	if (lstcon_group_find(jreq->join_group, &grp)) {
-		rc = lstcon_group_alloc(jreq->join_group, &grp);
-		if (rc) {
-			CERROR("Out of memory\n");
-			goto out;
-		}
-
-		list_add_tail(&grp->grp_link,
-			      &console_session.ses_grp_list);
-		lstcon_group_addref(grp);
-	}
-
-	if (grp->grp_ref > 2) {
-		/* Group is in use */
-		jrep->join_status = EBUSY;
-		goto out;
-	}
-
-	rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0);
-	if (!rc) {
-		jrep->join_status = EEXIST;
-		goto out;
-	}
-
-	rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1);
-	if (rc) {
-		CERROR("Out of memory\n");
-		goto out;
-	}
-
-	ndl->ndl_node->nd_state = LST_NODE_ACTIVE;
-	ndl->ndl_node->nd_timeout = console_session.ses_timeout;
-
-	if (!grp->grp_userland)
-		grp->grp_userland = 1;
-
-	strlcpy(jrep->join_session, console_session.ses_name,
-		sizeof(jrep->join_session));
-	jrep->join_timeout = console_session.ses_timeout;
-	jrep->join_status = 0;
-
-out:
-	rep->msg_ses_feats = console_session.ses_features;
-	if (grp)
-		lstcon_group_decref(grp);
-
-	mutex_unlock(&console_session.ses_mutex);
-
-	return rc;
-}
-
-static struct srpc_service lstcon_acceptor_service;
-
-static void lstcon_init_acceptor_service(void)
-{
-	/* initialize selftest console acceptor service table */
-	lstcon_acceptor_service.sv_name = "join session";
-	lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle;
-	lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN;
-	lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
-}
-
-static struct notifier_block lstcon_ioctl_handler = {
-	.notifier_call = lstcon_ioctl_entry,
-};
-
-/* initialize console */
-int
-lstcon_console_init(void)
-{
-	int i;
-	int rc;
-
-	memset(&console_session, 0, sizeof(struct lstcon_session));
-
-	console_session.ses_id = LST_INVALID_SID;
-	console_session.ses_state = LST_SESSION_NONE;
-	console_session.ses_timeout = 0;
-	console_session.ses_force = 0;
-	console_session.ses_expired = 0;
-	console_session.ses_feats_updated = 0;
-	console_session.ses_features = LST_FEATS_MASK;
-	console_session.ses_laststamp = ktime_get_real_seconds();
-
-	mutex_init(&console_session.ses_mutex);
-
-	INIT_LIST_HEAD(&console_session.ses_ndl_list);
-	INIT_LIST_HEAD(&console_session.ses_grp_list);
-	INIT_LIST_HEAD(&console_session.ses_bat_list);
-	INIT_LIST_HEAD(&console_session.ses_trans_list);
-
-	console_session.ses_ndl_hash =
-		kmalloc(sizeof(struct list_head) * LST_GLOBAL_HASHSIZE, GFP_KERNEL);
-	if (!console_session.ses_ndl_hash)
-		return -ENOMEM;
-
-	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
-		INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]);
-
-	/* initialize acceptor service table */
-	lstcon_init_acceptor_service();
-
-	rc = srpc_add_service(&lstcon_acceptor_service);
-	LASSERT(rc != -EBUSY);
-	if (rc) {
-		kfree(console_session.ses_ndl_hash);
-		return rc;
-	}
-
-	rc = srpc_service_add_buffers(&lstcon_acceptor_service,
-				      lstcon_acceptor_service.sv_wi_total);
-	if (rc) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	rc = blocking_notifier_chain_register(&libcfs_ioctl_list,
-					      &lstcon_ioctl_handler);
-
-	if (!rc) {
-		lstcon_rpc_module_init();
-		return 0;
-	}
-
-out:
-	srpc_shutdown_service(&lstcon_acceptor_service);
-	srpc_remove_service(&lstcon_acceptor_service);
-
-	kfree(console_session.ses_ndl_hash);
-
-	srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
-	return rc;
-}
-
-int
-lstcon_console_fini(void)
-{
-	int i;
-
-	blocking_notifier_chain_unregister(&libcfs_ioctl_list,
-					   &lstcon_ioctl_handler);
-
-	mutex_lock(&console_session.ses_mutex);
-
-	srpc_shutdown_service(&lstcon_acceptor_service);
-	srpc_remove_service(&lstcon_acceptor_service);
-
-	if (console_session.ses_state != LST_SESSION_NONE)
-		lstcon_session_end();
-
-	lstcon_rpc_module_fini();
-
-	mutex_unlock(&console_session.ses_mutex);
-
-	LASSERT(list_empty(&console_session.ses_ndl_list));
-	LASSERT(list_empty(&console_session.ses_grp_list));
-	LASSERT(list_empty(&console_session.ses_bat_list));
-	LASSERT(list_empty(&console_session.ses_trans_list));
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++)
-		LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
-	kfree(console_session.ses_ndl_hash);
-
-	srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
-	return 0;
-}

+ 0 - 244
drivers/staging/lustre/lnet/selftest/console.h

@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.h
- *
- * kernel structure for LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#ifndef __LST_CONSOLE_H__
-#define __LST_CONSOLE_H__
-
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "selftest.h"
-#include "conrpc.h"
-
-/* node descriptor */
-struct lstcon_node {
-	struct lnet_process_id	nd_id;	/* id of the node */
-	int		  nd_ref;     /* reference count */
-	int		  nd_state;   /* state of the node */
-	int		  nd_timeout; /* session timeout */
-	unsigned long	  nd_stamp;   /* timestamp of last replied RPC */
-	struct lstcon_rpc nd_ping;    /* ping rpc */
-};
-
-/* node link descriptor */
-struct lstcon_ndlink {
-	struct list_head ndl_link;    /* chain on list */
-	struct list_head ndl_hlink;   /* chain on hash */
-	struct lstcon_node	*ndl_node;	/* pointer to node */
-};
-
-/* (alias of nodes) group descriptor */
-struct lstcon_group {
-	struct list_head grp_link;		  /* chain on global group list
-						   */
-	int		 grp_ref;		  /* reference count */
-	int		 grp_userland;		  /* has userland nodes */
-	int		 grp_nnode;		  /* # of nodes */
-	char		 grp_name[LST_NAME_SIZE]; /* group name */
-
-	struct list_head grp_trans_list;	  /* transaction list */
-	struct list_head grp_ndl_list;		  /* nodes list */
-	struct list_head grp_ndl_hash[];	  /* hash table for nodes */
-};
-
-#define LST_BATCH_IDLE	  0xB0	    /* idle batch */
-#define LST_BATCH_RUNNING 0xB1	    /* running batch */
-
-struct lstcon_tsb_hdr {
-	struct lst_bid	 tsb_id;	 /* batch ID */
-	int		 tsb_index;	 /* test index */
-};
-
-/* (tests) batch descriptor */
-struct lstcon_batch {
-	struct lstcon_tsb_hdr	bat_hdr;	/* test_batch header */
-	struct list_head bat_link;	  /* chain on session's batches list */
-	int		 bat_ntest;	  /* # of tests */
-	int		 bat_state;	  /* state of the batch */
-	int		 bat_arg;	  /* parameter for run|stop, timeout
-					   * for run, force for stop
-					   */
-	char		 bat_name[LST_NAME_SIZE];/* name of batch */
-
-	struct list_head bat_test_list;   /* list head of tests (struct lstcon_test)
-					   */
-	struct list_head bat_trans_list;  /* list head of transaction */
-	struct list_head bat_cli_list;	  /* list head of client nodes
-					   * (struct lstcon_node)
-					   */
-	struct list_head *bat_cli_hash;   /* hash table of client nodes */
-	struct list_head bat_srv_list;	  /* list head of server nodes */
-	struct list_head *bat_srv_hash;   /* hash table of server nodes */
-};
-
-/* a single test descriptor */
-struct lstcon_test {
-	struct lstcon_tsb_hdr	tes_hdr;	/* test batch header */
-	struct list_head tes_link;	 /* chain on batch's tests list */
-	struct lstcon_batch	*tes_batch;	 /* pointer to batch */
-
-	int		 tes_type;	 /* type of the test, i.e: bulk, ping */
-	int		 tes_stop_onerr; /* stop on error */
-	int		 tes_oneside;	 /* one-sided test */
-	int		 tes_concur;	 /* concurrency */
-	int		 tes_loop;	 /* loop count */
-	int		 tes_dist;	 /* nodes distribution of target group */
-	int		 tes_span;	 /* nodes span of target group */
-	int		 tes_cliidx;	 /* client index, used for RPC creating */
-
-	struct list_head tes_trans_list; /* transaction list */
-	struct lstcon_group	*tes_src_grp;	/* group run the test */
-	struct lstcon_group	*tes_dst_grp;	/* target group */
-
-	int		 tes_paramlen;	 /* test parameter length */
-	char		 tes_param[];	 /* test parameter */
-};
-
-#define LST_GLOBAL_HASHSIZE 503	     /* global nodes hash table size */
-#define LST_NODE_HASHSIZE   239	     /* node hash table (for batch or group) */
-
-#define LST_SESSION_NONE    0x0	     /* no session */
-#define LST_SESSION_ACTIVE  0x1	     /* working session */
-
-#define LST_CONSOLE_TIMEOUT 300	     /* default console timeout */
-
-struct lstcon_session {
-	struct mutex	    ses_mutex;	      /* only 1 thread in session */
-	struct lst_sid	    ses_id;	      /* global session id */
-	int		    ses_key;	      /* local session key */
-	int		    ses_state;	      /* state of session */
-	int		    ses_timeout;      /* timeout in seconds */
-	time64_t	    ses_laststamp;    /* last operation stamp (seconds)
-					       */
-	unsigned int	    ses_features;     /* tests features of the session
-					       */
-	unsigned int	    ses_feats_updated:1; /* features are synced with
-						  * remote test nodes
-						  */
-	unsigned int	    ses_force:1;      /* force creating */
-	unsigned int	    ses_shutdown:1;   /* session is shutting down */
-	unsigned int	    ses_expired:1;    /* console has timed out */
-	__u64		    ses_id_cookie;    /* batch id cookie */
-	char		    ses_name[LST_NAME_SIZE];/* session name */
-	struct lstcon_rpc_trans	*ses_ping;		/* session pinger */
-	struct stt_timer	 ses_ping_timer;   /* timer for pinger */
-	struct lstcon_trans_stat ses_trans_stat;   /* transaction stats */
-
-	struct list_head    ses_trans_list;   /* global list of transaction */
-	struct list_head    ses_grp_list;     /* global list of groups */
-	struct list_head    ses_bat_list;     /* global list of batches */
-	struct list_head    ses_ndl_list;     /* global list of nodes */
-	struct list_head    *ses_ndl_hash;    /* hash table of nodes */
-
-	spinlock_t	    ses_rpc_lock;     /* serialize */
-	atomic_t	    ses_rpc_counter;  /* # of initialized RPCs */
-	struct list_head    ses_rpc_freelist; /* idle console rpc */
-}; /* session descriptor */
-
-extern struct lstcon_session	 console_session;
-
-static inline struct lstcon_trans_stat *
-lstcon_trans_stat(void)
-{
-	return &console_session.ses_trans_stat;
-}
-
-static inline struct list_head *
-lstcon_id2hash(struct lnet_process_id id, struct list_head *hash)
-{
-	unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-
-	return &hash[idx];
-}
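
The inline helper above is the console's only hashing primitive: nodes are chained into fixed-size bucket arrays (LST_GLOBAL_HASHSIZE = 503 for the session table, LST_NODE_HASHSIZE = 239 per group or batch), keyed by the NID address modulo the table size. A self-contained sketch of the same modulo-bucket scheme, with simplified stand-in types:

	#include <stdio.h>

	#define NODE_HASHSIZE 239	/* mirrors LST_NODE_HASHSIZE */

	struct node { unsigned long nid; struct node *next; };

	/* same idea as lstcon_id2hash(): bucket = nid mod table size */
	static struct node **id2bucket(unsigned long nid, struct node **hash)
	{
		return &hash[nid % NODE_HASHSIZE];
	}

	int main(void)
	{
		static struct node *hash[NODE_HASHSIZE];
		struct node n = { .nid = 0x12345, .next = NULL };
		struct node **head = id2bucket(n.nid, hash);

		n.next = *head;		/* push onto the chosen chain */
		*head = &n;
		printf("bucket %lu\n", n.nid % NODE_HASHSIZE);
		return 0;
	}
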
-
-int lstcon_ioctl_entry(struct notifier_block *nb,
-		       unsigned long cmd, void *vdata);
-int lstcon_console_init(void);
-int lstcon_console_fini(void);
-int lstcon_session_match(struct lst_sid sid);
-int lstcon_session_new(char *name, int key, unsigned int version,
-		       int timeout, int flags, struct lst_sid __user *sid_up);
-int lstcon_session_info(struct lst_sid __user *sid_up, int __user *key,
-			unsigned __user *verp, struct lstcon_ndlist_ent __user *entp,
-			char __user *name_up, int len);
-int lstcon_session_end(void);
-int lstcon_session_debug(int timeout, struct list_head __user *result_up);
-int lstcon_session_feats_check(unsigned int feats);
-int lstcon_batch_debug(int timeout, char *name,
-		       int client, struct list_head __user *result_up);
-int lstcon_group_debug(int timeout, char *name,
-		       struct list_head __user *result_up);
-int lstcon_nodes_debug(int timeout, int nnd,
-		       struct lnet_process_id __user *nds_up,
-		       struct list_head __user *result_up);
-int lstcon_group_add(char *name);
-int lstcon_group_del(char *name);
-int lstcon_group_clean(char *name, int args);
-int lstcon_group_refresh(char *name, struct list_head __user *result_up);
-int lstcon_nodes_add(char *name, int nnd, struct lnet_process_id __user *nds_up,
-		     unsigned int *featp, struct list_head __user *result_up);
-int lstcon_nodes_remove(char *name, int nnd,
-			struct lnet_process_id __user *nds_up,
-			struct list_head __user *result_up);
-int lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gent_up,
-		      int *index_p, int *ndent_p,
-		      struct lstcon_node_ent __user *ndents_up);
-int lstcon_group_list(int idx, int len, char __user *name_up);
-int lstcon_batch_add(char *name);
-int lstcon_batch_run(char *name, int timeout,
-		     struct list_head __user *result_up);
-int lstcon_batch_stop(char *name, int force,
-		      struct list_head __user *result_up);
-int lstcon_test_batch_query(char *name, int testidx,
-			    int client, int timeout,
-			    struct list_head __user *result_up);
-int lstcon_batch_del(char *name);
-int lstcon_batch_list(int idx, int namelen, char __user *name_up);
-int lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
-		      int server, int testidx, int *index_p,
-		      int *ndent_p, struct lstcon_node_ent __user *dents_up);
-int lstcon_group_stat(char *grp_name, int timeout,
-		      struct list_head __user *result_up);
-int lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
-		      int timeout, struct list_head __user *result_up);
-int lstcon_test_add(char *batch_name, int type, int loop,
-		    int concur, int dist, int span,
-		    char *src_name, char *dst_name,
-		    void *param, int paramlen, int *retp,
-		    struct list_head __user *result_up);
-#endif

+ 0 - 1786
drivers/staging/lustre/lnet/selftest/framework.c

@@ -1,1786 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/framework.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- * Author: Liang Zhen  <liangzhen@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-struct lst_sid LST_INVALID_SID = {LNET_NID_ANY, -1};
-
-static int session_timeout = 100;
-module_param(session_timeout, int, 0444);
-MODULE_PARM_DESC(session_timeout, "test session timeout in seconds (100 by default, 0 == never)");
-
-static int rpc_timeout = 64;
-module_param(rpc_timeout, int, 0644);
-MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never)");
-
-#define sfw_unpack_id(id)		\
-do {					\
-	__swab64s(&(id).nid);		\
-	__swab32s(&(id).pid);		\
-} while (0)
-
-#define sfw_unpack_sid(sid)		\
-do {					\
-	__swab64s(&(sid).ses_nid);	\
-	__swab64s(&(sid).ses_stamp);	\
-} while (0)
-
-#define sfw_unpack_fw_counters(fc)	  \
-do {					  \
-	__swab32s(&(fc).running_ms);	  \
-	__swab32s(&(fc).active_batches);  \
-	__swab32s(&(fc).zombie_sessions); \
-	__swab32s(&(fc).brw_errors);	  \
-	__swab32s(&(fc).ping_errors);	  \
-} while (0)
-
-#define sfw_unpack_rpc_counters(rc)	\
-do {					\
-	__swab32s(&(rc).errors);	\
-	__swab32s(&(rc).rpcs_sent);	\
-	__swab32s(&(rc).rpcs_rcvd);	\
-	__swab32s(&(rc).rpcs_dropped);	\
-	__swab32s(&(rc).rpcs_expired);	\
-	__swab64s(&(rc).bulk_get);	\
-	__swab64s(&(rc).bulk_put);	\
-} while (0)
-
-#define sfw_unpack_lnet_counters(lc)	\
-do {					\
-	__swab32s(&(lc).errors);	\
-	__swab32s(&(lc).msgs_max);	\
-	__swab32s(&(lc).msgs_alloc);	\
-	__swab32s(&(lc).send_count);	\
-	__swab32s(&(lc).recv_count);	\
-	__swab32s(&(lc).drop_count);	\
-	__swab32s(&(lc).route_count);	\
-	__swab64s(&(lc).send_length);	\
-	__swab64s(&(lc).recv_length);	\
-	__swab64s(&(lc).drop_length);	\
-	__swab64s(&(lc).route_length);	\
-} while (0)
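
These macros flip each field of a message that arrived from a peer of opposite endianness; whether flipping is needed at all is decided by comparing the message magic against SRPC_MSG_MAGIC (see sfw_unpack_message() below). A standalone sketch of the detect-by-magic idiom — the magic value and all names here are hypothetical:

	#include <stdint.h>
	#include <stdio.h>

	#define MSG_MAGIC 0xeeb0f00dU	/* hypothetical wire magic */

	static uint32_t swab32(uint32_t v)
	{
		return (v >> 24) | ((v >> 8) & 0xff00) |
		       ((v << 8) & 0xff0000) | (v << 24);
	}

	struct msg { uint32_t magic; uint32_t payload; };

	static void unpack(struct msg *m)
	{
		if (m->magic == MSG_MAGIC)
			return;		/* sender had our byte order */
		/* magic only matches byte-swapped: flip every field */
		m->magic = swab32(m->magic);
		m->payload = swab32(m->payload);
	}

	int main(void)
	{
		struct msg m = { swab32(MSG_MAGIC), swab32(7) };

		unpack(&m);
		printf("payload=%u\n", m.payload);	/* prints 7 */
		return 0;
	}
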
-
-#define sfw_test_active(t)	(atomic_read(&(t)->tsi_nactive))
-#define sfw_batch_active(b)	(atomic_read(&(b)->bat_nactive))
-
-static struct smoketest_framework {
-	struct list_head  fw_zombie_rpcs;     /* RPCs to be recycled */
-	struct list_head  fw_zombie_sessions; /* stopping sessions */
-	struct list_head  fw_tests;	      /* registered test cases */
-	atomic_t	  fw_nzombies;	      /* # zombie sessions */
-	spinlock_t	  fw_lock;	      /* serialise */
-	struct sfw_session	  *fw_session;	      /* _the_ session */
-	int		  fw_shuttingdown;    /* shutdown in progress */
-	struct srpc_server_rpc *fw_active_srpc;/* running RPC */
-} sfw_data;
-
-/* forward references */
-int sfw_stop_batch(struct sfw_batch *tsb, int force);
-void sfw_destroy_session(struct sfw_session *sn);
-
-static inline struct sfw_test_case *
-sfw_find_test_case(int id)
-{
-	struct sfw_test_case *tsc;
-
-	LASSERT(id <= SRPC_SERVICE_MAX_ID);
-	LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		if (tsc->tsc_srv_service->sv_id == id)
-			return tsc;
-	}
-
-	return NULL;
-}
-
-static int
-sfw_register_test(struct srpc_service *service,
-		  struct sfw_test_client_ops *cliops)
-{
-	struct sfw_test_case *tsc;
-
-	if (sfw_find_test_case(service->sv_id)) {
-		CERROR("Failed to register test %s (%d)\n",
-		       service->sv_name, service->sv_id);
-		return -EEXIST;
-	}
-
-	tsc = kzalloc(sizeof(struct sfw_test_case), GFP_NOFS);
-	if (!tsc)
-		return -ENOMEM;
-
-	tsc->tsc_cli_ops = cliops;
-	tsc->tsc_srv_service = service;
-
-	list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
-	return 0;
-}
-
-static void
-sfw_add_session_timer(void)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct stt_timer *timer;
-
-	LASSERT(!sfw_data.fw_shuttingdown);
-
-	if (!sn || !sn->sn_timeout)
-		return;
-
-	LASSERT(!sn->sn_timer_active);
-
-	timer = &sn->sn_timer;
-	sn->sn_timer_active = 1;
-	timer->stt_expires = ktime_get_real_seconds() + sn->sn_timeout;
-	stt_add_timer(timer);
-}
-
-static int
-sfw_del_session_timer(void)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-
-	if (!sn || !sn->sn_timer_active)
-		return 0;
-
-	LASSERT(sn->sn_timeout);
-
-	if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
-		sn->sn_timer_active = 0;
-		return 0;
-	}
-
-	return -EBUSY; /* racing with sfw_session_expired() */
-}
-
-static void
-sfw_deactivate_session(void)
-__must_hold(&sfw_data.fw_lock)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	int nactive = 0;
-	struct sfw_batch *tsb;
-	struct sfw_test_case *tsc;
-
-	if (!sn)
-		return;
-
-	LASSERT(!sn->sn_timer_active);
-
-	sfw_data.fw_session = NULL;
-	atomic_inc(&sfw_data.fw_nzombies);
-	list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		srpc_abort_service(tsc->tsc_srv_service);
-	}
-
-	spin_lock(&sfw_data.fw_lock);
-
-	list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
-		if (sfw_batch_active(tsb)) {
-			nactive++;
-			sfw_stop_batch(tsb, 1);
-		}
-	}
-
-	if (nactive)
-		return;	/* wait for active batches to stop */
-
-	list_del_init(&sn->sn_list);
-	spin_unlock(&sfw_data.fw_lock);
-
-	sfw_destroy_session(sn);
-
-	spin_lock(&sfw_data.fw_lock);
-}
-
-static void
-sfw_session_expired(void *data)
-{
-	struct sfw_session *sn = data;
-
-	spin_lock(&sfw_data.fw_lock);
-
-	LASSERT(sn->sn_timer_active);
-	LASSERT(sn == sfw_data.fw_session);
-
-	CWARN("Session expired! sid: %s-%llu, name: %s\n",
-	      libcfs_nid2str(sn->sn_id.ses_nid),
-	      sn->sn_id.ses_stamp, &sn->sn_name[0]);
-
-	sn->sn_timer_active = 0;
-	sfw_deactivate_session();
-
-	spin_unlock(&sfw_data.fw_lock);
-}
-
-static inline void
-sfw_init_session(struct sfw_session *sn, struct lst_sid sid,
-		 unsigned int features, const char *name)
-{
-	struct stt_timer *timer = &sn->sn_timer;
-
-	memset(sn, 0, sizeof(struct sfw_session));
-	INIT_LIST_HEAD(&sn->sn_list);
-	INIT_LIST_HEAD(&sn->sn_batches);
-	atomic_set(&sn->sn_refcount, 1);	/* +1 for caller */
-	atomic_set(&sn->sn_brw_errors, 0);
-	atomic_set(&sn->sn_ping_errors, 0);
-	strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name));
-
-	sn->sn_timer_active = 0;
-	sn->sn_id = sid;
-	sn->sn_features = features;
-	sn->sn_timeout = session_timeout;
-	sn->sn_started = jiffies;
-
-	timer->stt_data = sn;
-	timer->stt_func = sfw_session_expired;
-	INIT_LIST_HEAD(&timer->stt_list);
-}
-
-/* completion handler for incoming framework RPCs */
-static void
-sfw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv	= rpc->srpc_scd->scd_svc;
-	int status = rpc->srpc_status;
-
-	CDEBUG(D_NET, "Incoming framework RPC done: service %s, peer %s, status %s:%d\n",
-	       sv->sv_name, libcfs_id2str(rpc->srpc_peer),
-	       swi_state2str(rpc->srpc_wi.swi_state),
-	       status);
-
-	if (rpc->srpc_bulk)
-		sfw_free_pages(rpc);
-}
-
-static void
-sfw_client_rpc_fini(struct srpc_client_rpc *rpc)
-{
-	LASSERT(!rpc->crpc_bulk.bk_niov);
-	LASSERT(list_empty(&rpc->crpc_list));
-	LASSERT(!atomic_read(&rpc->crpc_refcount));
-
-	CDEBUG(D_NET, "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n",
-	       rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-	       swi_state2str(rpc->crpc_wi.swi_state),
-	       rpc->crpc_aborted, rpc->crpc_status);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	/* my callers must finish all RPCs before shutting me down */
-	LASSERT(!sfw_data.fw_shuttingdown);
-	list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
-
-	spin_unlock(&sfw_data.fw_lock);
-}
-
-static struct sfw_batch *
-sfw_find_batch(struct lst_bid bid)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct sfw_batch *bat;
-
-	LASSERT(sn);
-
-	list_for_each_entry(bat, &sn->sn_batches, bat_list) {
-		if (bat->bat_id.bat_id == bid.bat_id)
-			return bat;
-	}
-
-	return NULL;
-}
-
-static struct sfw_batch *
-sfw_bid2batch(struct lst_bid bid)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct sfw_batch *bat;
-
-	LASSERT(sn);
-
-	bat = sfw_find_batch(bid);
-	if (bat)
-		return bat;
-
-	bat = kzalloc(sizeof(struct sfw_batch), GFP_NOFS);
-	if (!bat)
-		return NULL;
-
-	bat->bat_error = 0;
-	bat->bat_session = sn;
-	bat->bat_id = bid;
-	atomic_set(&bat->bat_nactive, 0);
-	INIT_LIST_HEAD(&bat->bat_tests);
-
-	list_add_tail(&bat->bat_list, &sn->sn_batches);
-	return bat;
-}
-
-static int
-sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct sfw_counters *cnt = &reply->str_fw;
-	struct sfw_batch *bat;
-
-	reply->str_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (request->str_sid.ses_nid == LNET_NID_ANY) {
-		reply->str_status = EINVAL;
-		return 0;
-	}
-
-	if (!sn || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
-		reply->str_status = ESRCH;
-		return 0;
-	}
-
-	lnet_counters_get(&reply->str_lnet);
-	srpc_get_counters(&reply->str_rpc);
-
-	/*
-	 * report the elapsed milliseconds since the session started;
-	 * a 32-bit count covers ~49 days before wrapping
-	 */
-	cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started);
-	cnt->brw_errors = atomic_read(&sn->sn_brw_errors);
-	cnt->ping_errors = atomic_read(&sn->sn_ping_errors);
-	cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
-
-	cnt->active_batches = 0;
-	list_for_each_entry(bat, &sn->sn_batches, bat_list) {
-		if (atomic_read(&bat->bat_nactive) > 0)
-			cnt->active_batches++;
-	}
-
-	reply->str_status = 0;
-	return 0;
-}
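
For reference, the ~49-day bound mentioned in the comment above follows directly from the counter width: a 32-bit millisecond counter wraps after 2^32 ms = 4,294,967,296 ms ≈ 4,294,967 s ≈ 49.7 days.
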
-
-int
-sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct srpc_msg *msg = container_of(request, struct srpc_msg,
-				       msg_body.mksn_reqst);
-	int cplen = 0;
-
-	if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
-		reply->mksn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-		reply->mksn_status = EINVAL;
-		return 0;
-	}
-
-	if (sn) {
-		reply->mksn_status = 0;
-		reply->mksn_sid = sn->sn_id;
-		reply->mksn_timeout = sn->sn_timeout;
-
-		if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
-			atomic_inc(&sn->sn_refcount);
-			return 0;
-		}
-
-		if (!request->mksn_force) {
-			reply->mksn_status = EBUSY;
-			cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0],
-					sizeof(reply->mksn_name));
-			if (cplen >= sizeof(reply->mksn_name))
-				return -E2BIG;
-			return 0;
-		}
-	}
-
-	/*
-	 * reject the request if it requires unknown features
-	 * NB: an old version always accepts all features because it is not
-	 * aware of srpc_msg::msg_ses_feats. That is a defect, but a harmless
-	 * one: it returns a zero feature mask to the console, and it is the
-	 * console's responsibility to make sure all nodes in a session
-	 * have the same feature mask.
-	 */
-	if (msg->msg_ses_feats & ~LST_FEATS_MASK) {
-		reply->mksn_status = EPROTO;
-		return 0;
-	}
-
-	/* brand new or create by force */
-	sn = kzalloc(sizeof(struct sfw_session), GFP_NOFS);
-	if (!sn) {
-		CERROR("dropping RPC mksn under memory pressure\n");
-		return -ENOMEM;
-	}
-
-	sfw_init_session(sn, request->mksn_sid,
-			 msg->msg_ses_feats, &request->mksn_name[0]);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	sfw_deactivate_session();
-	LASSERT(!sfw_data.fw_session);
-	sfw_data.fw_session = sn;
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	reply->mksn_status = 0;
-	reply->mksn_sid = sn->sn_id;
-	reply->mksn_timeout = sn->sn_timeout;
-	return 0;
-}
-
-static int
-sfw_remove_session(struct srpc_rmsn_reqst *request,
-		   struct srpc_rmsn_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-
-	reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
-		reply->rmsn_status = EINVAL;
-		return 0;
-	}
-
-	if (!sn || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
-		reply->rmsn_status = !sn ? ESRCH : EBUSY;
-		return 0;
-	}
-
-	if (!atomic_dec_and_test(&sn->sn_refcount)) {
-		reply->rmsn_status = 0;
-		return 0;
-	}
-
-	spin_lock(&sfw_data.fw_lock);
-	sfw_deactivate_session();
-	spin_unlock(&sfw_data.fw_lock);
-
-	reply->rmsn_status = 0;
-	reply->rmsn_sid = LST_INVALID_SID;
-	LASSERT(!sfw_data.fw_session);
-	return 0;
-}
-
-static int
-sfw_debug_session(struct srpc_debug_reqst *request,
-		  struct srpc_debug_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-
-	if (!sn) {
-		reply->dbg_status = ESRCH;
-		reply->dbg_sid = LST_INVALID_SID;
-		return 0;
-	}
-
-	reply->dbg_status = 0;
-	reply->dbg_sid = sn->sn_id;
-	reply->dbg_timeout = sn->sn_timeout;
-	if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name))
-	    >= sizeof(reply->dbg_name))
-		return -E2BIG;
-
-	return 0;
-}
-
-static void
-sfw_test_rpc_fini(struct srpc_client_rpc *rpc)
-{
-	struct sfw_test_unit *tsu = rpc->crpc_priv;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-
-	/* Called with hold of tsi->tsi_lock */
-	LASSERT(list_empty(&rpc->crpc_list));
-	list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
-}
-
-static inline int
-sfw_test_buffers(struct sfw_test_instance *tsi)
-{
-	struct sfw_test_case *tsc;
-	struct srpc_service *svc;
-	int nbuf;
-
-	LASSERT(tsi);
-	tsc = sfw_find_test_case(tsi->tsi_service);
-	LASSERT(tsc);
-	svc = tsc->tsc_srv_service;
-	LASSERT(svc);
-
-	nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts;
-	return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA);
-}
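
The sizing rule above caps the buffer pool by both the service's total work-item budget and the requested loop count, split across CPU partitions. As a worked example with assumed values (the real SFW_TEST_WI_MIN/SFW_TEST_WI_EXTRA constants live in selftest.h): with sv_wi_total = 256, tsi_loop = 100 and sv_ncpts = 4, nbuf = min(256, 100) / 4 = 25, and the function returns max(SFW_TEST_WI_MIN, 25 + SFW_TEST_WI_EXTRA).
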
-
-static int
-sfw_load_test(struct sfw_test_instance *tsi)
-{
-	struct sfw_test_case *tsc;
-	struct srpc_service *svc;
-	int nbuf;
-	int rc;
-
-	LASSERT(tsi);
-	tsc = sfw_find_test_case(tsi->tsi_service);
-	nbuf = sfw_test_buffers(tsi);
-	LASSERT(tsc);
-	svc = tsc->tsc_srv_service;
-
-	if (tsi->tsi_is_client) {
-		tsi->tsi_ops = tsc->tsc_cli_ops;
-		return 0;
-	}
-
-	rc = srpc_service_add_buffers(svc, nbuf);
-	if (rc) {
-		CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
-		      svc->sv_name, nbuf, rc);
-		/*
-		 * NB: this error handler is not strictly correct, because
-		 * it may release more buffers than were actually allocated.
-		 * That is harmless: the request portal is a lazy portal and
-		 * will grow its buffers on demand.
-		 */
-		srpc_service_remove_buffers(svc, nbuf);
-		return -ENOMEM;
-	}
-
-	CDEBUG(D_NET, "Reserved %d buffers for test %s\n",
-	       nbuf * (srpc_serv_is_framework(svc) ?
-		       2 : cfs_cpt_number(cfs_cpt_tab)), svc->sv_name);
-	return 0;
-}
-
-static void
-sfw_unload_test(struct sfw_test_instance *tsi)
-{
-	struct sfw_test_case *tsc;
-
-	LASSERT(tsi);
-	tsc = sfw_find_test_case(tsi->tsi_service);
-	LASSERT(tsc);
-
-	if (tsi->tsi_is_client)
-		return;
-
-	/*
-	 * shrink the buffer pool; the request portal is a lazy portal
-	 * that can grow its buffers at runtime, so we may leave some
-	 * buffers behind, but never mind...
-	 */
-	srpc_service_remove_buffers(tsc->tsc_srv_service,
-				    sfw_test_buffers(tsi));
-}
-
-static void
-sfw_destroy_test_instance(struct sfw_test_instance *tsi)
-{
-	struct srpc_client_rpc *rpc;
-	struct sfw_test_unit *tsu;
-
-	if (!tsi->tsi_is_client)
-		goto clean;
-
-	tsi->tsi_ops->tso_fini(tsi);
-
-	LASSERT(!tsi->tsi_stopping);
-	LASSERT(list_empty(&tsi->tsi_active_rpcs));
-	LASSERT(!sfw_test_active(tsi));
-
-	while (!list_empty(&tsi->tsi_units)) {
-		tsu = list_entry(tsi->tsi_units.next,
-				 struct sfw_test_unit, tsu_list);
-		list_del(&tsu->tsu_list);
-		kfree(tsu);
-	}
-
-	while (!list_empty(&tsi->tsi_free_rpcs)) {
-		rpc = list_entry(tsi->tsi_free_rpcs.next,
-				 struct srpc_client_rpc, crpc_list);
-		list_del(&rpc->crpc_list);
-		kfree(rpc);
-	}
-
-clean:
-	sfw_unload_test(tsi);
-	kfree(tsi);
-}
-
-static void
-sfw_destroy_batch(struct sfw_batch *tsb)
-{
-	struct sfw_test_instance *tsi;
-
-	LASSERT(!sfw_batch_active(tsb));
-	LASSERT(list_empty(&tsb->bat_list));
-
-	while (!list_empty(&tsb->bat_tests)) {
-		tsi = list_entry(tsb->bat_tests.next,
-				 struct sfw_test_instance, tsi_list);
-		list_del_init(&tsi->tsi_list);
-		sfw_destroy_test_instance(tsi);
-	}
-
-	kfree(tsb);
-}
-
-void
-sfw_destroy_session(struct sfw_session *sn)
-{
-	struct sfw_batch *batch;
-
-	LASSERT(list_empty(&sn->sn_list));
-	LASSERT(sn != sfw_data.fw_session);
-
-	while (!list_empty(&sn->sn_batches)) {
-		batch = list_entry(sn->sn_batches.next,
-				   struct sfw_batch, bat_list);
-		list_del_init(&batch->bat_list);
-		sfw_destroy_batch(batch);
-	}
-
-	kfree(sn);
-	atomic_dec(&sfw_data.fw_nzombies);
-}
-
-static void
-sfw_unpack_addtest_req(struct srpc_msg *msg)
-{
-	struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
-	LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST);
-	LASSERT(req->tsr_is_client);
-
-	if (msg->msg_magic == SRPC_MSG_MAGIC)
-		return;	/* no flipping needed */
-
-	LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-	if (req->tsr_service == SRPC_SERVICE_BRW) {
-		if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
-			struct test_bulk_req *bulk = &req->tsr_u.bulk_v0;
-
-			__swab32s(&bulk->blk_opc);
-			__swab32s(&bulk->blk_npg);
-			__swab32s(&bulk->blk_flags);
-
-		} else {
-			struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1;
-
-			__swab16s(&bulk->blk_opc);
-			__swab16s(&bulk->blk_flags);
-			__swab32s(&bulk->blk_offset);
-			__swab32s(&bulk->blk_len);
-		}
-
-		return;
-	}
-
-	if (req->tsr_service == SRPC_SERVICE_PING) {
-		struct test_ping_req *ping = &req->tsr_u.ping;
-
-		__swab32s(&ping->png_size);
-		__swab32s(&ping->png_flags);
-		return;
-	}
-
-	LBUG();
-}
-
-static int
-sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
-{
-	struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-	struct srpc_bulk *bk = rpc->srpc_bulk;
-	int ndest = req->tsr_ndest;
-	struct sfw_test_unit *tsu;
-	struct sfw_test_instance *tsi;
-	int i;
-	int rc;
-
-	tsi = kzalloc(sizeof(*tsi), GFP_NOFS);
-	if (!tsi) {
-		CERROR("Can't allocate test instance for batch: %llu\n",
-		       tsb->bat_id.bat_id);
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&tsi->tsi_lock);
-	atomic_set(&tsi->tsi_nactive, 0);
-	INIT_LIST_HEAD(&tsi->tsi_units);
-	INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
-	INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
-
-	tsi->tsi_stopping = 0;
-	tsi->tsi_batch = tsb;
-	tsi->tsi_loop = req->tsr_loop;
-	tsi->tsi_concur = req->tsr_concur;
-	tsi->tsi_service = req->tsr_service;
-	tsi->tsi_is_client = !!(req->tsr_is_client);
-	tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
-
-	rc = sfw_load_test(tsi);
-	if (rc) {
-		kfree(tsi);
-		return rc;
-	}
-
-	LASSERT(!sfw_batch_active(tsb));
-
-	if (!tsi->tsi_is_client) {
-		/* it's test server, just add it to tsb */
-		list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
-		return 0;
-	}
-
-	LASSERT(bk);
-	LASSERT(bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
-	LASSERT((unsigned int)bk->bk_len >=
-		sizeof(struct lnet_process_id_packed) * ndest);
-
-	sfw_unpack_addtest_req(msg);
-	memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
-
-	for (i = 0; i < ndest; i++) {
-		struct lnet_process_id_packed *dests;
-		struct lnet_process_id_packed id;
-		int j;
-
-		dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page);
-		LASSERT(dests);  /* my pages are always mapped in kernel VM */
-		id = dests[i % SFW_ID_PER_PAGE];
-		if (msg->msg_magic != SRPC_MSG_MAGIC)
-			sfw_unpack_id(id);
-
-		for (j = 0; j < tsi->tsi_concur; j++) {
-			tsu = kzalloc(sizeof(struct sfw_test_unit), GFP_NOFS);
-			if (!tsu) {
-				rc = -ENOMEM;
-				CERROR("Can't allocate tsu for %d\n",
-				       tsi->tsi_service);
-				goto error;
-			}
-
-			tsu->tsu_dest.nid = id.nid;
-			tsu->tsu_dest.pid = id.pid;
-			tsu->tsu_instance = tsi;
-			tsu->tsu_private = NULL;
-			list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
-		}
-	}
-
-	rc = tsi->tsi_ops->tso_init(tsi);
-	if (!rc) {
-		list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
-		return 0;
-	}
-
-error:
-	LASSERT(rc);
-	sfw_destroy_test_instance(tsi);
-	return rc;
-}
-
-static void
-sfw_test_unit_done(struct sfw_test_unit *tsu)
-{
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_batch *tsb = tsi->tsi_batch;
-	struct sfw_session *sn = tsb->bat_session;
-
-	LASSERT(sfw_test_active(tsi));
-
-	if (!atomic_dec_and_test(&tsi->tsi_nactive))
-		return;
-
-	/* the test instance is done */
-	spin_lock(&tsi->tsi_lock);
-
-	tsi->tsi_stopping = 0;
-
-	spin_unlock(&tsi->tsi_lock);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (!atomic_dec_and_test(&tsb->bat_nactive) ||	/* tsb still active */
-	    sn == sfw_data.fw_session) {		/* sn also active */
-		spin_unlock(&sfw_data.fw_lock);
-		return;
-	}
-
-	LASSERT(!list_empty(&sn->sn_list)); /* I'm a zombie! */
-
-	list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
-		if (sfw_batch_active(tsb)) {
-			spin_unlock(&sfw_data.fw_lock);
-			return;
-		}
-	}
-
-	list_del_init(&sn->sn_list);
-	spin_unlock(&sfw_data.fw_lock);
-
-	sfw_destroy_session(sn);
-}
-
-static void
-sfw_test_rpc_done(struct srpc_client_rpc *rpc)
-{
-	struct sfw_test_unit *tsu = rpc->crpc_priv;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	int done = 0;
-
-	tsi->tsi_ops->tso_done_rpc(tsu, rpc);
-
-	spin_lock(&tsi->tsi_lock);
-
-	LASSERT(sfw_test_active(tsi));
-	LASSERT(!list_empty(&rpc->crpc_list));
-
-	list_del_init(&rpc->crpc_list);
-
-	/* batch is stopping, loop is done, or an error occurred */
-	if (tsi->tsi_stopping || !tsu->tsu_loop ||
-	    (rpc->crpc_status && tsi->tsi_stoptsu_onerr))
-		done = 1;
-
-	/* dec ref for poster */
-	srpc_client_rpc_decref(rpc);
-
-	spin_unlock(&tsi->tsi_lock);
-
-	if (!done) {
-		swi_schedule_workitem(&tsu->tsu_worker);
-		return;
-	}
-
-	sfw_test_unit_done(tsu);
-}
-
-int
-sfw_create_test_rpc(struct sfw_test_unit *tsu, struct lnet_process_id peer,
-		    unsigned int features, int nblk, int blklen,
-		    struct srpc_client_rpc **rpcpp)
-{
-	struct srpc_client_rpc *rpc = NULL;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-
-	spin_lock(&tsi->tsi_lock);
-
-	LASSERT(sfw_test_active(tsi));
-	/* recycle an idle RPC from the free list if possible */
-	rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs,
-				       struct srpc_client_rpc, crpc_list);
-	if (rpc) {
-		LASSERT(nblk == rpc->crpc_bulk.bk_niov);
-		list_del_init(&rpc->crpc_list);
-	}
-
-	spin_unlock(&tsi->tsi_lock);
-
-	if (!rpc) {
-		rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
-					     blklen, sfw_test_rpc_done,
-					     sfw_test_rpc_fini, tsu);
-	} else {
-		srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
-				     blklen, sfw_test_rpc_done,
-				     sfw_test_rpc_fini, tsu);
-	}
-
-	if (!rpc) {
-		CERROR("Can't create rpc for test %d\n", tsi->tsi_service);
-		return -ENOMEM;
-	}
-
-	rpc->crpc_reqstmsg.msg_ses_feats = features;
-	*rpcpp = rpc;
-
-	return 0;
-}
-
-static void
-sfw_run_test(struct swi_workitem *wi)
-{
-	struct sfw_test_unit *tsu = container_of(wi, struct sfw_test_unit, tsu_worker);
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct srpc_client_rpc *rpc = NULL;
-
-	if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc)) {
-		LASSERT(!rpc);
-		goto test_done;
-	}
-
-	LASSERT(rpc);
-
-	spin_lock(&tsi->tsi_lock);
-
-	if (tsi->tsi_stopping) {
-		list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
-		spin_unlock(&tsi->tsi_lock);
-		goto test_done;
-	}
-
-	if (tsu->tsu_loop > 0)
-		tsu->tsu_loop--;
-
-	list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
-	spin_unlock(&tsi->tsi_lock);
-
-	spin_lock(&rpc->crpc_lock);
-	rpc->crpc_timeout = rpc_timeout;
-	srpc_post_rpc(rpc);
-	spin_unlock(&rpc->crpc_lock);
-	return;
-
-test_done:
-	/*
-	 * No one can schedule me now since:
-	 * - the previous RPC, if any, has completed and no new RPC is
-	 *   initiated;
-	 * - my batch is still active, so no one can run it again now.
-	 * Cancel pending schedules and prevent future schedule attempts:
-	 */
-	sfw_test_unit_done(tsu);
-}
-
-static int
-sfw_run_batch(struct sfw_batch *tsb)
-{
-	struct swi_workitem *wi;
-	struct sfw_test_unit *tsu;
-	struct sfw_test_instance *tsi;
-
-	if (sfw_batch_active(tsb)) {
-		CDEBUG(D_NET, "Batch already active: %llu (%d)\n",
-		       tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
-		return 0;
-	}
-
-	list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
-		if (!tsi->tsi_is_client) /* skip server instances */
-			continue;
-
-		LASSERT(!tsi->tsi_stopping);
-		LASSERT(!sfw_test_active(tsi));
-
-		atomic_inc(&tsb->bat_nactive);
-
-		list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
-			atomic_inc(&tsi->tsi_nactive);
-			tsu->tsu_loop = tsi->tsi_loop;
-			wi = &tsu->tsu_worker;
-			swi_init_workitem(wi, sfw_run_test,
-					  lst_test_wq[lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
-			swi_schedule_workitem(wi);
-		}
-	}
-
-	return 0;
-}
-
-int
-sfw_stop_batch(struct sfw_batch *tsb, int force)
-{
-	struct sfw_test_instance *tsi;
-	struct srpc_client_rpc *rpc;
-
-	if (!sfw_batch_active(tsb)) {
-		CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id);
-		return 0;
-	}
-
-	list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
-		spin_lock(&tsi->tsi_lock);
-
-		if (!tsi->tsi_is_client ||
-		    !sfw_test_active(tsi) || tsi->tsi_stopping) {
-			spin_unlock(&tsi->tsi_lock);
-			continue;
-		}
-
-		tsi->tsi_stopping = 1;
-
-		if (!force) {
-			spin_unlock(&tsi->tsi_lock);
-			continue;
-		}
-
-		/* abort launched rpcs in the test */
-		list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) {
-			spin_lock(&rpc->crpc_lock);
-
-			srpc_abort_rpc(rpc, -EINTR);
-
-			spin_unlock(&rpc->crpc_lock);
-		}
-
-		spin_unlock(&tsi->tsi_lock);
-	}
-
-	return 0;
-}
-
-static int
-sfw_query_batch(struct sfw_batch *tsb, int testidx,
-		struct srpc_batch_reply *reply)
-{
-	struct sfw_test_instance *tsi;
-
-	if (testidx < 0)
-		return -EINVAL;
-
-	if (!testidx) {
-		reply->bar_active = atomic_read(&tsb->bat_nactive);
-		return 0;
-	}
-
-	list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
-		if (testidx-- > 1)
-			continue;
-
-		reply->bar_active = atomic_read(&tsi->tsi_nactive);
-		return 0;
-	}
-
-	return -ENOENT;
-}
-
-void
-sfw_free_pages(struct srpc_server_rpc *rpc)
-{
-	srpc_free_bulk(rpc->srpc_bulk);
-	rpc->srpc_bulk = NULL;
-}
-
-int
-sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
-		int sink)
-{
-	LASSERT(!rpc->srpc_bulk);
-	LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
-
-	rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink);
-	if (!rpc->srpc_bulk)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static int
-sfw_add_test(struct srpc_server_rpc *rpc)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
-	struct srpc_test_reqst *request;
-	int rc;
-	struct sfw_batch *bat;
-
-	request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
-	reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (!request->tsr_loop ||
-	    !request->tsr_concur ||
-	    request->tsr_sid.ses_nid == LNET_NID_ANY ||
-	    request->tsr_ndest > SFW_MAX_NDESTS ||
-	    (request->tsr_is_client && !request->tsr_ndest) ||
-	    request->tsr_concur > SFW_MAX_CONCUR ||
-	    request->tsr_service > SRPC_SERVICE_MAX_ID ||
-	    request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
-		reply->tsr_status = EINVAL;
-		return 0;
-	}
-
-	if (!sn || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
-	    !sfw_find_test_case(request->tsr_service)) {
-		reply->tsr_status = ENOENT;
-		return 0;
-	}
-
-	bat = sfw_bid2batch(request->tsr_bid);
-	if (!bat) {
-		CERROR("dropping RPC %s from %s under memory pressure\n",
-		       rpc->srpc_scd->scd_svc->sv_name,
-		       libcfs_id2str(rpc->srpc_peer));
-		return -ENOMEM;
-	}
-
-	if (sfw_batch_active(bat)) {
-		reply->tsr_status = EBUSY;
-		return 0;
-	}
-
-	if (request->tsr_is_client && !rpc->srpc_bulk) {
-		/* rpc will be resumed later in sfw_bulk_ready */
-		int npg = sfw_id_pages(request->tsr_ndest);
-		int len;
-
-		if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-			len = npg * PAGE_SIZE;
-
-		} else {
-			len = sizeof(struct lnet_process_id_packed) *
-			      request->tsr_ndest;
-		}
-
-		return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1);
-	}
-
-	rc = sfw_add_test_instance(bat, rpc);
-	CDEBUG(!rc ? D_NET : D_WARNING,
-	       "%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
-	       !rc ? "Added" : "Failed to add", request->tsr_service,
-	       request->tsr_is_client ? "client" : "server",
-	       request->tsr_loop, request->tsr_concur, request->tsr_ndest);
-
-	reply->tsr_status = (rc < 0) ? -rc : rc;
-	return 0;
-}
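
Note that sfw_add_test() is deliberately re-entrant: on the first pass for a client request it only posts a bulk buffer for the destination IDs and returns, and once the RPC layer has pulled that bulk from the peer, sfw_bulk_ready() below calls it a second time with rpc->srpc_bulk set. A toy sketch of this two-phase shape, with all names hypothetical:

	#include <stdio.h>

	struct rpc { const char *bulk; };

	static int add_test(struct rpc *rpc)
	{
		if (!rpc->bulk) {
			rpc->bulk = "ids"; /* stand-in for sfw_alloc_pages() */
			return 0;	   /* wait for bulk_ready callback */
		}
		printf("instantiating test from bulk \"%s\"\n", rpc->bulk);
		return 0;
	}

	int main(void)
	{
		struct rpc r = { NULL };

		add_test(&r);	/* phase 1: posts the bulk buffer */
		add_test(&r);	/* phase 2: bulk arrived, build the test */
		return 0;
	}
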
-
-static int
-sfw_control_batch(struct srpc_batch_reqst *request,
-		  struct srpc_batch_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	int rc = 0;
-	struct sfw_batch *bat;
-
-	reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (!sn || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
-		reply->bar_status = ESRCH;
-		return 0;
-	}
-
-	bat = sfw_find_batch(request->bar_bid);
-	if (!bat) {
-		reply->bar_status = ENOENT;
-		return 0;
-	}
-
-	switch (request->bar_opc) {
-	case SRPC_BATCH_OPC_RUN:
-		rc = sfw_run_batch(bat);
-		break;
-
-	case SRPC_BATCH_OPC_STOP:
-		rc = sfw_stop_batch(bat, request->bar_arg);
-		break;
-
-	case SRPC_BATCH_OPC_QUERY:
-		rc = sfw_query_batch(bat, request->bar_testidx, reply);
-		break;
-
-	default:
-		return -EINVAL; /* drop it */
-	}
-
-	reply->bar_status = (rc < 0) ? -rc : rc;
-	return 0;
-}
-
-static int
-sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	struct srpc_msg *reply = &rpc->srpc_replymsg;
-	struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg;
-	unsigned int features = LST_FEATS_MASK;
-	int rc = 0;
-
-	LASSERT(!sfw_data.fw_active_srpc);
-	LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (sfw_data.fw_shuttingdown) {
-		spin_unlock(&sfw_data.fw_lock);
-		return -ESHUTDOWN;
-	}
-
-	/* Remove timer to avoid racing with it or expiring active session */
-	if (sfw_del_session_timer()) {
-		CERROR("dropping RPC %s from %s: racing with expiry timer\n",
-		       sv->sv_name, libcfs_id2str(rpc->srpc_peer));
-		spin_unlock(&sfw_data.fw_lock);
-		return -EAGAIN;
-	}
-
-	sfw_data.fw_active_srpc = rpc;
-	spin_unlock(&sfw_data.fw_lock);
-
-	sfw_unpack_message(request);
-	LASSERT(request->msg_type == srpc_service2request(sv->sv_id));
-
-	/* rpc module should have checked this */
-	LASSERT(request->msg_version == SRPC_MSG_VERSION);
-
-	if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION &&
-	    sv->sv_id != SRPC_SERVICE_DEBUG) {
-		struct sfw_session *sn = sfw_data.fw_session;
-
-		if (sn &&
-		    sn->sn_features != request->msg_ses_feats) {
-			CNETERR("Features of framework RPC don't match features of current session: %x/%x\n",
-				request->msg_ses_feats, sn->sn_features);
-			reply->msg_body.reply.status = EPROTO;
-			reply->msg_body.reply.sid = sn->sn_id;
-			goto out;
-		}
-
-	} else if (request->msg_ses_feats & ~LST_FEATS_MASK) {
-		/*
-		 * NB: at this point, old version will ignore features and
-		 * create new session anyway, so console should be able
-		 * to handle this
-		 */
-		reply->msg_body.reply.status = EPROTO;
-		goto out;
-	}
-
-	switch (sv->sv_id) {
-	default:
-		LBUG();
-	case SRPC_SERVICE_TEST:
-		rc = sfw_add_test(rpc);
-		break;
-
-	case SRPC_SERVICE_BATCH:
-		rc = sfw_control_batch(&request->msg_body.bat_reqst,
-				       &reply->msg_body.bat_reply);
-		break;
-
-	case SRPC_SERVICE_QUERY_STAT:
-		rc = sfw_get_stats(&request->msg_body.stat_reqst,
-				   &reply->msg_body.stat_reply);
-		break;
-
-	case SRPC_SERVICE_DEBUG:
-		rc = sfw_debug_session(&request->msg_body.dbg_reqst,
-				       &reply->msg_body.dbg_reply);
-		break;
-
-	case SRPC_SERVICE_MAKE_SESSION:
-		rc = sfw_make_session(&request->msg_body.mksn_reqst,
-				      &reply->msg_body.mksn_reply);
-		break;
-
-	case SRPC_SERVICE_REMOVE_SESSION:
-		rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
-					&reply->msg_body.rmsn_reply);
-		break;
-	}
-
-	if (sfw_data.fw_session)
-		features = sfw_data.fw_session->sn_features;
- out:
-	reply->msg_ses_feats = features;
-	rpc->srpc_done = sfw_server_rpc_done;
-	spin_lock(&sfw_data.fw_lock);
-
-	if (!sfw_data.fw_shuttingdown)
-		sfw_add_session_timer();
-
-	sfw_data.fw_active_srpc = NULL;
-	spin_unlock(&sfw_data.fw_lock);
-	return rc;
-}
-
-static int
-sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	int rc;
-
-	LASSERT(rpc->srpc_bulk);
-	LASSERT(sv->sv_id == SRPC_SERVICE_TEST);
-	LASSERT(!sfw_data.fw_active_srpc);
-	LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (status) {
-		CERROR("Bulk transfer failed for RPC: service %s, peer %s, status %d\n",
-		       sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
-		spin_unlock(&sfw_data.fw_lock);
-		return -EIO;
-	}
-
-	if (sfw_data.fw_shuttingdown) {
-		spin_unlock(&sfw_data.fw_lock);
-		return -ESHUTDOWN;
-	}
-
-	if (sfw_del_session_timer()) {
-		CERROR("dropping RPC %s from %s: racing with expiry timer\n",
-		       sv->sv_name, libcfs_id2str(rpc->srpc_peer));
-		spin_unlock(&sfw_data.fw_lock);
-		return -EAGAIN;
-	}
-
-	sfw_data.fw_active_srpc = rpc;
-	spin_unlock(&sfw_data.fw_lock);
-
-	rc = sfw_add_test(rpc);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (!sfw_data.fw_shuttingdown)
-		sfw_add_session_timer();
-
-	sfw_data.fw_active_srpc = NULL;
-	spin_unlock(&sfw_data.fw_lock);
-	return rc;
-}
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
-	       unsigned int features, int nbulkiov, int bulklen,
-	       void (*done)(struct srpc_client_rpc *), void *priv)
-{
-	struct srpc_client_rpc *rpc = NULL;
-
-	spin_lock(&sfw_data.fw_lock);
-
-	LASSERT(!sfw_data.fw_shuttingdown);
-	LASSERT(service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) {
-		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
-				 struct srpc_client_rpc, crpc_list);
-		list_del(&rpc->crpc_list);
-
-		srpc_init_client_rpc(rpc, peer, service, 0, 0,
-				     done, sfw_client_rpc_fini, priv);
-	}
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	if (!rpc) {
-		rpc = srpc_create_client_rpc(peer, service,
-					     nbulkiov, bulklen, done,
-					     nbulkiov ?  NULL :
-					     sfw_client_rpc_fini,
-					     priv);
-	}
-
-	if (rpc) /* "session" is concept in framework */
-		rpc->crpc_reqstmsg.msg_ses_feats = features;
-
-	return rpc;
-}
-
-void
-sfw_unpack_message(struct srpc_msg *msg)
-{
-	if (msg->msg_magic == SRPC_MSG_MAGIC)
-		return; /* no flipping needed */
-
-	/* srpc module should guarantee I wouldn't get crap */
-	LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-	if (msg->msg_type == SRPC_MSG_STAT_REQST) {
-		struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst;
-
-		__swab32s(&req->str_type);
-		__swab64s(&req->str_rpyid);
-		sfw_unpack_sid(req->str_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
-		struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
-
-		__swab32s(&rep->str_status);
-		sfw_unpack_sid(rep->str_sid);
-		sfw_unpack_fw_counters(rep->str_fw);
-		sfw_unpack_rpc_counters(rep->str_rpc);
-		sfw_unpack_lnet_counters(rep->str_lnet);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
-		struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst;
-
-		__swab64s(&req->mksn_rpyid);
-		__swab32s(&req->mksn_force);
-		sfw_unpack_sid(req->mksn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
-		struct srpc_mksn_reply *rep = &msg->msg_body.mksn_reply;
-
-		__swab32s(&rep->mksn_status);
-		__swab32s(&rep->mksn_timeout);
-		sfw_unpack_sid(rep->mksn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
-		struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst;
-
-		__swab64s(&req->rmsn_rpyid);
-		sfw_unpack_sid(req->rmsn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
-		struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply;
-
-		__swab32s(&rep->rmsn_status);
-		sfw_unpack_sid(rep->rmsn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
-		struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst;
-
-		__swab64s(&req->dbg_rpyid);
-		__swab32s(&req->dbg_flags);
-		sfw_unpack_sid(req->dbg_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
-		struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply;
-
-		__swab32s(&rep->dbg_nbatch);
-		__swab32s(&rep->dbg_timeout);
-		sfw_unpack_sid(rep->dbg_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
-		struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst;
-
-		__swab32s(&req->bar_opc);
-		__swab64s(&req->bar_rpyid);
-		__swab32s(&req->bar_testidx);
-		__swab32s(&req->bar_arg);
-		sfw_unpack_sid(req->bar_sid);
-		__swab64s(&req->bar_bid.bat_id);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
-		struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
-		__swab32s(&rep->bar_status);
-		sfw_unpack_sid(rep->bar_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_TEST_REQST) {
-		struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
-		__swab64s(&req->tsr_rpyid);
-		__swab64s(&req->tsr_bulkid);
-		__swab32s(&req->tsr_loop);
-		__swab32s(&req->tsr_ndest);
-		__swab32s(&req->tsr_concur);
-		__swab32s(&req->tsr_service);
-		sfw_unpack_sid(req->tsr_sid);
-		__swab64s(&req->tsr_bid.bat_id);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
-		struct srpc_test_reply *rep = &msg->msg_body.tes_reply;
-
-		__swab32s(&rep->tsr_status);
-		sfw_unpack_sid(rep->tsr_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
-		struct srpc_join_reqst *req = &msg->msg_body.join_reqst;
-
-		__swab64s(&req->join_rpyid);
-		sfw_unpack_sid(req->join_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
-		struct srpc_join_reply *rep = &msg->msg_body.join_reply;
-
-		__swab32s(&rep->join_status);
-		__swab32s(&rep->join_timeout);
-		sfw_unpack_sid(rep->join_sid);
-		return;
-	}
-
-	LBUG();
-}
-
-void
-sfw_abort_rpc(struct srpc_client_rpc *rpc)
-{
-	LASSERT(atomic_read(&rpc->crpc_refcount) > 0);
-	LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	spin_lock(&rpc->crpc_lock);
-	srpc_abort_rpc(rpc, -EINTR);
-	spin_unlock(&rpc->crpc_lock);
-}
-
-void
-sfw_post_rpc(struct srpc_client_rpc *rpc)
-{
-	spin_lock(&rpc->crpc_lock);
-
-	LASSERT(!rpc->crpc_closed);
-	LASSERT(!rpc->crpc_aborted);
-	LASSERT(list_empty(&rpc->crpc_list));
-	LASSERT(!sfw_data.fw_shuttingdown);
-
-	rpc->crpc_timeout = rpc_timeout;
-	srpc_post_rpc(rpc);
-
-	spin_unlock(&rpc->crpc_lock);
-}
-
-static struct srpc_service sfw_services[] = {
-	{
-		/* sv_id */    SRPC_SERVICE_DEBUG,
-		/* sv_name */  "debug",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_QUERY_STAT,
-		/* sv_name */  "query stats",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_MAKE_SESSION,
-		/* sv_name */  "make session",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_REMOVE_SESSION,
-		/* sv_name */  "remove session",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_BATCH,
-		/* sv_name */  "batch service",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_TEST,
-		/* sv_name */  "test service",
-		0
-	},
-	{
-		/* sv_id */    0,
-		/* sv_name */  NULL,
-		0
-	}
-};
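
The table is terminated by a NULL-named sentinel entry, which lets sfw_startup() and sfw_shutdown() walk it without carrying a separate element count. The idiom in isolation:

	#include <stdio.h>

	struct service { int id; const char *name; };

	static struct service services[] = {
		{ 1, "debug" },
		{ 2, "query stats" },
		{ 0, NULL },	/* sentinel terminates the walk */
	};

	int main(void)
	{
		for (struct service *sv = services; sv->name; sv++)
			printf("adding service %d (%s)\n", sv->id, sv->name);
		return 0;
	}
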
-
-int
-sfw_startup(void)
-{
-	int i;
-	int rc;
-	int error;
-	struct srpc_service *sv;
-	struct sfw_test_case *tsc;
-
-	if (session_timeout < 0) {
-		CERROR("Session timeout must be non-negative: %d\n",
-		       session_timeout);
-		return -EINVAL;
-	}
-
-	if (rpc_timeout < 0) {
-		CERROR("RPC timeout must be non-negative: %d\n",
-		       rpc_timeout);
-		return -EINVAL;
-	}
-
-	if (!session_timeout)
-		CWARN("Zero session_timeout specified - test sessions never expire.\n");
-
-	if (!rpc_timeout)
-		CWARN("Zero rpc_timeout specified - test RPC never expire.\n");
-
-	memset(&sfw_data, 0, sizeof(struct smoketest_framework));
-
-	sfw_data.fw_session = NULL;
-	sfw_data.fw_active_srpc = NULL;
-	spin_lock_init(&sfw_data.fw_lock);
-	atomic_set(&sfw_data.fw_nzombies, 0);
-	INIT_LIST_HEAD(&sfw_data.fw_tests);
-	INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
-	INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
-
-	brw_init_test_client();
-	brw_init_test_service();
-	rc = sfw_register_test(&brw_test_service, &brw_test_client);
-	LASSERT(!rc);
-
-	ping_init_test_client();
-	ping_init_test_service();
-	rc = sfw_register_test(&ping_test_service, &ping_test_client);
-	LASSERT(!rc);
-
-	error = 0;
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		sv = tsc->tsc_srv_service;
-
-		rc = srpc_add_service(sv);
-		LASSERT(rc != -EBUSY);
-		if (rc) {
-			CWARN("Failed to add %s service: %d\n",
-			      sv->sv_name, rc);
-			error = rc;
-		}
-	}
-
-	for (i = 0; ; i++) {
-		sv = &sfw_services[i];
-		if (!sv->sv_name)
-			break;
-
-		sv->sv_bulk_ready = NULL;
-		sv->sv_handler = sfw_handle_server_rpc;
-		sv->sv_wi_total = SFW_FRWK_WI_MAX;
-		if (sv->sv_id == SRPC_SERVICE_TEST)
-			sv->sv_bulk_ready = sfw_bulk_ready;
-
-		rc = srpc_add_service(sv);
-		LASSERT(rc != -EBUSY);
-		if (rc) {
-			CWARN("Failed to add %s service: %d\n",
-			      sv->sv_name, rc);
-			error = rc;
-		}
-
-		/* about to call sfw_shutdown(), no need to add buffers */
-		if (error)
-			continue;
-
-		rc = srpc_service_add_buffers(sv, sv->sv_wi_total);
-		if (rc) {
-			CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
-			      sv->sv_name, sv->sv_wi_total, rc);
-			error = -ENOMEM;
-		}
-	}
-
-	if (error)
-		sfw_shutdown();
-	return error;
-}
-
-void
-sfw_shutdown(void)
-{
-	struct srpc_service *sv;
-	struct sfw_test_case	*tsc;
-	int i;
-
-	spin_lock(&sfw_data.fw_lock);
-
-	sfw_data.fw_shuttingdown = 1;
-	lst_wait_until(!sfw_data.fw_active_srpc, sfw_data.fw_lock,
-		       "waiting for active RPC to finish.\n");
-
-	if (sfw_del_session_timer())
-		lst_wait_until(!sfw_data.fw_session, sfw_data.fw_lock,
-			       "waiting for session timer to explode.\n");
-
-	sfw_deactivate_session();
-	lst_wait_until(!atomic_read(&sfw_data.fw_nzombies),
-		       sfw_data.fw_lock,
-		       "waiting for %d zombie sessions to die.\n",
-		       atomic_read(&sfw_data.fw_nzombies));
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	for (i = 0; ; i++) {
-		sv = &sfw_services[i];
-		if (!sv->sv_name)
-			break;
-
-		srpc_shutdown_service(sv);
-		srpc_remove_service(sv);
-	}
-
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		sv = tsc->tsc_srv_service;
-		srpc_shutdown_service(sv);
-		srpc_remove_service(sv);
-	}
-
-	while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
-		struct srpc_client_rpc *rpc;
-
-		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
-				 struct srpc_client_rpc, crpc_list);
-		list_del(&rpc->crpc_list);
-
-		kfree(rpc);
-	}
-
-	for (i = 0; ; i++) {
-		sv = &sfw_services[i];
-		if (!sv->sv_name)
-			break;
-
-		srpc_wait_service_shutdown(sv);
-	}
-
-	while (!list_empty(&sfw_data.fw_tests)) {
-		tsc = list_entry(sfw_data.fw_tests.next,
-				 struct sfw_test_case, tsc_list);
-
-		srpc_wait_service_shutdown(tsc->tsc_srv_service);
-
-		list_del(&tsc->tsc_list);
-		kfree(tsc);
-	}
-}

+ 0 - 169
drivers/staging/lustre/lnet/selftest/module.c

@@ -1,169 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-#include "console.h"
-
-enum {
-	LST_INIT_NONE		= 0,
-	LST_INIT_WI_SERIAL,
-	LST_INIT_WI_TEST,
-	LST_INIT_RPC,
-	LST_INIT_FW,
-	LST_INIT_CONSOLE
-};
-
-static int lst_init_step = LST_INIT_NONE;
-
-struct workqueue_struct *lst_serial_wq;
-struct workqueue_struct **lst_test_wq;
-
-static void
-lnet_selftest_exit(void)
-{
-	int i;
-
-	switch (lst_init_step) {
-	case LST_INIT_CONSOLE:
-		lstcon_console_fini();
-		/* fall through */
-	case LST_INIT_FW:
-		sfw_shutdown();
-		/* fall through */
-	case LST_INIT_RPC:
-		srpc_shutdown();
-		/* fall through */
-	case LST_INIT_WI_TEST:
-		for (i = 0; i < cfs_cpt_number(lnet_cpt_table()); i++) {
-			if (!lst_test_wq[i])
-				continue;
-			destroy_workqueue(lst_test_wq[i]);
-		}
-		kvfree(lst_test_wq);
-		lst_test_wq = NULL;
-		/* fall through */
-	case LST_INIT_WI_SERIAL:
-		destroy_workqueue(lst_serial_wq);
-		lst_serial_wq = NULL;
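-		/* fall through */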
-	case LST_INIT_NONE:
-		break;
-	default:
-		LBUG();
-	}
-}
-
-static int
-lnet_selftest_init(void)
-{
-	int nscheds;
-	int rc;
-	int i;
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	lst_serial_wq = alloc_ordered_workqueue("lst_s", 0);
-	if (!lst_serial_wq) {
-		CERROR("Failed to create serial WI scheduler for LST\n");
-		return -ENOMEM;
-	}
-	lst_init_step = LST_INIT_WI_SERIAL;
-
-	nscheds = cfs_cpt_number(lnet_cpt_table());
-	lst_test_wq = kvmalloc_array(nscheds, sizeof(lst_test_wq[0]),
-				     GFP_KERNEL | __GFP_ZERO);
-	if (!lst_test_wq) {
-		rc = -ENOMEM;
-		goto error;
-	}
-
-	lst_init_step = LST_INIT_WI_TEST;
-	for (i = 0; i < nscheds; i++) {
-		int nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
-		struct workqueue_attrs attrs = {0};
-		cpumask_var_t *mask = cfs_cpt_cpumask(lnet_cpt_table(), i);
-
-		/* reserve at least one CPU for LND */
-		nthrs = max(nthrs - 1, 1);
-		lst_test_wq[i] = alloc_workqueue("lst_t", WQ_UNBOUND, nthrs);
-		if (!lst_test_wq[i]) {
-			CWARN("Failed to create CPU partition affinity WI scheduler %d for LST\n",
-			      i);
-			rc = -ENOMEM;
-			goto error;
-		}
-
-		if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
-			cpumask_copy(attrs.cpumask, *mask);
-			apply_workqueue_attrs(lst_test_wq[i], &attrs);
-			free_cpumask_var(attrs.cpumask);
-		}
-	}
-
-	rc = srpc_startup();
-	if (rc) {
-		CERROR("LST can't startup rpc\n");
-		goto error;
-	}
-	lst_init_step = LST_INIT_RPC;
-
-	rc = sfw_startup();
-	if (rc) {
-		CERROR("LST can't startup framework\n");
-		goto error;
-	}
-	lst_init_step = LST_INIT_FW;
-
-	rc = lstcon_console_init();
-	if (rc) {
-		CERROR("LST can't startup console\n");
-		goto error;
-	}
-	lst_init_step = LST_INIT_CONSOLE;
-	return 0;
-error:
-	lnet_selftest_exit();
-	return rc;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("LNet Selftest");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(lnet_selftest_init);
-module_exit(lnet_selftest_exit);

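The init/exit pair above is the classic staged-unwind pattern:
lnet_selftest_init() records how far it got in lst_init_step, and
lnet_selftest_exit() tears down by entering a switch at the last completed
step and falling through. A minimal standalone sketch of the same idiom
(hypothetical step names, not from the deleted file):

    #include <stdio.h>

    /* Hypothetical three-step init; mirrors lst_init_step only in shape. */
    enum { INIT_NONE, INIT_A, INIT_B, INIT_C };

    static int init_step = INIT_NONE;

    static void teardown(void)
    {
        switch (init_step) {
        case INIT_C:
            puts("undo C");
            /* fall through */
        case INIT_B:
            puts("undo B");
            /* fall through */
        case INIT_A:
            puts("undo A");
            /* fall through */
        case INIT_NONE:
            break;
        }
    }

    static int init(int fail_at)
    {
        puts("do A");
        init_step = INIT_A;
        puts("do B");
        init_step = INIT_B;
        if (fail_at == INIT_C) {  /* simulate the last step failing */
            teardown();           /* unwinds B, then A */
            return -1;
        }
        init_step = INIT_C;
        return 0;
    }

    int main(void)
    {
        return init(INIT_C) ? 1 : 0;
    }
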
+ 0 - 228
drivers/staging/lustre/lnet/selftest/ping_test.c

@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conctl.c
- *
- * Test client & Server
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include "selftest.h"
-
-#define LST_PING_TEST_MAGIC	0xbabeface
-
-static int ping_srv_workitems = SFW_TEST_WI_MAX;
-module_param(ping_srv_workitems, int, 0644);
-MODULE_PARM_DESC(ping_srv_workitems, "# PING server workitems");
-
-struct lst_ping_data {
-	spinlock_t	pnd_lock;	/* serialize */
-	int		pnd_counter;	/* sequence counter */
-};
-
-static struct lst_ping_data  lst_ping_data;
-
-static int
-ping_client_init(struct sfw_test_instance *tsi)
-{
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-
-	LASSERT(tsi->tsi_is_client);
-	LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK));
-
-	spin_lock_init(&lst_ping_data.pnd_lock);
-	lst_ping_data.pnd_counter = 0;
-
-	return 0;
-}
-
-static void
-ping_client_fini(struct sfw_test_instance *tsi)
-{
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	int errors;
-
-	LASSERT(sn);
-	LASSERT(tsi->tsi_is_client);
-
-	errors = atomic_read(&sn->sn_ping_errors);
-	if (errors)
-		CWARN("%d pings have failed.\n", errors);
-	else
-		CDEBUG(D_NET, "Ping test finished OK.\n");
-}
-
-static int
-ping_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
-		     struct srpc_client_rpc **rpc)
-{
-	struct srpc_ping_reqst *req;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct timespec64 ts;
-	int rc;
-
-	LASSERT(sn);
-	LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
-	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc);
-	if (rc)
-		return rc;
-
-	req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
-
-	req->pnr_magic = LST_PING_TEST_MAGIC;
-
-	spin_lock(&lst_ping_data.pnd_lock);
-	req->pnr_seq = lst_ping_data.pnd_counter++;
-	spin_unlock(&lst_ping_data.pnd_lock);
-
-	ktime_get_real_ts64(&ts);
-	req->pnr_time_sec = ts.tv_sec;
-	req->pnr_time_usec = ts.tv_nsec / NSEC_PER_USEC;
-
-	return rc;
-}
-
-static void
-ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
-	struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
-	struct timespec64 ts;
-
-	LASSERT(sn);
-
-	if (rpc->crpc_status) {
-		if (!tsi->tsi_stopping)	/* rpc could have been aborted */
-			atomic_inc(&sn->sn_ping_errors);
-		CERROR("Unable to ping %s (%d): %d\n",
-		       libcfs_id2str(rpc->crpc_dest),
-		       reqst->pnr_seq, rpc->crpc_status);
-		return;
-	}
-
-	if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
-		__swab32s(&reply->pnr_seq);
-		__swab32s(&reply->pnr_magic);
-		__swab32s(&reply->pnr_status);
-	}
-
-	if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
-		rpc->crpc_status = -EBADMSG;
-		atomic_inc(&sn->sn_ping_errors);
-		CERROR("Bad magic %u from %s, %u expected.\n",
-		       reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
-		       LST_PING_TEST_MAGIC);
-		return;
-	}
-
-	if (reply->pnr_seq != reqst->pnr_seq) {
-		rpc->crpc_status = -EBADMSG;
-		atomic_inc(&sn->sn_ping_errors);
-		CERROR("Bad seq %u from %s, %u expected.\n",
-		       reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
-		       reqst->pnr_seq);
-		return;
-	}
-
-	ktime_get_real_ts64(&ts);
-	CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq,
-	       (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 +
-			      (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec)));
-}
-
-static int
-ping_server_handle(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_msg *replymsg = &rpc->srpc_replymsg;
-	struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst;
-	struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
-
-	LASSERT(sv->sv_id == SRPC_SERVICE_PING);
-
-	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
-		LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-		__swab32s(&req->pnr_seq);
-		__swab32s(&req->pnr_magic);
-		__swab64s(&req->pnr_time_sec);
-		__swab64s(&req->pnr_time_usec);
-	}
-	LASSERT(reqstmsg->msg_type == srpc_service2request(sv->sv_id));
-
-	if (req->pnr_magic != LST_PING_TEST_MAGIC) {
-		CERROR("Unexpected magic %08x from %s\n",
-		       req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
-		return -EINVAL;
-	}
-
-	rep->pnr_seq = req->pnr_seq;
-	rep->pnr_magic = LST_PING_TEST_MAGIC;
-
-	if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
-		replymsg->msg_ses_feats = LST_FEATS_MASK;
-		rep->pnr_status = EPROTO;
-		return 0;
-	}
-
-	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
-	CDEBUG(D_NET, "Got ping %d from %s\n",
-	       req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
-	return 0;
-}
-
-struct sfw_test_client_ops ping_test_client;
-
-void ping_init_test_client(void)
-{
-	ping_test_client.tso_init = ping_client_init;
-	ping_test_client.tso_fini = ping_client_fini;
-	ping_test_client.tso_prep_rpc = ping_client_prep_rpc;
-	ping_test_client.tso_done_rpc = ping_client_done_rpc;
-}
-
-struct srpc_service ping_test_service;
-
-void ping_init_test_service(void)
-{
-	ping_test_service.sv_id = SRPC_SERVICE_PING;
-	ping_test_service.sv_name = "ping_test";
-	ping_test_service.sv_handler = ping_server_handle;
-	ping_test_service.sv_wi_total = ping_srv_workitems;
-}

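The __swab32s() calls in ping_client_done_rpc() and ping_server_handle()
above implement the usual selftest wire convention: senders write fields in
native byte order, and the receiver swaps only when the magic arrives
byte-reversed. A self-contained sketch of that check (simplified message
layout; swab32() here stands in for the kernel's __swab32()):

    #include <stdint.h>
    #include <stdio.h>

    #define MAGIC 0xbabefaceU   /* same value as LST_PING_TEST_MAGIC */

    static uint32_t swab32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0xff00) |
               ((v << 8) & 0xff0000) | (v << 24);
    }

    struct msg { uint32_t magic; uint32_t seq; };

    /* Returns 0 and fixes byte order in place, or -1 for a bad magic. */
    static int unpack(struct msg *m)
    {
        if (m->magic == MAGIC)
            return 0;                    /* peer has the same byte order */
        if (m->magic == swab32(MAGIC)) {
            m->seq = swab32(m->seq);     /* peer has the opposite order */
            m->magic = MAGIC;
            return 0;
        }
        return -1;                       /* corrupt: drop the message */
    }

    int main(void)
    {
        struct msg m = { swab32(MAGIC), swab32(7) };  /* "foreign" message */

        if (!unpack(&m))
            printf("seq %u\n", (unsigned)m.seq);      /* prints: seq 7 */
        return 0;
    }
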
+ 0 - 1682
drivers/staging/lustre/lnet/selftest/rpc.c

@@ -1,1682 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/rpc.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- * 2012-05-13: Liang Zhen <liang@whamcloud.com>
- * - percpt data for service to improve smp performance
- * - code cleanup
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-enum srpc_state {
-	SRPC_STATE_NONE,
-	SRPC_STATE_NI_INIT,
-	SRPC_STATE_EQ_INIT,
-	SRPC_STATE_RUNNING,
-	SRPC_STATE_STOPPING,
-};
-
-static struct smoketest_rpc {
-	spinlock_t	 rpc_glock;	/* global lock */
-	struct srpc_service	*rpc_services[SRPC_SERVICE_MAX_ID + 1];
-	struct lnet_handle_eq	 rpc_lnet_eq;	/* _the_ LNet event queue */
-	enum srpc_state	 rpc_state;
-	struct srpc_counters	 rpc_counters;
-	__u64		 rpc_matchbits;	/* matchbits counter */
-} srpc_data;
-
-static inline int
-srpc_serv_portal(int svc_id)
-{
-	return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ?
-	       SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL;
-}
-
-/* forward refs */
-void srpc_handle_rpc(struct swi_workitem *wi);
-
-void srpc_get_counters(struct srpc_counters *cnt)
-{
-	spin_lock(&srpc_data.rpc_glock);
-	*cnt = srpc_data.rpc_counters;
-	spin_unlock(&srpc_data.rpc_glock);
-}
-
-void srpc_set_counters(const struct srpc_counters *cnt)
-{
-	spin_lock(&srpc_data.rpc_glock);
-	srpc_data.rpc_counters = *cnt;
-	spin_unlock(&srpc_data.rpc_glock);
-}
-
-static int
-srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
-		   int nob)
-{
-	LASSERT(off < PAGE_SIZE);
-	LASSERT(nob > 0 && nob <= PAGE_SIZE);
-
-	bk->bk_iovs[i].bv_offset = off;
-	bk->bk_iovs[i].bv_page = pg;
-	bk->bk_iovs[i].bv_len = nob;
-	return nob;
-}
-
-void
-srpc_free_bulk(struct srpc_bulk *bk)
-{
-	int i;
-	struct page *pg;
-
-	LASSERT(bk);
-
-	for (i = 0; i < bk->bk_niov; i++) {
-		pg = bk->bk_iovs[i].bv_page;
-		if (!pg)
-			break;
-
-		__free_page(pg);
-	}
-
-	kfree(bk);
-}
-
-struct srpc_bulk *
-srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg,
-		unsigned int bulk_len, int sink)
-{
-	struct srpc_bulk *bk;
-	int i;
-
-	LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
-
-	bk = kzalloc_cpt(offsetof(struct srpc_bulk, bk_iovs[bulk_npg]),
-			 GFP_KERNEL, cpt);
-	if (!bk) {
-		CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
-		return NULL;
-	}
-
-	memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
-	bk->bk_sink = sink;
-	bk->bk_len = bulk_len;
-	bk->bk_niov = bulk_npg;
-
-	for (i = 0; i < bulk_npg; i++) {
-		struct page *pg;
-		int nob;
-
-		pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				      GFP_KERNEL, 0);
-		if (!pg) {
-			CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
-			srpc_free_bulk(bk);
-			return NULL;
-		}
-
-		nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) -
-		      bulk_off;
-		srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
-		bulk_len -= nob;
-		bulk_off = 0;
-	}
-
-	return bk;
-}
-
-static inline __u64
-srpc_next_id(void)
-{
-	__u64 id;
-
-	spin_lock(&srpc_data.rpc_glock);
-	id = srpc_data.rpc_matchbits++;
-	spin_unlock(&srpc_data.rpc_glock);
-	return id;
-}
-
-static void
-srpc_init_server_rpc(struct srpc_server_rpc *rpc,
-		     struct srpc_service_cd *scd,
-		     struct srpc_buffer *buffer)
-{
-	memset(rpc, 0, sizeof(*rpc));
-	swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
-			  srpc_serv_is_framework(scd->scd_svc) ?
-			  lst_serial_wq : lst_test_wq[scd->scd_cpt]);
-
-	rpc->srpc_ev.ev_fired = 1; /* no event expected now */
-
-	rpc->srpc_scd = scd;
-	rpc->srpc_reqstbuf = buffer;
-	rpc->srpc_peer = buffer->buf_peer;
-	rpc->srpc_self = buffer->buf_self;
-	LNetInvalidateMDHandle(&rpc->srpc_replymdh);
-}
-
-static void
-srpc_service_fini(struct srpc_service *svc)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	struct srpc_buffer *buf;
-	struct list_head *q;
-	int i;
-
-	if (!svc->sv_cpt_data)
-		return;
-
-	cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
-		while (1) {
-			if (!list_empty(&scd->scd_buf_posted))
-				q = &scd->scd_buf_posted;
-			else if (!list_empty(&scd->scd_buf_blocked))
-				q = &scd->scd_buf_blocked;
-			else
-				break;
-
-			while (!list_empty(q)) {
-				buf = list_entry(q->next, struct srpc_buffer,
-						 buf_list);
-				list_del(&buf->buf_list);
-				kfree(buf);
-			}
-		}
-
-		LASSERT(list_empty(&scd->scd_rpc_active));
-
-		while (!list_empty(&scd->scd_rpc_free)) {
-			rpc = list_entry(scd->scd_rpc_free.next,
-					 struct srpc_server_rpc,
-					 srpc_list);
-			list_del(&rpc->srpc_list);
-			kfree(rpc);
-		}
-	}
-
-	cfs_percpt_free(svc->sv_cpt_data);
-	svc->sv_cpt_data = NULL;
-}
-
-static int
-srpc_service_nrpcs(struct srpc_service *svc)
-{
-	int nrpcs = svc->sv_wi_total / svc->sv_ncpts;
-
-	return srpc_serv_is_framework(svc) ?
-	       max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN);
-}
-
-void srpc_add_buffer(struct swi_workitem *wi);
-
-static int
-srpc_service_init(struct srpc_service *svc)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	int nrpcs;
-	int i;
-	int j;
-
-	svc->sv_shuttingdown = 0;
-
-	svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
-					    sizeof(**svc->sv_cpt_data));
-	if (!svc->sv_cpt_data)
-		return -ENOMEM;
-
-	svc->sv_ncpts = srpc_serv_is_framework(svc) ?
-			1 : cfs_cpt_number(lnet_cpt_table());
-	nrpcs = srpc_service_nrpcs(svc);
-
-	cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
-		scd->scd_cpt = i;
-		scd->scd_svc = svc;
-		spin_lock_init(&scd->scd_lock);
-		INIT_LIST_HEAD(&scd->scd_rpc_free);
-		INIT_LIST_HEAD(&scd->scd_rpc_active);
-		INIT_LIST_HEAD(&scd->scd_buf_posted);
-		INIT_LIST_HEAD(&scd->scd_buf_blocked);
-
-		scd->scd_ev.ev_data = scd;
-		scd->scd_ev.ev_type = SRPC_REQUEST_RCVD;
-
-		/*
-		 * NB: don't use lst_serial_wq for adding buffer,
-		 * see details in srpc_service_add_buffers()
-		 */
-		swi_init_workitem(&scd->scd_buf_wi,
-				  srpc_add_buffer, lst_test_wq[i]);
-
-		if (i && srpc_serv_is_framework(svc)) {
-			/*
-			 * NB: the framework service only needs srpc_service_cd
-			 * for one partition, but we allocate for all partitions
-			 * to keep the implementation simple; this wastes a
-			 * little memory, but nobody should care
-			 */
-			continue;
-		}
-
-		for (j = 0; j < nrpcs; j++) {
-			rpc = kzalloc_cpt(sizeof(*rpc), GFP_NOFS, i);
-			if (!rpc) {
-				srpc_service_fini(svc);
-				return -ENOMEM;
-			}
-			list_add(&rpc->srpc_list, &scd->scd_rpc_free);
-		}
-	}
-
-	return 0;
-}
-
-int
-srpc_add_service(struct srpc_service *sv)
-{
-	int id = sv->sv_id;
-
-	LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID);
-
-	if (srpc_service_init(sv))
-		return -ENOMEM;
-
-	spin_lock(&srpc_data.rpc_glock);
-
-	LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
-	if (srpc_data.rpc_services[id]) {
-		spin_unlock(&srpc_data.rpc_glock);
-		goto failed;
-	}
-
-	srpc_data.rpc_services[id] = sv;
-	spin_unlock(&srpc_data.rpc_glock);
-
-	CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name);
-	return 0;
-
- failed:
-	srpc_service_fini(sv);
-	return -EBUSY;
-}
-
-int
-srpc_remove_service(struct srpc_service *sv)
-{
-	int id = sv->sv_id;
-
-	spin_lock(&srpc_data.rpc_glock);
-
-	if (srpc_data.rpc_services[id] != sv) {
-		spin_unlock(&srpc_data.rpc_glock);
-		return -ENOENT;
-	}
-
-	srpc_data.rpc_services[id] = NULL;
-	spin_unlock(&srpc_data.rpc_glock);
-	return 0;
-}
-
-static int
-srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
-		       int len, int options, struct lnet_process_id peer,
-		       struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
-	int rc;
-	struct lnet_md md;
-	struct lnet_handle_me meh;
-
-	rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
-			  local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh);
-	if (rc) {
-		CERROR("LNetMEAttach failed: %d\n", rc);
-		LASSERT(rc == -ENOMEM);
-		return -ENOMEM;
-	}
-
-	md.threshold = 1;
-	md.user_ptr = ev;
-	md.start = buf;
-	md.length = len;
-	md.options = options;
-	md.eq_handle = srpc_data.rpc_lnet_eq;
-
-	rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh);
-	if (rc) {
-		CERROR("LNetMDAttach failed: %d\n", rc);
-		LASSERT(rc == -ENOMEM);
-
-		rc = LNetMEUnlink(meh);
-		LASSERT(!rc);
-		return -ENOMEM;
-	}
-
-	CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n",
-	       libcfs_id2str(peer), portal, matchbits);
-	return 0;
-}
-
-static int
-srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
-		      int options, struct lnet_process_id peer,
-		      lnet_nid_t self, struct lnet_handle_md *mdh,
-		      struct srpc_event *ev)
-{
-	int rc;
-	struct lnet_md md;
-
-	md.user_ptr = ev;
-	md.start = buf;
-	md.length = len;
-	md.eq_handle = srpc_data.rpc_lnet_eq;
-	md.threshold = options & LNET_MD_OP_GET ? 2 : 1;
-	md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
-
-	rc = LNetMDBind(md, LNET_UNLINK, mdh);
-	if (rc) {
-		CERROR("LNetMDBind failed: %d\n", rc);
-		LASSERT(rc == -ENOMEM);
-		return -ENOMEM;
-	}
-
-	/*
-	 * This is kind of an abuse of the LNET_MD_OP_{PUT,GET} options;
-	 * they're only meaningful for MDs attached to an ME (i.e. passive
-	 * buffers).
-	 */
-	if (options & LNET_MD_OP_PUT) {
-		rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
-			     portal, matchbits, 0, 0);
-	} else {
-		LASSERT(options & LNET_MD_OP_GET);
-
-		rc = LNetGet(self, *mdh, peer, portal, matchbits, 0);
-	}
-
-	if (rc) {
-		CERROR("LNet%s(%s, %d, %lld) failed: %d\n",
-		       options & LNET_MD_OP_PUT ? "Put" : "Get",
-		       libcfs_id2str(peer), portal, matchbits, rc);
-
-		/*
-		 * The forthcoming unlink event will complete this operation
-		 * with failure, so fall through and return success here.
-		 */
-		rc = LNetMDUnlink(*mdh);
-		LASSERT(!rc);
-	} else {
-		CDEBUG(D_NET, "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n",
-		       libcfs_id2str(peer), portal, matchbits);
-	}
-	return 0;
-}
-
-static int
-srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
-			 struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
-	struct lnet_process_id any = { 0 };
-
-	any.nid = LNET_NID_ANY;
-	any.pid = LNET_PID_ANY;
-
-	return srpc_post_passive_rdma(srpc_serv_portal(service),
-				      local, service, buf, len,
-				      LNET_MD_OP_PUT, any, mdh, ev);
-}
-
-static int
-srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
-	struct srpc_service *sv = scd->scd_svc;
-	struct srpc_msg	*msg = &buf->buf_msg;
-	int rc;
-
-	LNetInvalidateMDHandle(&buf->buf_mdh);
-	list_add(&buf->buf_list, &scd->scd_buf_posted);
-	scd->scd_buf_nposted++;
-	spin_unlock(&scd->scd_lock);
-
-	rc = srpc_post_passive_rqtbuf(sv->sv_id,
-				      !srpc_serv_is_framework(sv),
-				      msg, sizeof(*msg), &buf->buf_mdh,
-				      &scd->scd_ev);
-
-	/*
-	 * At this point, an RPC (new or delayed) may have arrived in
-	 * msg and its event handler has been called. So we must add
-	 * buf to scd_buf_posted _before_ dropping scd_lock
-	 */
-	spin_lock(&scd->scd_lock);
-
-	if (!rc) {
-		if (!sv->sv_shuttingdown)
-			return 0;
-
-		spin_unlock(&scd->scd_lock);
-		/*
-		 * srpc_shutdown_service might have tried to unlink me
-		 * when my buf_mdh was still invalid
-		 */
-		LNetMDUnlink(buf->buf_mdh);
-		spin_lock(&scd->scd_lock);
-		return 0;
-	}
-
-	scd->scd_buf_nposted--;
-	if (sv->sv_shuttingdown)
-		return rc; /* don't allow changing scd_buf_posted */
-
-	list_del(&buf->buf_list);
-	spin_unlock(&scd->scd_lock);
-
-	kfree(buf);
-
-	spin_lock(&scd->scd_lock);
-	return rc;
-}
-
-void
-srpc_add_buffer(struct swi_workitem *wi)
-{
-	struct srpc_service_cd *scd = container_of(wi, struct srpc_service_cd, scd_buf_wi);
-	struct srpc_buffer *buf;
-	int rc = 0;
-
-	/*
-	 * This is called by workitem scheduler threads; these threads
-	 * have had their CPT affinity set, so buffers will be posted
-	 * on the CPT-local list of the portal
-	 */
-	spin_lock(&scd->scd_lock);
-
-	while (scd->scd_buf_adjust > 0 &&
-	       !scd->scd_svc->sv_shuttingdown) {
-		scd->scd_buf_adjust--; /* consume it */
-		scd->scd_buf_posting++;
-
-		spin_unlock(&scd->scd_lock);
-
-		buf = kzalloc(sizeof(*buf), GFP_NOFS);
-		if (!buf) {
-			CERROR("Failed to add new buf to service: %s\n",
-			       scd->scd_svc->sv_name);
-			spin_lock(&scd->scd_lock);
-			rc = -ENOMEM;
-			break;
-		}
-
-		spin_lock(&scd->scd_lock);
-		if (scd->scd_svc->sv_shuttingdown) {
-			spin_unlock(&scd->scd_lock);
-			kfree(buf);
-
-			spin_lock(&scd->scd_lock);
-			rc = -ESHUTDOWN;
-			break;
-		}
-
-		rc = srpc_service_post_buffer(scd, buf);
-		if (rc)
-			break; /* buf has been freed inside */
-
-		LASSERT(scd->scd_buf_posting > 0);
-		scd->scd_buf_posting--;
-		scd->scd_buf_total++;
-		scd->scd_buf_low = max(2, scd->scd_buf_total / 4);
-	}
-
-	if (rc) {
-		scd->scd_buf_err_stamp = ktime_get_real_seconds();
-		scd->scd_buf_err = rc;
-
-		LASSERT(scd->scd_buf_posting > 0);
-		scd->scd_buf_posting--;
-	}
-
-	spin_unlock(&scd->scd_lock);
-}
-
-int
-srpc_service_add_buffers(struct srpc_service *sv, int nbuffer)
-{
-	struct srpc_service_cd *scd;
-	int rc = 0;
-	int i;
-
-	LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		scd->scd_buf_err = 0;
-		scd->scd_buf_err_stamp = 0;
-		scd->scd_buf_posting = 0;
-		scd->scd_buf_adjust = nbuffer;
-		/* start to post buffers */
-		swi_schedule_workitem(&scd->scd_buf_wi);
-		spin_unlock(&scd->scd_lock);
-
-		/* the framework service only posts buffers for one partition */
-		if (srpc_serv_is_framework(sv))
-			break;
-	}
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-		/*
-		 * NB: srpc_service_add_buffers() can be called in the
-		 * thread context of lst_serial_wq, and we don't normally
-		 * allow sleeping in the thread context of a WI scheduler,
-		 * because it blocks the current scheduler thread from doing
-		 * anything else; even worse, it could deadlock if it's
-		 * waiting on a result from another WI of the same scheduler.
-		 * However, it's safe here because scd_buf_wi is scheduled
-		 * by a thread in a different WI scheduler (lst_test_wq),
-		 * so there is no risk of deadlock, though this could block
-		 * all WIs pending on lst_serial_wq for a moment, which is
-		 * not good but not fatal.
-		 */
-		lst_wait_until(scd->scd_buf_err ||
-			       (!scd->scd_buf_adjust &&
-				!scd->scd_buf_posting),
-			       scd->scd_lock, "waiting for adding buffer\n");
-
-		if (scd->scd_buf_err && !rc)
-			rc = scd->scd_buf_err;
-
-		spin_unlock(&scd->scd_lock);
-	}
-
-	return rc;
-}
-
-void
-srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer)
-{
-	struct srpc_service_cd *scd;
-	int num;
-	int i;
-
-	LASSERT(!sv->sv_shuttingdown);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		num = scd->scd_buf_total + scd->scd_buf_posting;
-		scd->scd_buf_adjust -= min(nbuffer, num);
-
-		spin_unlock(&scd->scd_lock);
-	}
-}
-
-/* returns 1 if sv has finished, otherwise 0 */
-int
-srpc_finish_service(struct srpc_service *sv)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	int i;
-
-	LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		swi_cancel_workitem(&scd->scd_buf_wi);
-
-		spin_lock(&scd->scd_lock);
-
-		if (scd->scd_buf_nposted > 0) {
-			CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n",
-			       scd->scd_buf_nposted);
-			spin_unlock(&scd->scd_lock);
-			return 0;
-		}
-
-		if (list_empty(&scd->scd_rpc_active)) {
-			spin_unlock(&scd->scd_lock);
-			continue;
-		}
-
-		rpc = list_entry(scd->scd_rpc_active.next,
-				 struct srpc_server_rpc, srpc_list);
-		CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s, ev fired %d type %d status %d lnet %d\n",
-			rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
-			swi_state2str(rpc->srpc_wi.swi_state),
-			rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type,
-			rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet);
-		spin_unlock(&scd->scd_lock);
-		return 0;
-	}
-
-	/* no lock needed from now on */
-	srpc_service_fini(sv);
-	return 1;
-}
-
-/* called with sv->sv_lock held */
-static void
-srpc_service_recycle_buffer(struct srpc_service_cd *scd,
-			    struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
-	if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
-		if (srpc_service_post_buffer(scd, buf)) {
-			CWARN("Failed to post %s buffer\n",
-			      scd->scd_svc->sv_name);
-		}
-		return;
-	}
-
-	/* service is shutting down, or we want to recycle some buffers */
-	scd->scd_buf_total--;
-
-	if (scd->scd_buf_adjust < 0) {
-		scd->scd_buf_adjust++;
-		if (scd->scd_buf_adjust < 0 &&
-		    !scd->scd_buf_total && !scd->scd_buf_posting) {
-			CDEBUG(D_INFO,
-			       "Try to recycle %d buffers but nothing left\n",
-			       scd->scd_buf_adjust);
-			scd->scd_buf_adjust = 0;
-		}
-	}
-
-	spin_unlock(&scd->scd_lock);
-	kfree(buf);
-	spin_lock(&scd->scd_lock);
-}
-
-void
-srpc_abort_service(struct srpc_service *sv)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	int i;
-
-	CDEBUG(D_NET, "Aborting service: id %d, name %s\n",
-	       sv->sv_id, sv->sv_name);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		/*
-		 * schedule in-flight RPCs to notice the abort, NB:
-		 * racing with incoming RPCs; complete fix should make test
-		 * RPCs carry session ID in its headers
-		 */
-		list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) {
-			rpc->srpc_aborted = 1;
-			swi_schedule_workitem(&rpc->srpc_wi);
-		}
-
-		spin_unlock(&scd->scd_lock);
-	}
-}
-
-void
-srpc_shutdown_service(struct srpc_service *sv)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	struct srpc_buffer *buf;
-	int i;
-
-	CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
-	       sv->sv_id, sv->sv_name);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
-		spin_lock(&scd->scd_lock);
-
-	sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
-		spin_unlock(&scd->scd_lock);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		/* schedule in-flight RPCs to notice the shutdown */
-		list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list)
-			swi_schedule_workitem(&rpc->srpc_wi);
-
-		spin_unlock(&scd->scd_lock);
-
-		/*
-		 * OK to traverse scd_buf_posted without lock, since no one
-		 * touches scd_buf_posted now
-		 */
-		list_for_each_entry(buf, &scd->scd_buf_posted, buf_list)
-			LNetMDUnlink(buf->buf_mdh);
-	}
-}
-
-static int
-srpc_send_request(struct srpc_client_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->crpc_reqstev;
-	int rc;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_REQUEST_SENT;
-
-	rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service),
-				   rpc->crpc_service, &rpc->crpc_reqstmsg,
-				   sizeof(struct srpc_msg), LNET_MD_OP_PUT,
-				   rpc->crpc_dest, LNET_NID_ANY,
-				   &rpc->crpc_reqstmdh, ev);
-	if (rc) {
-		LASSERT(rc == -ENOMEM);
-		ev->ev_fired = 1;  /* no more event expected */
-	}
-	return rc;
-}
-
-static int
-srpc_prepare_reply(struct srpc_client_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->crpc_replyev;
-	__u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
-	int rc;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_REPLY_RCVD;
-
-	*id = srpc_next_id();
-
-	rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
-				    &rpc->crpc_replymsg,
-				    sizeof(struct srpc_msg),
-				    LNET_MD_OP_PUT, rpc->crpc_dest,
-				    &rpc->crpc_replymdh, ev);
-	if (rc) {
-		LASSERT(rc == -ENOMEM);
-		ev->ev_fired = 1;  /* no more event expected */
-	}
-	return rc;
-}
-
-static int
-srpc_prepare_bulk(struct srpc_client_rpc *rpc)
-{
-	struct srpc_bulk *bk = &rpc->crpc_bulk;
-	struct srpc_event *ev = &rpc->crpc_bulkev;
-	__u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
-	int rc;
-	int opt;
-
-	LASSERT(bk->bk_niov <= LNET_MAX_IOV);
-
-	if (!bk->bk_niov)
-		return 0; /* nothing to do */
-
-	opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
-	opt |= LNET_MD_KIOV;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_BULK_REQ_RCVD;
-
-	*id = srpc_next_id();
-
-	rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
-				    &bk->bk_iovs[0], bk->bk_niov, opt,
-				    rpc->crpc_dest, &bk->bk_mdh, ev);
-	if (rc) {
-		LASSERT(rc == -ENOMEM);
-		ev->ev_fired = 1;  /* no more event expected */
-	}
-	return rc;
-}
-
-static int
-srpc_do_bulk(struct srpc_server_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->srpc_ev;
-	struct srpc_bulk *bk = rpc->srpc_bulk;
-	__u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
-	int rc;
-	int opt;
-
-	LASSERT(bk);
-
-	opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
-	opt |= LNET_MD_KIOV;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
-
-	rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
-				   &bk->bk_iovs[0], bk->bk_niov, opt,
-				   rpc->srpc_peer, rpc->srpc_self,
-				   &bk->bk_mdh, ev);
-	if (rc)
-		ev->ev_fired = 1;  /* no more event expected */
-	return rc;
-}
-
-/* only called from srpc_handle_rpc */
-static void
-srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
-{
-	struct srpc_service_cd *scd = rpc->srpc_scd;
-	struct srpc_service *sv = scd->scd_svc;
-	struct srpc_buffer *buffer;
-
-	LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
-
-	rpc->srpc_status = status;
-
-	CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
-		     "Server RPC %p done: service %s, peer %s, status %s:%d\n",
-		     rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
-		     swi_state2str(rpc->srpc_wi.swi_state), status);
-
-	if (status) {
-		spin_lock(&srpc_data.rpc_glock);
-		srpc_data.rpc_counters.rpcs_dropped++;
-		spin_unlock(&srpc_data.rpc_glock);
-	}
-
-	if (rpc->srpc_done)
-		(*rpc->srpc_done)(rpc);
-	LASSERT(!rpc->srpc_bulk);
-
-	spin_lock(&scd->scd_lock);
-
-	if (rpc->srpc_reqstbuf) {
-		/*
-		 * NB: srpc_service_recycle_buffer might drop sv_lock, but
-		 * sv won't go away because scd_rpc_active is not empty
-		 */
-		srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf);
-		rpc->srpc_reqstbuf = NULL;
-	}
-
-	list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */
-
-	/*
-	 * No one can schedule me now since:
-	 * - I'm not on scd_rpc_active.
-	 * - all LNet events have been fired.
-	 * Cancel pending schedules and prevent future schedule attempts:
-	 */
-	LASSERT(rpc->srpc_ev.ev_fired);
-
-	if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
-		buffer = list_entry(scd->scd_buf_blocked.next,
-				    struct srpc_buffer, buf_list);
-		list_del(&buffer->buf_list);
-
-		srpc_init_server_rpc(rpc, scd, buffer);
-		list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active);
-		swi_schedule_workitem(&rpc->srpc_wi);
-	} else {
-		list_add(&rpc->srpc_list, &scd->scd_rpc_free);
-	}
-
-	spin_unlock(&scd->scd_lock);
-}
-
-/* handles an incoming RPC */
-void
-srpc_handle_rpc(struct swi_workitem *wi)
-{
-	struct srpc_server_rpc *rpc = container_of(wi, struct srpc_server_rpc, srpc_wi);
-	struct srpc_service_cd *scd = rpc->srpc_scd;
-	struct srpc_service *sv = scd->scd_svc;
-	struct srpc_event *ev = &rpc->srpc_ev;
-	int rc = 0;
-
-	LASSERT(wi == &rpc->srpc_wi);
-
-	spin_lock(&scd->scd_lock);
-
-	if (sv->sv_shuttingdown || rpc->srpc_aborted) {
-		spin_unlock(&scd->scd_lock);
-
-		if (rpc->srpc_bulk)
-			LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
-		LNetMDUnlink(rpc->srpc_replymdh);
-
-		if (ev->ev_fired) { /* no more event, OK to finish */
-			srpc_server_rpc_done(rpc, -ESHUTDOWN);
-		}
-		return;
-	}
-
-	spin_unlock(&scd->scd_lock);
-
-	switch (wi->swi_state) {
-	default:
-		LBUG();
-	case SWI_STATE_NEWBORN: {
-		struct srpc_msg *msg;
-		struct srpc_generic_reply *reply;
-
-		msg = &rpc->srpc_reqstbuf->buf_msg;
-		reply = &rpc->srpc_replymsg.msg_body.reply;
-
-		if (!msg->msg_magic) {
-			/* moaned already in srpc_lnet_ev_handler */
-			srpc_server_rpc_done(rpc, -EBADMSG);
-			return;
-		}
-
-		srpc_unpack_msg_hdr(msg);
-		if (msg->msg_version != SRPC_MSG_VERSION) {
-			CWARN("Version mismatch: %u, %u expected, from %s\n",
-			      msg->msg_version, SRPC_MSG_VERSION,
-			      libcfs_id2str(rpc->srpc_peer));
-			reply->status = EPROTO;
-			/* fall through and send reply */
-		} else {
-			reply->status = 0;
-			rc = (*sv->sv_handler)(rpc);
-			LASSERT(!reply->status || !rpc->srpc_bulk);
-			if (rc) {
-				srpc_server_rpc_done(rpc, rc);
-				return;
-			}
-		}
-
-		wi->swi_state = SWI_STATE_BULK_STARTED;
-
-		if (rpc->srpc_bulk) {
-			rc = srpc_do_bulk(rpc);
-			if (!rc)
-				return; /* wait for bulk */
-
-			LASSERT(ev->ev_fired);
-			ev->ev_status = rc;
-		}
-	}
-		/* fall through */
-	case SWI_STATE_BULK_STARTED:
-		LASSERT(!rpc->srpc_bulk || ev->ev_fired);
-
-		if (rpc->srpc_bulk) {
-			rc = ev->ev_status;
-
-			if (sv->sv_bulk_ready)
-				rc = (*sv->sv_bulk_ready)(rpc, rc);
-
-			if (rc) {
-				srpc_server_rpc_done(rpc, rc);
-				return;
-			}
-		}
-
-		wi->swi_state = SWI_STATE_REPLY_SUBMITTED;
-		rc = srpc_send_reply(rpc);
-		if (!rc)
-			return; /* wait for reply */
-		srpc_server_rpc_done(rpc, rc);
-		return;
-
-	case SWI_STATE_REPLY_SUBMITTED:
-		if (!ev->ev_fired) {
-			CERROR("RPC %p: bulk %p, service %d\n",
-			       rpc, rpc->srpc_bulk, sv->sv_id);
-			CERROR("Event: status %d, type %d, lnet %d\n",
-			       ev->ev_status, ev->ev_type, ev->ev_lnet);
-			LASSERT(ev->ev_fired);
-		}
-
-		wi->swi_state = SWI_STATE_DONE;
-		srpc_server_rpc_done(rpc, ev->ev_status);
-		return;
-	}
-}
-
-static void
-srpc_client_rpc_expired(void *data)
-{
-	struct srpc_client_rpc *rpc = data;
-
-	CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n",
-	      rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-	      rpc->crpc_timeout);
-
-	spin_lock(&rpc->crpc_lock);
-
-	rpc->crpc_timeout = 0;
-	srpc_abort_rpc(rpc, -ETIMEDOUT);
-
-	spin_unlock(&rpc->crpc_lock);
-
-	spin_lock(&srpc_data.rpc_glock);
-	srpc_data.rpc_counters.rpcs_expired++;
-	spin_unlock(&srpc_data.rpc_glock);
-}
-
-static void
-srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
-	struct stt_timer *timer = &rpc->crpc_timer;
-
-	if (!rpc->crpc_timeout)
-		return;
-
-	INIT_LIST_HEAD(&timer->stt_list);
-	timer->stt_data	= rpc;
-	timer->stt_func	= srpc_client_rpc_expired;
-	timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout;
-	stt_add_timer(timer);
-}
-
-/*
- * Called with rpc->crpc_lock held.
- *
- * Upon exit the RPC expiry timer is not queued and the handler is not
- * running on any CPU.
- */
-static void
-srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
-	/* timer not planted or already exploded */
-	if (!rpc->crpc_timeout)
-		return;
-
-	/* timer successfully defused */
-	if (stt_del_timer(&rpc->crpc_timer))
-		return;
-
-	/* timer detonated, wait for it to explode */
-	while (rpc->crpc_timeout) {
-		spin_unlock(&rpc->crpc_lock);
-
-		schedule();
-
-		spin_lock(&rpc->crpc_lock);
-	}
-}
-
-static void
-srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status)
-{
-	struct swi_workitem *wi = &rpc->crpc_wi;
-
-	LASSERT(status || wi->swi_state == SWI_STATE_DONE);
-
-	spin_lock(&rpc->crpc_lock);
-
-	rpc->crpc_closed = 1;
-	if (!rpc->crpc_status)
-		rpc->crpc_status = status;
-
-	srpc_del_client_rpc_timer(rpc);
-
-	CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
-		     "Client RPC done: service %d, peer %s, status %s:%d:%d\n",
-		     rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-		     swi_state2str(wi->swi_state), rpc->crpc_aborted, status);
-
-	/*
-	 * No one can schedule me now since:
-	 * - RPC timer has been defused.
-	 * - all LNet events have been fired.
-	 * - crpc_closed has been set, preventing srpc_abort_rpc from
-	 *   scheduling me.
-	 * Cancel pending schedules and prevent future schedule attempts:
-	 */
-	LASSERT(!srpc_event_pending(rpc));
-
-	spin_unlock(&rpc->crpc_lock);
-
-	(*rpc->crpc_done)(rpc);
-}
-
-/* sends an outgoing RPC */
-void
-srpc_send_rpc(struct swi_workitem *wi)
-{
-	int rc = 0;
-	struct srpc_client_rpc *rpc;
-	struct srpc_msg *reply;
-	int do_bulk;
-
-	LASSERT(wi);
-
-	rpc = container_of(wi, struct srpc_client_rpc, crpc_wi);
-
-	LASSERT(rpc);
-	LASSERT(wi == &rpc->crpc_wi);
-
-	reply = &rpc->crpc_replymsg;
-	do_bulk = rpc->crpc_bulk.bk_niov > 0;
-
-	spin_lock(&rpc->crpc_lock);
-
-	if (rpc->crpc_aborted) {
-		spin_unlock(&rpc->crpc_lock);
-		goto abort;
-	}
-
-	spin_unlock(&rpc->crpc_lock);
-
-	switch (wi->swi_state) {
-	default:
-		LBUG();
-	case SWI_STATE_NEWBORN:
-		LASSERT(!srpc_event_pending(rpc));
-
-		rc = srpc_prepare_reply(rpc);
-		if (rc) {
-			srpc_client_rpc_done(rpc, rc);
-			return;
-		}
-
-		rc = srpc_prepare_bulk(rpc);
-		if (rc)
-			break;
-
-		wi->swi_state = SWI_STATE_REQUEST_SUBMITTED;
-		rc = srpc_send_request(rpc);
-		break;
-
-	case SWI_STATE_REQUEST_SUBMITTED:
-		/*
-		 * CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
-		 * order; however, they're processed in a strict order:
-		 * rqt, rpy, and bulk.
-		 */
-		if (!rpc->crpc_reqstev.ev_fired)
-			break;
-
-		rc = rpc->crpc_reqstev.ev_status;
-		if (rc)
-			break;
-
-		wi->swi_state = SWI_STATE_REQUEST_SENT;
-		/* perhaps more events */
-		/* fall through */
-	case SWI_STATE_REQUEST_SENT: {
-		enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service);
-
-		if (!rpc->crpc_replyev.ev_fired)
-			break;
-
-		rc = rpc->crpc_replyev.ev_status;
-		if (rc)
-			break;
-
-		srpc_unpack_msg_hdr(reply);
-		if (reply->msg_type != type ||
-		    (reply->msg_magic != SRPC_MSG_MAGIC &&
-		     reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
-			CWARN("Bad message from %s: type %u (%u expected), magic %u (%u expected).\n",
-			      libcfs_id2str(rpc->crpc_dest),
-			      reply->msg_type, type,
-			      reply->msg_magic, SRPC_MSG_MAGIC);
-			rc = -EBADMSG;
-			break;
-		}
-
-		if (do_bulk && reply->msg_body.reply.status) {
-			CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n",
-			      reply->msg_body.reply.status,
-			      libcfs_id2str(rpc->crpc_dest));
-			LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
-		}
-
-		wi->swi_state = SWI_STATE_REPLY_RECEIVED;
-	}
-		/* fall through */
-	case SWI_STATE_REPLY_RECEIVED:
-		if (do_bulk && !rpc->crpc_bulkev.ev_fired)
-			break;
-
-		rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
-
-		/*
-		 * Bulk buffer was unlinked due to remote error. Clear error
-		 * since reply buffer still contains valid data.
-		 * NB rpc->crpc_done shouldn't look into bulk data in case of
-		 * remote error.
-		 */
-		if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
-		    !rpc->crpc_status && reply->msg_body.reply.status)
-			rc = 0;
-
-		wi->swi_state = SWI_STATE_DONE;
-		srpc_client_rpc_done(rpc, rc);
-		return;
-	}
-
-	if (rc) {
-		spin_lock(&rpc->crpc_lock);
-		srpc_abort_rpc(rpc, rc);
-		spin_unlock(&rpc->crpc_lock);
-	}
-
-abort:
-	if (rpc->crpc_aborted) {
-		LNetMDUnlink(rpc->crpc_reqstmdh);
-		LNetMDUnlink(rpc->crpc_replymdh);
-		LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
-
-		if (!srpc_event_pending(rpc)) {
-			srpc_client_rpc_done(rpc, -EINTR);
-			return;
-		}
-	}
-}
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
-		       int nbulkiov, int bulklen,
-		       void (*rpc_done)(struct srpc_client_rpc *),
-		       void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
-	struct srpc_client_rpc *rpc;
-
-	rpc = kzalloc(offsetof(struct srpc_client_rpc,
-			       crpc_bulk.bk_iovs[nbulkiov]), GFP_KERNEL);
-	if (!rpc)
-		return NULL;
-
-	srpc_init_client_rpc(rpc, peer, service, nbulkiov,
-			     bulklen, rpc_done, rpc_fini, priv);
-	return rpc;
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_abort_rpc(struct srpc_client_rpc *rpc, int why)
-{
-	LASSERT(why);
-
-	if (rpc->crpc_aborted ||	/* already aborted */
-	    rpc->crpc_closed)		/* callback imminent */
-		return;
-
-	CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n",
-	       rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-	       swi_state2str(rpc->crpc_wi.swi_state), why);
-
-	rpc->crpc_aborted = 1;
-	rpc->crpc_status = why;
-	swi_schedule_workitem(&rpc->crpc_wi);
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_post_rpc(struct srpc_client_rpc *rpc)
-{
-	LASSERT(!rpc->crpc_aborted);
-	LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
-	CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
-	       libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
-	       rpc->crpc_timeout);
-
-	srpc_add_client_rpc_timer(rpc);
-	swi_schedule_workitem(&rpc->crpc_wi);
-}
-
-int
-srpc_send_reply(struct srpc_server_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->srpc_ev;
-	struct srpc_msg *msg = &rpc->srpc_replymsg;
-	struct srpc_buffer *buffer = rpc->srpc_reqstbuf;
-	struct srpc_service_cd *scd = rpc->srpc_scd;
-	struct srpc_service *sv = scd->scd_svc;
-	__u64 rpyid;
-	int rc;
-
-	LASSERT(buffer);
-	rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
-
-	spin_lock(&scd->scd_lock);
-
-	if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) {
-		/*
-		 * Repost buffer before replying since test client
-		 * might send me another RPC once it gets the reply
-		 */
-		if (srpc_service_post_buffer(scd, buffer))
-			CWARN("Failed to repost %s buffer\n", sv->sv_name);
-		rpc->srpc_reqstbuf = NULL;
-	}
-
-	spin_unlock(&scd->scd_lock);
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_REPLY_SENT;
-
-	msg->msg_magic = SRPC_MSG_MAGIC;
-	msg->msg_version = SRPC_MSG_VERSION;
-	msg->msg_type = srpc_service2reply(sv->sv_id);
-
-	rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
-				   sizeof(*msg), LNET_MD_OP_PUT,
-				   rpc->srpc_peer, rpc->srpc_self,
-				   &rpc->srpc_replymdh, ev);
-	if (rc)
-		ev->ev_fired = 1; /* no more event expected */
-	return rc;
-}
-
-/* when in kernel always called with LNET_LOCK() held, and in thread context */
-static void
-srpc_lnet_ev_handler(struct lnet_event *ev)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_event *rpcev = ev->md.user_ptr;
-	struct srpc_client_rpc *crpc;
-	struct srpc_server_rpc *srpc;
-	struct srpc_buffer *buffer;
-	struct srpc_service *sv;
-	struct srpc_msg *msg;
-	enum srpc_msg_type type;
-
-	LASSERT(!in_interrupt());
-
-	if (ev->status) {
-		__u32 errors;
-
-		spin_lock(&srpc_data.rpc_glock);
-		if (ev->status != -ECANCELED) /* cancellation is not an error */
-			srpc_data.rpc_counters.errors++;
-		errors = srpc_data.rpc_counters.errors;
-		spin_unlock(&srpc_data.rpc_glock);
-
-		CNETERR("LNet event status %d type %d, RPC errors %u\n",
-			ev->status, ev->type, errors);
-	}
-
-	rpcev->ev_lnet = ev->type;
-
-	switch (rpcev->ev_type) {
-	default:
-		CERROR("Unknown event: status %d, type %d, lnet %d\n",
-		       rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
-		LBUG();
-	case SRPC_REQUEST_SENT:
-		if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
-			spin_lock(&srpc_data.rpc_glock);
-			srpc_data.rpc_counters.rpcs_sent++;
-			spin_unlock(&srpc_data.rpc_glock);
-		}
-		/* fall through */
-	case SRPC_REPLY_RCVD:
-	case SRPC_BULK_REQ_RCVD:
-		crpc = rpcev->ev_data;
-
-		if (rpcev != &crpc->crpc_reqstev &&
-		    rpcev != &crpc->crpc_replyev &&
-		    rpcev != &crpc->crpc_bulkev) {
-			CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n",
-			       rpcev, crpc, &crpc->crpc_reqstev,
-			       &crpc->crpc_replyev, &crpc->crpc_bulkev);
-			CERROR("Bad event: status %d, type %d, lnet %d\n",
-			       rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
-			LBUG();
-		}
-
-		spin_lock(&crpc->crpc_lock);
-
-		LASSERT(!rpcev->ev_fired);
-		rpcev->ev_fired = 1;
-		rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
-						-EINTR : ev->status;
-		swi_schedule_workitem(&crpc->crpc_wi);
-
-		spin_unlock(&crpc->crpc_lock);
-		break;
-
-	case SRPC_REQUEST_RCVD:
-		scd = rpcev->ev_data;
-		sv = scd->scd_svc;
-
-		LASSERT(rpcev == &scd->scd_ev);
-
-		spin_lock(&scd->scd_lock);
-
-		LASSERT(ev->unlinked);
-		LASSERT(ev->type == LNET_EVENT_PUT ||
-			ev->type == LNET_EVENT_UNLINK);
-		LASSERT(ev->type != LNET_EVENT_UNLINK ||
-			sv->sv_shuttingdown);
-
-		buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg);
-		buffer->buf_peer = ev->initiator;
-		buffer->buf_self = ev->target.nid;
-
-		LASSERT(scd->scd_buf_nposted > 0);
-		scd->scd_buf_nposted--;
-
-		if (sv->sv_shuttingdown) {
-			/*
-			 * Leave the buffer on scd->scd_buf_posted since
-			 * srpc_finish_service needs to traverse it.
-			 */
-			spin_unlock(&scd->scd_lock);
-			break;
-		}
-
-		if (scd->scd_buf_err_stamp &&
-		    scd->scd_buf_err_stamp < ktime_get_real_seconds()) {
-			/* re-enable adding buffer */
-			scd->scd_buf_err_stamp = 0;
-			scd->scd_buf_err = 0;
-		}
-
-		if (!scd->scd_buf_err &&	/* adding buffer is enabled */
-		    !scd->scd_buf_adjust &&
-		    scd->scd_buf_nposted < scd->scd_buf_low) {
-			scd->scd_buf_adjust = max(scd->scd_buf_total / 2,
-						  SFW_TEST_WI_MIN);
-			swi_schedule_workitem(&scd->scd_buf_wi);
-		}
-
-		list_del(&buffer->buf_list); /* from scd->scd_buf_posted */
-		msg = &buffer->buf_msg;
-		type = srpc_service2request(sv->sv_id);
-
-		if (ev->status || ev->mlength != sizeof(*msg) ||
-		    (msg->msg_type != type &&
-		     msg->msg_type != __swab32(type)) ||
-		    (msg->msg_magic != SRPC_MSG_MAGIC &&
-		     msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
-			CERROR("Dropping RPC (%s) from %s: status %d mlength %d type %u magic %u.\n",
-			       sv->sv_name, libcfs_id2str(ev->initiator),
-			       ev->status, ev->mlength,
-			       msg->msg_type, msg->msg_magic);
-
-			/*
-			 * NB can't call srpc_service_recycle_buffer here since
-			 * it may call LNetM[DE]Attach. The invalid magic tells
-			 * srpc_handle_rpc to drop this RPC
-			 */
-			msg->msg_magic = 0;
-		}
-
-		if (!list_empty(&scd->scd_rpc_free)) {
-			srpc = list_entry(scd->scd_rpc_free.next,
-					  struct srpc_server_rpc,
-					  srpc_list);
-			list_del(&srpc->srpc_list);
-
-			srpc_init_server_rpc(srpc, scd, buffer);
-			list_add_tail(&srpc->srpc_list,
-				      &scd->scd_rpc_active);
-			swi_schedule_workitem(&srpc->srpc_wi);
-		} else {
-			list_add_tail(&buffer->buf_list,
-				      &scd->scd_buf_blocked);
-		}
-
-		spin_unlock(&scd->scd_lock);
-
-		spin_lock(&srpc_data.rpc_glock);
-		srpc_data.rpc_counters.rpcs_rcvd++;
-		spin_unlock(&srpc_data.rpc_glock);
-		break;
-
-	case SRPC_BULK_GET_RPLD:
-		LASSERT(ev->type == LNET_EVENT_SEND ||
-			ev->type == LNET_EVENT_REPLY ||
-			ev->type == LNET_EVENT_UNLINK);
-
-		if (!ev->unlinked)
-			break; /* wait for final event */
-		/* fall through */
-	case SRPC_BULK_PUT_SENT:
-		if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
-			spin_lock(&srpc_data.rpc_glock);
-
-			if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
-				srpc_data.rpc_counters.bulk_get += ev->mlength;
-			else
-				srpc_data.rpc_counters.bulk_put += ev->mlength;
-
-			spin_unlock(&srpc_data.rpc_glock);
-		}
-		/* fall through */
-	case SRPC_REPLY_SENT:
-		srpc = rpcev->ev_data;
-		scd = srpc->srpc_scd;
-
-		LASSERT(rpcev == &srpc->srpc_ev);
-
-		spin_lock(&scd->scd_lock);
-
-		rpcev->ev_fired = 1;
-		rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
-				   -EINTR : ev->status;
-		swi_schedule_workitem(&srpc->srpc_wi);
-
-		spin_unlock(&scd->scd_lock);
-		break;
-	}
-}
-
-int
-srpc_startup(void)
-{
-	int rc;
-
-	memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
-	spin_lock_init(&srpc_data.rpc_glock);
-
-	/* 1 second pause to avoid timestamp reuse */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule_timeout(HZ);
-	srpc_data.rpc_matchbits = ((__u64)ktime_get_real_seconds()) << 48;
-
-	srpc_data.rpc_state = SRPC_STATE_NONE;
-
-	rc = LNetNIInit(LNET_PID_LUSTRE);
-	if (rc < 0) {
-		CERROR("LNetNIInit() has failed: %d\n", rc);
-		return rc;
-	}
-
-	srpc_data.rpc_state = SRPC_STATE_NI_INIT;
-
-	LNetInvalidateEQHandle(&srpc_data.rpc_lnet_eq);
-	rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq);
-	if (rc) {
-		CERROR("LNetEQAlloc() has failed: %d\n", rc);
-		goto bail;
-	}
-
-	rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
-	LASSERT(!rc);
-	rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL);
-	LASSERT(!rc);
-
-	srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
-
-	rc = stt_startup();
-
-bail:
-	if (rc)
-		srpc_shutdown();
-	else
-		srpc_data.rpc_state = SRPC_STATE_RUNNING;
-
-	return rc;
-}
-
-void
-srpc_shutdown(void)
-{
-	int i;
-	int rc;
-	int state;
-
-	state = srpc_data.rpc_state;
-	srpc_data.rpc_state = SRPC_STATE_STOPPING;
-
-	switch (state) {
-	default:
-		LBUG();
-	case SRPC_STATE_RUNNING:
-		spin_lock(&srpc_data.rpc_glock);
-
-		for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
-			struct srpc_service *sv = srpc_data.rpc_services[i];
-
-			LASSERTF(!sv, "service not empty: id %d, name %s\n",
-				 i, sv->sv_name);
-		}
-
-		spin_unlock(&srpc_data.rpc_glock);
-
-		stt_shutdown();
-		/* fall through */
-	case SRPC_STATE_EQ_INIT:
-		rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
-		LASSERT(!rc);
-		rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
-		LASSERT(!rc);
-		rc = LNetEQFree(srpc_data.rpc_lnet_eq);
-		LASSERT(!rc); /* the EQ should have no user by now */
-		/* fall through */
-	case SRPC_STATE_NI_INIT:
-		LNetNIFini();
-	}
-}

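One subtlety in srpc_startup() above: ((__u64)seconds) << 48 keeps only the
low 16 bits of the wall-clock seconds in the top of the 64-bit matchbits
counter, which is presumably why the code pauses one second before seeding;
two loads within the same second would otherwise start from identical
matchbits. A standalone sketch (userspace time() standing in for
ktime_get_real_seconds()):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t matchbits;

    static void seed_matchbits(void)
    {
        /* Only the low 16 bits of the seconds survive the shift. */
        matchbits = (uint64_t)time(NULL) << 48;
    }

    static uint64_t next_id(void)
    {
        return matchbits++;   /* the real code takes rpc_glock here */
    }

    int main(void)
    {
        seed_matchbits();
        printf("first id: %#llx\n", (unsigned long long)next_id());
        return 0;
    }
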
+ 0 - 295
drivers/staging/lustre/lnet/selftest/rpc.h

@@ -1,295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __SELFTEST_RPC_H__
-#define __SELFTEST_RPC_H__
-
-#include <uapi/linux/lnet/lnetst.h>
-
-/*
- * LST wired structures
- *
- * XXX: *REPLY == *REQST + 1
- */
-enum srpc_msg_type {
-	SRPC_MSG_MKSN_REQST	= 0,
-	SRPC_MSG_MKSN_REPLY	= 1,
-	SRPC_MSG_RMSN_REQST	= 2,
-	SRPC_MSG_RMSN_REPLY	= 3,
-	SRPC_MSG_BATCH_REQST	= 4,
-	SRPC_MSG_BATCH_REPLY	= 5,
-	SRPC_MSG_STAT_REQST	= 6,
-	SRPC_MSG_STAT_REPLY	= 7,
-	SRPC_MSG_TEST_REQST	= 8,
-	SRPC_MSG_TEST_REPLY	= 9,
-	SRPC_MSG_DEBUG_REQST	= 10,
-	SRPC_MSG_DEBUG_REPLY	= 11,
-	SRPC_MSG_BRW_REQST	= 12,
-	SRPC_MSG_BRW_REPLY	= 13,
-	SRPC_MSG_PING_REQST	= 14,
-	SRPC_MSG_PING_REPLY	= 15,
-	SRPC_MSG_JOIN_REQST	= 16,
-	SRPC_MSG_JOIN_REPLY	= 17,
-};
-
-/* CAVEAT EMPTOR:
- * The 1st field of every srpc_*_reqst struct must be the matchbits of
- * its reply buffer, and the 2nd field the matchbits of its bulk buffer,
- * if any.
- *
- * The 1st field of every srpc_*_reply struct must be a __u32 status,
- * and the 2nd field the session id, if needed.
- */
-struct srpc_generic_reqst {
-	__u64			rpyid;		/* reply buffer matchbits */
-	__u64			bulkid;		/* bulk buffer matchbits */
-} WIRE_ATTR;
-
-struct srpc_generic_reply {
-	__u32			status;
-	struct lst_sid		sid;
-} WIRE_ATTR;
-
-/* FRAMEWORK RPCs */
-struct srpc_mksn_reqst {
-	__u64			mksn_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		mksn_sid;	/* session id */
-	__u32			mksn_force;	/* use brute force */
-	char			mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session request */
-
-struct srpc_mksn_reply {
-	__u32			mksn_status;	/* session status */
-	struct lst_sid		mksn_sid;	/* session id */
-	__u32			mksn_timeout;	/* session timeout */
-	char			mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session reply */
-
-struct srpc_rmsn_reqst {
-	__u64			rmsn_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		rmsn_sid;	/* session id */
-} WIRE_ATTR; /* remove session request */
-
-struct srpc_rmsn_reply {
-	__u32			rmsn_status;
-	struct lst_sid		rmsn_sid;	/* session id */
-} WIRE_ATTR; /* remove session reply */
-
-struct srpc_join_reqst {
-	__u64			join_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		join_sid;	/* session id to join */
-	char			join_group[LST_NAME_SIZE]; /* group name */
-} WIRE_ATTR;
-
-struct srpc_join_reply {
-	__u32			join_status;	/* returned status */
-	struct lst_sid		join_sid;	/* session id */
-	__u32			join_timeout;	/* # seconds' inactivity to
-						 * expire
-						 */
-	char			join_session[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-struct srpc_debug_reqst {
-	__u64			dbg_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		dbg_sid;	/* session id */
-	__u32			dbg_flags;	/* bitmap of debug */
-} WIRE_ATTR;
-
-struct srpc_debug_reply {
-	__u32			dbg_status;	/* returned code */
-	struct lst_sid		dbg_sid;	/* session id */
-	__u32			dbg_timeout;	/* session timeout */
-	__u32			dbg_nbatch;	/* # of batches in the node */
-	char			dbg_name[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-#define SRPC_BATCH_OPC_RUN	1
-#define SRPC_BATCH_OPC_STOP	2
-#define SRPC_BATCH_OPC_QUERY	3
-
-struct srpc_batch_reqst {
-	__u64		   bar_rpyid;	   /* reply buffer matchbits */
-	struct lst_sid	   bar_sid;	   /* session id */
-	struct lst_bid	   bar_bid;	   /* batch id */
-	__u32		   bar_opc;	   /* run/stop/query batch */
-	__u32		   bar_testidx;    /* index of test */
-	__u32		   bar_arg;	   /* parameters */
-} WIRE_ATTR;
-
-struct srpc_batch_reply {
-	__u32		   bar_status;	   /* status of request */
-	struct lst_sid	   bar_sid;	   /* session id */
-	__u32		   bar_active;	   /* # of active tests in batch/test */
-	__u32		   bar_time;	   /* remaining time */
-} WIRE_ATTR;
-
-struct srpc_stat_reqst {
-	__u64		   str_rpyid;	   /* reply buffer matchbits */
-	struct lst_sid	   str_sid;	   /* session id */
-	__u32		   str_type;	   /* type of stat */
-} WIRE_ATTR;
-
-struct srpc_stat_reply {
-	__u32		   str_status;
-	struct lst_sid	   str_sid;
-	struct sfw_counters	str_fw;
-	struct srpc_counters	str_rpc;
-	struct lnet_counters    str_lnet;
-} WIRE_ATTR;
-
-struct test_bulk_req {
-	__u32		   blk_opc;	   /* bulk operation code */
-	__u32		   blk_npg;	   /* # of pages */
-	__u32		   blk_flags;	   /* reserved flags */
-} WIRE_ATTR;
-
-struct test_bulk_req_v1 {
-	__u16		   blk_opc;	   /* bulk operation code */
-	__u16		   blk_flags;	   /* data check flags */
-	__u32		   blk_len;	   /* data length */
-	__u32		   blk_offset;	   /* offset */
-} WIRE_ATTR;
-
-struct test_ping_req {
-	__u32		   png_size;	   /* size of ping message */
-	__u32		   png_flags;	   /* reserved flags */
-} WIRE_ATTR;
-
-struct srpc_test_reqst {
-	__u64			tsr_rpyid;	/* reply buffer matchbits */
-	__u64			tsr_bulkid;	/* bulk buffer matchbits */
-	struct lst_sid		tsr_sid;	/* session id */
-	struct lst_bid		tsr_bid;	/* batch id */
-	__u32			tsr_service;	/* test type: bulk|ping|... */
-	__u32			tsr_loop;	/* test client loop count or
-						 * # server buffers needed
-						 */
-	__u32			tsr_concur;	/* concurrency of test */
-	__u8			tsr_is_client;	/* is test client or not */
-	__u8			tsr_stop_onerr; /* stop on error */
-	__u32			tsr_ndest;	/* # of dest nodes */
-
-	union {
-		struct test_ping_req	ping;
-		struct test_bulk_req	bulk_v0;
-		struct test_bulk_req_v1	bulk_v1;
-	} tsr_u;
-} WIRE_ATTR;
-
-struct srpc_test_reply {
-	__u32			tsr_status;	/* returned code */
-	struct lst_sid		tsr_sid;
-} WIRE_ATTR;
-
-/* TEST RPCs */
-struct srpc_ping_reqst {
-	__u64		   pnr_rpyid;
-	__u32		   pnr_magic;
-	__u32		   pnr_seq;
-	__u64		   pnr_time_sec;
-	__u64		   pnr_time_usec;
-} WIRE_ATTR;
-
-struct srpc_ping_reply {
-	__u32		   pnr_status;
-	__u32		   pnr_magic;
-	__u32		   pnr_seq;
-} WIRE_ATTR;
-
-struct srpc_brw_reqst {
-	__u64		   brw_rpyid;	   /* reply buffer matchbits */
-	__u64		   brw_bulkid;	   /* bulk buffer matchbits */
-	__u32		   brw_rw;	   /* read or write */
-	__u32		   brw_len;	   /* bulk data len */
-	__u32		   brw_flags;	   /* bulk data patterns */
-} WIRE_ATTR; /* bulk r/w request */
-
-struct srpc_brw_reply {
-	__u32		   brw_status;
-} WIRE_ATTR; /* bulk r/w reply */
-
-#define SRPC_MSG_MAGIC		0xeeb0f00d
-#define SRPC_MSG_VERSION	1
-
-struct srpc_msg {
-	__u32	msg_magic;     /* magic number */
-	__u32	msg_version;   /* message version number */
-	__u32	msg_type;      /* type of message body: srpc_msg_type */
-	__u32	msg_reserved0;
-	__u32	msg_reserved1;
-	__u32	msg_ses_feats; /* test session features */
-	union {
-		struct srpc_generic_reqst	reqst;
-		struct srpc_generic_reply	reply;
-
-		struct srpc_mksn_reqst		mksn_reqst;
-		struct srpc_mksn_reply		mksn_reply;
-		struct srpc_rmsn_reqst		rmsn_reqst;
-		struct srpc_rmsn_reply		rmsn_reply;
-		struct srpc_debug_reqst		dbg_reqst;
-		struct srpc_debug_reply		dbg_reply;
-		struct srpc_batch_reqst		bat_reqst;
-		struct srpc_batch_reply		bat_reply;
-		struct srpc_stat_reqst		stat_reqst;
-		struct srpc_stat_reply		stat_reply;
-		struct srpc_test_reqst		tes_reqst;
-		struct srpc_test_reply		tes_reply;
-		struct srpc_join_reqst		join_reqst;
-		struct srpc_join_reply		join_reply;
-
-		struct srpc_ping_reqst		ping_reqst;
-		struct srpc_ping_reply		ping_reply;
-		struct srpc_brw_reqst		brw_reqst;
-		struct srpc_brw_reply		brw_reply;
-	}     msg_body;
-} WIRE_ATTR;
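
The message is a classic tagged union: msg_type selects which member of msg_body is live, and every body variant starts with the common fields described in the comment at the top of the file. A minimal userspace sketch of the same layout (all names here are illustrative, not part of the deleted code):

#include <stdint.h>
#include <stdio.h>

enum body_type { BODY_PING = 1, BODY_STAT = 2 };

struct ping_body { uint32_t seq; };
struct stat_body { uint32_t errors; };

struct msg {
	uint32_t type;			/* discriminant: selects the body */
	union {
		struct ping_body ping;
		struct stat_body stat;
	} body;
};

int main(void)
{
	struct msg m = { .type = BODY_PING, .body.ping.seq = 7 };

	if (m.type == BODY_PING)	/* dispatch on the tag */
		printf("ping seq %u\n", (unsigned)m.body.ping.seq);
	return 0;
}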
-
-static inline void
-srpc_unpack_msg_hdr(struct srpc_msg *msg)
-{
-	if (msg->msg_magic == SRPC_MSG_MAGIC)
-		return; /* no flipping needed */
-
-	/*
-	 * We do not swap the magic number here as it is needed to
-	 * determine whether the body needs to be swapped.
-	 */
-	/* __swab32s(&msg->msg_magic); */
-	__swab32s(&msg->msg_type);
-	__swab32s(&msg->msg_version);
-	__swab32s(&msg->msg_ses_feats);
-	__swab32s(&msg->msg_reserved0);
-	__swab32s(&msg->msg_reserved1);
-}
-
-#endif /* __SELFTEST_RPC_H__ */
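
srpc_unpack_msg_hdr() relies on the usual wire-protocol trick: the sender writes the magic in host order, so if the receiver reads anything other than SRPC_MSG_MAGIC the peer had opposite endianness and every other header field must be byte-swapped, while the magic itself is left untouched so later code can still tell the body needs swapping. A self-contained sketch of that idea, assuming hypothetical msg_hdr/unpack_hdr names:

#include <stdint.h>
#include <stdio.h>

#define MSG_MAGIC 0xeeb0f00dU

/* equivalent of the kernel's __swab32s(): reverse bytes in place */
static void swab32s(uint32_t *v)
{
	*v = ((*v & 0x000000ffU) << 24) | ((*v & 0x0000ff00U) << 8) |
	     ((*v & 0x00ff0000U) >> 8)  | ((*v & 0xff000000U) >> 24);
}

struct msg_hdr {
	uint32_t magic;
	uint32_t version;
};

static void unpack_hdr(struct msg_hdr *h)
{
	if (h->magic == MSG_MAGIC)
		return;			/* sender has our byte order */
	/* keep magic as-is: it still marks the body as swapped */
	swab32s(&h->version);
}

int main(void)
{
	/* header as written by an opposite-endian sender */
	struct msg_hdr h = { .magic = 0x0df0b0eeU, .version = 0x01000000U };

	unpack_hdr(&h);
	printf("version = %u\n", (unsigned)h.version);	/* prints 1 */
	return 0;
}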

+ 0 - 622
drivers/staging/lustre/lnet/selftest/selftest.h

@@ -1,622 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/selftest.h
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-#ifndef __SELFTEST_SELFTEST_H__
-#define __SELFTEST_SELFTEST_H__
-
-#define LNET_ONLY
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-
-#include "rpc.h"
-#include "timer.h"
-
-#ifndef MADE_WITHOUT_COMPROMISE
-#define MADE_WITHOUT_COMPROMISE
-#endif
-
-#define SWI_STATE_NEWBORN		0
-#define SWI_STATE_REPLY_SUBMITTED	1
-#define SWI_STATE_REPLY_SENT		2
-#define SWI_STATE_REQUEST_SUBMITTED	3
-#define SWI_STATE_REQUEST_SENT		4
-#define SWI_STATE_REPLY_RECEIVED	5
-#define SWI_STATE_BULK_STARTED		6
-#define SWI_STATE_DONE			10
-
-/* forward refs */
-struct srpc_service;
-struct srpc_service_cd;
-struct sfw_test_unit;
-struct sfw_test_instance;
-
-/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
- * services, e.g. create/modify session.
- */
-#define SRPC_SERVICE_DEBUG		0
-#define SRPC_SERVICE_MAKE_SESSION	1
-#define SRPC_SERVICE_REMOVE_SESSION	2
-#define SRPC_SERVICE_BATCH		3
-#define SRPC_SERVICE_TEST		4
-#define SRPC_SERVICE_QUERY_STAT		5
-#define SRPC_SERVICE_JOIN		6
-#define SRPC_FRAMEWORK_SERVICE_MAX_ID	10
-/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
-#define SRPC_SERVICE_BRW		11
-#define SRPC_SERVICE_PING		12
-#define SRPC_SERVICE_MAX_ID		12
-
-#define SRPC_REQUEST_PORTAL		50
-/* a lazy portal for framework RPC requests */
-#define SRPC_FRAMEWORK_REQUEST_PORTAL	51
-/* all reply/bulk RDMAs go to this portal */
-#define SRPC_RDMA_PORTAL		52
-
-static inline enum srpc_msg_type
-srpc_service2request(int service)
-{
-	switch (service) {
-	default:
-		LBUG();
-	case SRPC_SERVICE_DEBUG:
-		return SRPC_MSG_DEBUG_REQST;
-
-	case SRPC_SERVICE_MAKE_SESSION:
-		return SRPC_MSG_MKSN_REQST;
-
-	case SRPC_SERVICE_REMOVE_SESSION:
-		return SRPC_MSG_RMSN_REQST;
-
-	case SRPC_SERVICE_BATCH:
-		return SRPC_MSG_BATCH_REQST;
-
-	case SRPC_SERVICE_TEST:
-		return SRPC_MSG_TEST_REQST;
-
-	case SRPC_SERVICE_QUERY_STAT:
-		return SRPC_MSG_STAT_REQST;
-
-	case SRPC_SERVICE_BRW:
-		return SRPC_MSG_BRW_REQST;
-
-	case SRPC_SERVICE_PING:
-		return SRPC_MSG_PING_REQST;
-
-	case SRPC_SERVICE_JOIN:
-		return SRPC_MSG_JOIN_REQST;
-	}
-}
-
-static inline enum srpc_msg_type
-srpc_service2reply(int service)
-{
-	return srpc_service2request(service) + 1;
-}
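
srpc_service2reply() can get away with "request + 1" only because enum srpc_msg_type (defined in rpc.h, not shown in this hunk) interleaves each *_REQST value with its *_REPLY value. A hypothetical sketch of that pairing convention:

#include <stdio.h>

enum msg_type {
	MSG_PING_REQST = 1,
	MSG_PING_REPLY,		/* = 2 */
	MSG_BRW_REQST,		/* = 3 */
	MSG_BRW_REPLY,		/* = 4 */
};

/* the reply type always directly follows its request type */
static enum msg_type reply_of(enum msg_type req)
{
	return req + 1;
}

int main(void)
{
	printf("%d\n", reply_of(MSG_BRW_REQST));	/* prints 4 */
	return 0;
}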
-
-enum srpc_event_type {
-	SRPC_BULK_REQ_RCVD   = 1, /* passive bulk request (PUT sink/GET source)
-				   * received
-				   */
-	SRPC_BULK_PUT_SENT   = 2, /* active bulk PUT sent (source) */
-	SRPC_BULK_GET_RPLD   = 3, /* active bulk GET replied (sink) */
-	SRPC_REPLY_RCVD      = 4, /* incoming reply received */
-	SRPC_REPLY_SENT      = 5, /* outgoing reply sent */
-	SRPC_REQUEST_RCVD    = 6, /* incoming request received */
-	SRPC_REQUEST_SENT    = 7, /* outgoing request sent */
-};
-
-/* RPC event */
-struct srpc_event {
-	enum srpc_event_type	ev_type;	/* what's up */
-	enum lnet_event_kind	ev_lnet;	/* LNet event type */
-	int		  ev_fired;  /* LNet event fired? */
-	int		  ev_status; /* LNet event status */
-	void		  *ev_data;  /* owning server/client RPC */
-};
-
-/* bulk descriptor */
-struct srpc_bulk {
-	int		 bk_len;     /* len of bulk data */
-	struct lnet_handle_md	bk_mdh;
-	int		 bk_sink;    /* sink/source */
-	int		 bk_niov;    /* # iov in bk_iovs */
-	struct bio_vec		bk_iovs[0];
-};
-
-/* message buffer descriptor */
-struct srpc_buffer {
-	struct list_head  buf_list; /* chain on srpc_service::*_msgq */
-	struct srpc_msg	  buf_msg;
-	struct lnet_handle_md	buf_mdh;
-	lnet_nid_t	  buf_self;
-	struct lnet_process_id	buf_peer;
-};
-
-struct swi_workitem;
-typedef void (*swi_action_t) (struct swi_workitem *);
-
-struct swi_workitem {
-	struct workqueue_struct *swi_wq;
-	struct work_struct  swi_work;
-	swi_action_t	    swi_action;
-	int		    swi_state;
-};
-
-/* server-side state of an RPC */
-struct srpc_server_rpc {
-	/* chain on srpc_service::*_rpcq */
-	struct list_head       srpc_list;
-	struct srpc_service_cd *srpc_scd;
-	struct swi_workitem	srpc_wi;
-	struct srpc_event	srpc_ev;	/* bulk/reply event */
-	lnet_nid_t	       srpc_self;
-	struct lnet_process_id	srpc_peer;
-	struct srpc_msg		srpc_replymsg;
-	struct lnet_handle_md	srpc_replymdh;
-	struct srpc_buffer	*srpc_reqstbuf;
-	struct srpc_bulk	*srpc_bulk;
-
-	unsigned int	       srpc_aborted; /* being given up */
-	int		       srpc_status;
-	void		       (*srpc_done)(struct srpc_server_rpc *);
-};
-
-/* client-side state of an RPC */
-struct srpc_client_rpc {
-	struct list_head  crpc_list;	  /* chain on user's lists */
-	spinlock_t	  crpc_lock;	  /* serialize */
-	int		  crpc_service;
-	atomic_t	  crpc_refcount;
-	int		  crpc_timeout;   /* # seconds to wait for reply */
-	struct stt_timer       crpc_timer;
-	struct swi_workitem	crpc_wi;
-	struct lnet_process_id	crpc_dest;
-
-	void		  (*crpc_done)(struct srpc_client_rpc *);
-	void		  (*crpc_fini)(struct srpc_client_rpc *);
-	int		  crpc_status;	  /* completion status */
-	void		  *crpc_priv;	  /* caller data */
-
-	/* state flags */
-	unsigned int	  crpc_aborted:1; /* being given up */
-	unsigned int	  crpc_closed:1;  /* completed */
-
-	/* RPC events */
-	struct srpc_event	crpc_bulkev;	/* bulk event */
-	struct srpc_event	crpc_reqstev;	/* request event */
-	struct srpc_event	crpc_replyev;	/* reply event */
-
-	/* bulk, request (reqst), and reply exchanged on the wire */
-	struct srpc_msg		crpc_reqstmsg;
-	struct srpc_msg		crpc_replymsg;
-	struct lnet_handle_md	crpc_reqstmdh;
-	struct lnet_handle_md	crpc_replymdh;
-	struct srpc_bulk	crpc_bulk;
-};
-
-#define srpc_client_rpc_size(rpc)					\
-offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
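
srpc_client_rpc_size() works because crpc_bulk is the last member and bk_iovs is a variable-length tail, so the RPC's real allocation size depends on bk_niov. A standalone sketch of the same offsetof() sizing trick (hypothetical names; the variable array index inside offsetof() is a GNU extension the kernel also relies on):

#include <stddef.h>
#include <stdlib.h>

struct iov { void *base; size_t len; };

struct rpc {
	int status;
	int niov;
	struct iov iovs[];	/* variable-length tail, last member */
};

/* allocate only the iov slots this RPC actually needs */
static struct rpc *rpc_alloc(int niov)
{
	struct rpc *r = calloc(1, offsetof(struct rpc, iovs[niov]));

	if (r)
		r->niov = niov;
	return r;
}

int main(void)
{
	struct rpc *r = rpc_alloc(4);	/* room for exactly 4 iovs */

	free(r);
	return 0;
}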
-
-#define srpc_client_rpc_addref(rpc)					\
-do {									\
-	CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n",				\
-	       (rpc), libcfs_id2str((rpc)->crpc_dest),			\
-	       atomic_read(&(rpc)->crpc_refcount));			\
-	LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0);		\
-	atomic_inc(&(rpc)->crpc_refcount);				\
-} while (0)
-
-#define srpc_client_rpc_decref(rpc)					\
-do {									\
-	CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n",				\
-	       (rpc), libcfs_id2str((rpc)->crpc_dest),			\
-	       atomic_read(&(rpc)->crpc_refcount));			\
-	LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0);		\
-	if (atomic_dec_and_test(&(rpc)->crpc_refcount))			\
-		srpc_destroy_client_rpc(rpc);				\
-} while (0)
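
The addref/decref pair above is plain reference counting: the creator holds one reference, every taker increments, and whoever drops the count to zero destroys the object. A minimal userspace equivalent using C11 atomics in place of the kernel's atomic_t (names hypothetical):

#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
};

static void obj_addref(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
}

static void obj_decref(struct obj *o)
{
	/* fetch_sub returns the old value: 1 means last reference */
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->refcount, 1);	/* caller's initial reference */
	obj_addref(o);			/* second user */
	obj_decref(o);
	obj_decref(o);			/* drops to 0: freed here */
	return 0;
}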
-
-#define srpc_event_pending(rpc)   (!(rpc)->crpc_bulkev.ev_fired ||	\
-				   !(rpc)->crpc_reqstev.ev_fired ||	\
-				   !(rpc)->crpc_replyev.ev_fired)
-
-/* CPU partition data of srpc service */
-struct srpc_service_cd {
-	/** serialize */
-	spinlock_t		scd_lock;
-	/** backref to service */
-	struct srpc_service	*scd_svc;
-	/** event buffer */
-	struct srpc_event	scd_ev;
-	/** free RPC descriptors */
-	struct list_head	scd_rpc_free;
-	/** in-flight RPCs */
-	struct list_head	scd_rpc_active;
-	/** workitem for posting buffer */
-	struct swi_workitem	scd_buf_wi;
-	/** CPT id */
-	int			scd_cpt;
-	/** error code for scd_buf_wi */
-	int			scd_buf_err;
-	/** timestamp for scd_buf_err */
-	time64_t		scd_buf_err_stamp;
-	/** total # request buffers */
-	int			scd_buf_total;
-	/** # posted request buffers */
-	int			scd_buf_nposted;
-	/** # request buffers currently being posted */
-	int			scd_buf_posting;
-	/** allocate more buffers if scd_buf_nposted < scd_buf_low */
-	int			scd_buf_low;
-	/** # of buffers to add/remove */
-	int			scd_buf_adjust;
-	/** posted message buffers */
-	struct list_head	scd_buf_posted;
-	/** blocked for RPC descriptor */
-	struct list_head	scd_buf_blocked;
-};
-
-/* number of server workitems (mini-threads) for the test service */
-#define SFW_TEST_WI_MIN		256
-#define SFW_TEST_WI_MAX		2048
-/* extra buffers for tolerating buggy peers, or an unbalanced number
- * of peers between partitions
- */
-#define SFW_TEST_WI_EXTRA	64
-
-/* number of server workitems (mini-threads) for the framework service */
-#define SFW_FRWK_WI_MIN		16
-#define SFW_FRWK_WI_MAX		256
-
-struct srpc_service {
-	int			sv_id;		/* service id */
-	const char		*sv_name;	/* human readable name */
-	int			sv_wi_total;	/* total server workitems */
-	int			sv_shuttingdown;
-	int			sv_ncpts;
-	/* percpt data for srpc_service */
-	struct srpc_service_cd	**sv_cpt_data;
-	/* Service callbacks:
-	 * - sv_handler: process incoming RPC request
-	 * - sv_bulk_ready: notify that bulk data has arrived
-	 */
-	int (*sv_handler)(struct srpc_server_rpc *);
-	int (*sv_bulk_ready)(struct srpc_server_rpc *, int);
-};
-
-struct sfw_session {
-	struct list_head sn_list;    /* chain on fw_zombie_sessions */
-	struct lst_sid	 sn_id;      /* unique identifier */
-	unsigned int	 sn_timeout; /* # seconds' inactivity to expire */
-	int		 sn_timer_active;
-	unsigned int	 sn_features;
-	struct stt_timer      sn_timer;
-	struct list_head sn_batches; /* list of batches */
-	char		 sn_name[LST_NAME_SIZE];
-	atomic_t	 sn_refcount;
-	atomic_t	 sn_brw_errors;
-	atomic_t	 sn_ping_errors;
-	unsigned long	 sn_started;
-};
-
-#define sfw_sid_equal(sid0, sid1)     ((sid0).ses_nid == (sid1).ses_nid && \
-				       (sid0).ses_stamp == (sid1).ses_stamp)
-
-struct sfw_batch {
-	struct list_head bat_list;	/* chain on sn_batches */
-	struct lst_bid	 bat_id;	/* batch id */
-	int		 bat_error;	/* error code of batch */
-	struct sfw_session	*bat_session;	/* batch's session */
-	atomic_t	 bat_nactive;	/* # of active tests */
-	struct list_head bat_tests;	/* test instances */
-};
-
-struct sfw_test_client_ops {
-	int  (*tso_init)(struct sfw_test_instance *tsi); /* initialize test
-							  * client
-							  */
-	void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test
-							  * client
-							  */
-	int  (*tso_prep_rpc)(struct sfw_test_unit *tsu,
-			     struct lnet_process_id dest,
-			     struct srpc_client_rpc **rpc);	/* prepare a test RPC */
-	void (*tso_done_rpc)(struct sfw_test_unit *tsu,
-			     struct srpc_client_rpc *rpc);	/* finish a test RPC */
-};
-
-struct sfw_test_instance {
-	struct list_head	   tsi_list;		/* chain on batch */
-	int			   tsi_service;		/* test type */
-	struct sfw_batch		*tsi_batch;	/* batch */
-	struct sfw_test_client_ops	*tsi_ops;	/* test client ops */
-
-	/* public parameter for all test units */
-	unsigned int		   tsi_is_client:1;	/* is test client */
-	unsigned int		   tsi_stoptsu_onerr:1; /* stop tsu on error */
-	int			   tsi_concur;		/* concurrency */
-	int			   tsi_loop;		/* loop count */
-
-	/* status of test instance */
-	spinlock_t		   tsi_lock;		/* serialize */
-	unsigned int		   tsi_stopping:1;	/* test is stopping */
-	atomic_t		   tsi_nactive;		/* # of active test
-							 * unit
-							 */
-	struct list_head	   tsi_units;		/* test units */
-	struct list_head	   tsi_free_rpcs;	/* free rpcs */
-	struct list_head	   tsi_active_rpcs;	/* active rpcs */
-
-	union {
-		struct test_ping_req	ping;		/* ping parameter */
-		struct test_bulk_req	bulk_v0;	/* bulk parameter */
-		struct test_bulk_req_v1	bulk_v1;	/* bulk v1 parameter */
-	} tsi_u;
-};
-
-/*
- * XXX: trailing (PAGE_SIZE % sizeof(struct lnet_process_id_packed)) bytes
- * at the end of pages are not used
- */
-#define SFW_MAX_CONCUR	   LST_MAX_CONCUR
-#define SFW_ID_PER_PAGE    (PAGE_SIZE / sizeof(struct lnet_process_id_packed))
-#define SFW_MAX_NDESTS	   (LNET_MAX_IOV * SFW_ID_PER_PAGE)
-#define sfw_id_pages(n)    (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
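
sfw_id_pages() is ordinary ceiling division: destination ids are packed into pages, and a partial page still costs a whole one. A tiny illustration, assuming for the sake of the numbers a 4096-byte page and a 24-byte packed id:

#include <stdio.h>

#define IDS_PER_PAGE	170	/* e.g. 4096-byte page / 24-byte packed id */
#define id_pages(n)	(((n) + IDS_PER_PAGE - 1) / IDS_PER_PAGE)

int main(void)
{
	/* 170 ids fit in one page; the 171st forces a second one */
	printf("%d %d\n", id_pages(170), id_pages(171));	/* 1 2 */
	return 0;
}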
-
-struct sfw_test_unit {
-	struct list_head    tsu_list;	   /* chain on tsi_units of the instance */
-	struct lnet_process_id		tsu_dest;	/* id of dest node */
-	int		    tsu_loop;	   /* loop count of the test */
-	struct sfw_test_instance	*tsu_instance; /* pointer to test instance */
-	void		    *tsu_private;  /* private data */
-	struct swi_workitem	tsu_worker;	/* workitem of the test unit */
-};
-
-struct sfw_test_case {
-	struct list_head      tsc_list;		/* chain on fw_tests */
-	struct srpc_service		*tsc_srv_service;	/* test service */
-	struct sfw_test_client_ops	*tsc_cli_ops;	/* ops of test client */
-};
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
-	       unsigned int features, int nbulkiov, int bulklen,
-	       void (*done)(struct srpc_client_rpc *), void *priv);
-int sfw_create_test_rpc(struct sfw_test_unit *tsu,
-			struct lnet_process_id peer, unsigned int features,
-			int nblk, int blklen, struct srpc_client_rpc **rpc);
-void sfw_abort_rpc(struct srpc_client_rpc *rpc);
-void sfw_post_rpc(struct srpc_client_rpc *rpc);
-void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
-void sfw_unpack_message(struct srpc_msg *msg);
-void sfw_free_pages(struct srpc_server_rpc *rpc);
-void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
-int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
-		    int sink);
-int sfw_make_session(struct srpc_mksn_reqst *request,
-		     struct srpc_mksn_reply *reply);
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
-		       int nbulkiov, int bulklen,
-		       void (*rpc_done)(struct srpc_client_rpc *),
-		       void (*rpc_fini)(struct srpc_client_rpc *), void *priv);
-void srpc_post_rpc(struct srpc_client_rpc *rpc);
-void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
-void srpc_free_bulk(struct srpc_bulk *bk);
-struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off,
-				  unsigned int bulk_npg, unsigned int bulk_len,
-				  int sink);
-void srpc_send_rpc(struct swi_workitem *wi);
-int srpc_send_reply(struct srpc_server_rpc *rpc);
-int srpc_add_service(struct srpc_service *sv);
-int srpc_remove_service(struct srpc_service *sv);
-void srpc_shutdown_service(struct srpc_service *sv);
-void srpc_abort_service(struct srpc_service *sv);
-int srpc_finish_service(struct srpc_service *sv);
-int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_get_counters(struct srpc_counters *cnt);
-void srpc_set_counters(const struct srpc_counters *cnt);
-
-extern struct workqueue_struct *lst_serial_wq;
-extern struct workqueue_struct **lst_test_wq;
-
-static inline int
-srpc_serv_is_framework(struct srpc_service *svc)
-{
-	return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID;
-}
-
-static inline void
-swi_wi_action(struct work_struct *wi)
-{
-	struct swi_workitem *swi;
-
-	swi = container_of(wi, struct swi_workitem, swi_work);
-
-	swi->swi_action(swi);
-}
-
-static inline void
-swi_init_workitem(struct swi_workitem *swi,
-		  swi_action_t action, struct workqueue_struct *wq)
-{
-	swi->swi_wq = wq;
-	swi->swi_action = action;
-	swi->swi_state = SWI_STATE_NEWBORN;
-	INIT_WORK(&swi->swi_work, swi_wi_action);
-}
-
-static inline void
-swi_schedule_workitem(struct swi_workitem *wi)
-{
-	queue_work(wi->swi_wq, &wi->swi_work);
-}
-
-static inline int
-swi_cancel_workitem(struct swi_workitem *swi)
-{
-	return cancel_work_sync(&swi->swi_work);
-}
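
The swi_* helpers follow the standard workqueue pattern: a generic work_struct is embedded in the workitem, and the trampoline recovers the enclosing object with container_of() before dispatching to the type-specific action. A userspace sketch of that dispatch (container_of() open-coded, all names hypothetical):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work {
	void (*fn)(struct work *);
};

struct workitem {
	int state;
	void (*action)(struct workitem *);
	struct work work;		/* embedded generic handle */
};

/* generic callback: recover the workitem, then dispatch */
static void wi_trampoline(struct work *w)
{
	struct workitem *wi = container_of(w, struct workitem, work);

	wi->action(wi);
}

static void print_state(struct workitem *wi)
{
	printf("state = %d\n", wi->state);
}

int main(void)
{
	struct workitem wi = {
		.state = 0,
		.action = print_state,
		.work = { .fn = wi_trampoline },
	};

	wi.work.fn(&wi.work);	/* what queue_work() would eventually do */
	return 0;
}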
-
-int sfw_startup(void);
-int srpc_startup(void);
-void sfw_shutdown(void);
-void srpc_shutdown(void);
-
-static inline void
-srpc_destroy_client_rpc(struct srpc_client_rpc *rpc)
-{
-	LASSERT(rpc);
-	LASSERT(!srpc_event_pending(rpc));
-	LASSERT(!atomic_read(&rpc->crpc_refcount));
-
-	if (!rpc->crpc_fini)
-		kfree(rpc);
-	else
-		(*rpc->crpc_fini)(rpc);
-}
-
-static inline void
-srpc_init_client_rpc(struct srpc_client_rpc *rpc, struct lnet_process_id peer,
-		     int service, int nbulkiov, int bulklen,
-		     void (*rpc_done)(struct srpc_client_rpc *),
-		     void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
-	LASSERT(nbulkiov <= LNET_MAX_IOV);
-
-	memset(rpc, 0, offsetof(struct srpc_client_rpc,
-				crpc_bulk.bk_iovs[nbulkiov]));
-
-	INIT_LIST_HEAD(&rpc->crpc_list);
-	swi_init_workitem(&rpc->crpc_wi, srpc_send_rpc,
-			  lst_test_wq[lnet_cpt_of_nid(peer.nid)]);
-	spin_lock_init(&rpc->crpc_lock);
-	atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
-
-	rpc->crpc_dest = peer;
-	rpc->crpc_priv = priv;
-	rpc->crpc_service = service;
-	rpc->crpc_bulk.bk_len = bulklen;
-	rpc->crpc_bulk.bk_niov = nbulkiov;
-	rpc->crpc_done = rpc_done;
-	rpc->crpc_fini = rpc_fini;
-	LNetInvalidateMDHandle(&rpc->crpc_reqstmdh);
-	LNetInvalidateMDHandle(&rpc->crpc_replymdh);
-	LNetInvalidateMDHandle(&rpc->crpc_bulk.bk_mdh);
-
-	/* no event is expected at this point */
-	rpc->crpc_bulkev.ev_fired = 1;
-	rpc->crpc_reqstev.ev_fired = 1;
-	rpc->crpc_replyev.ev_fired = 1;
-
-	rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
-	rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
-	rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
-}
-
-static inline const char *
-swi_state2str(int state)
-{
-#define STATE2STR(x) case x: return #x
-	switch (state) {
-	default:
-		LBUG();
-	STATE2STR(SWI_STATE_NEWBORN);
-	STATE2STR(SWI_STATE_REPLY_SUBMITTED);
-	STATE2STR(SWI_STATE_REPLY_SENT);
-	STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
-	STATE2STR(SWI_STATE_REQUEST_SENT);
-	STATE2STR(SWI_STATE_REPLY_RECEIVED);
-	STATE2STR(SWI_STATE_BULK_STARTED);
-	STATE2STR(SWI_STATE_DONE);
-	}
-#undef STATE2STR
-}
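
STATE2STR is the usual preprocessor stringification trick: one macro generates both the case label and the returned name, so the table cannot drift out of sync with the enum. In standalone form (enum values hypothetical):

#include <stdio.h>

enum state { NEWBORN, SENT, DONE };

static const char *state2str(enum state s)
{
#define S2S(x) case x: return #x	/* label and name from one token */
	switch (s) {
	S2S(NEWBORN);
	S2S(SENT);
	S2S(DONE);
	}
#undef S2S
	return "UNKNOWN";
}

int main(void)
{
	printf("%s\n", state2str(SENT));	/* prints "SENT" */
	return 0;
}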
-
-#define selftest_wait_events()					\
-	do {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);	\
-		schedule_timeout(HZ / 10);	\
-	} while (0)
-
-#define lst_wait_until(cond, lock, fmt, ...)				\
-do {									\
-	int __I = 2;							\
-	while (!(cond)) {						\
-		CDEBUG(is_power_of_2(++__I) ? D_WARNING : D_NET,	\
-		       fmt, ## __VA_ARGS__);				\
-		spin_unlock(&(lock));					\
-									\
-		selftest_wait_events();					\
-									\
-		spin_lock(&(lock));					\
-	}								\
-} while (0)
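
lst_wait_until() throttles its own logging: the condition is re-checked in a loop, but a warning is emitted only when the retry count is a power of two, so a stuck condition yields O(log n) warnings instead of n. The same idea in a minimal sketch (names hypothetical):

#include <stdio.h>

static int is_pow2(unsigned int v)
{
	return v && !(v & (v - 1));
}

static void poll_until(int (*cond)(void))
{
	unsigned int i = 2;

	while (!cond()) {
		if (is_pow2(++i))	/* warn at 4, 8, 16, ... only */
			fprintf(stderr, "still waiting (attempt %u)\n", i);
		/* a real implementation would sleep/yield here */
	}
}

static int tries;
static int ready(void)
{
	return ++tries > 40;
}

int main(void)
{
	poll_until(ready);
	return 0;
}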
-
-static inline void
-srpc_wait_service_shutdown(struct srpc_service *sv)
-{
-	int i = 2;
-
-	LASSERT(sv->sv_shuttingdown);
-
-	while (!srpc_finish_service(sv)) {
-		i++;
-		CDEBUG(((i & -i) == i) ? D_WARNING : D_NET,
-		       "Waiting for %s service to shutdown...\n",
-		       sv->sv_name);
-		selftest_wait_events();
-	}
-}
-
-extern struct sfw_test_client_ops brw_test_client;
-void brw_init_test_client(void);
-
-extern struct srpc_service brw_test_service;
-void brw_init_test_service(void);
-
-extern struct sfw_test_client_ops ping_test_client;
-void ping_init_test_client(void);
-
-extern struct srpc_service ping_test_service;
-void ping_init_test_service(void);
-
-#endif /* __SELFTEST_SELFTEST_H__ */

+ 0 - 244
drivers/staging/lustre/lnet/selftest/timer.c

@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/timer.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-/*
- * Timers are implemented as a sorted queue of expiry times. The queue
- * is slotted, with each slot holding timers which expire in a
- * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are
- * sorted by increasing expiry time. The number of slots is 2**7 (128),
- * to cover a time period of 1024 seconds into the future before wrapping.
- */
-#define STTIMER_MINPOLL        3	/* log2 min poll interval (8 s) */
-#define STTIMER_SLOTTIME	BIT(STTIMER_MINPOLL)
-#define STTIMER_SLOTTIMEMASK   (~(STTIMER_SLOTTIME - 1))
-#define STTIMER_NSLOTS		BIT(7)
-#define STTIMER_SLOT(t)	       (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
-						    (STTIMER_NSLOTS - 1))])
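
A quick standalone check of the slot arithmetic: 128 slots of 8 seconds each cover 1024 seconds before STTIMER_SLOT() wraps back onto the same list head:

#include <stdio.h>

#define MINPOLL		3
#define SLOTTIME	(1UL << MINPOLL)	/* 8 s per slot */
#define NSLOTS		(1UL << 7)		/* 128 slots */
#define SLOT(t)		(((t) >> MINPOLL) & (NSLOTS - 1))

int main(void)
{
	unsigned long t = 1000;

	printf("%lu\n", SLOT(t));			/* 125 */
	printf("%lu\n", SLOT(t + SLOTTIME * NSLOTS));	/* wraps: 125 */
	return 0;
}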
-
-static struct st_timer_data {
-	spinlock_t	  stt_lock;
-	unsigned long	  stt_prev_slot; /* start time of the slot processed
-					  * previously
-					  */
-	struct list_head  stt_hash[STTIMER_NSLOTS];
-	int		  stt_shuttingdown;
-	wait_queue_head_t stt_waitq;
-	int		  stt_nthreads;
-} stt_data;
-
-void
-stt_add_timer(struct stt_timer *timer)
-{
-	struct list_head *pos;
-
-	spin_lock(&stt_data.stt_lock);
-
-	LASSERT(stt_data.stt_nthreads > 0);
-	LASSERT(!stt_data.stt_shuttingdown);
-	LASSERT(timer->stt_func);
-	LASSERT(list_empty(&timer->stt_list));
-	LASSERT(timer->stt_expires > ktime_get_real_seconds());
-
-	/* a simple insertion sort */
-	list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) {
-		struct stt_timer *old = list_entry(pos, struct stt_timer,
-						   stt_list);
-
-		if (timer->stt_expires >= old->stt_expires)
-			break;
-	}
-	list_add(&timer->stt_list, pos);
-
-	spin_unlock(&stt_data.stt_lock);
-}
-
-/*
- * The function returns whether it has deactivated a pending timer or not.
- * (i.e. del_timer() of an inactive timer returns 0, del_timer() of an
- * active timer returns 1.)
- *
- * CAVEAT EMPTOR:
- * When 0 is returned, it is possible that timer->stt_func _is_ running on
- * another CPU.
- */
-int
-stt_del_timer(struct stt_timer *timer)
-{
-	int ret = 0;
-
-	spin_lock(&stt_data.stt_lock);
-
-	LASSERT(stt_data.stt_nthreads > 0);
-	LASSERT(!stt_data.stt_shuttingdown);
-
-	if (!list_empty(&timer->stt_list)) {
-		ret = 1;
-		list_del_init(&timer->stt_list);
-	}
-
-	spin_unlock(&stt_data.stt_lock);
-	return ret;
-}
-
-/* called with stt_data.stt_lock held */
-static int
-stt_expire_list(struct list_head *slot, time64_t now)
-{
-	int expired = 0;
-	struct stt_timer *timer;
-
-	while (!list_empty(slot)) {
-		timer = list_entry(slot->next, struct stt_timer, stt_list);
-
-		if (timer->stt_expires > now)
-			break;
-
-		list_del_init(&timer->stt_list);
-		spin_unlock(&stt_data.stt_lock);
-
-		expired++;
-		timer->stt_func(timer->stt_data);
-
-		spin_lock(&stt_data.stt_lock);
-	}
-
-	return expired;
-}
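
stt_expire_list() drops stt_lock around each callback, the classic pattern for invoking user code from under a spinlock: unlink the entry while locked, release the lock for the call, re-take it to continue the scan. A userspace sketch of the same pattern with a pthread mutex (names hypothetical):

#include <pthread.h>

struct timer {
	struct timer *next;
	void (*fn)(void *);
	void *arg;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct timer *head;

static void expire_all(void)
{
	pthread_mutex_lock(&lock);
	while (head) {
		struct timer *t = head;

		head = t->next;			/* unlink under the lock */
		pthread_mutex_unlock(&lock);	/* drop it for the callback */
		t->fn(t->arg);			/* may itself take the lock */
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
}

static void noop(void *arg)
{
	(void)arg;
}

int main(void)
{
	struct timer t = { .next = NULL, .fn = noop, .arg = NULL };

	head = &t;
	expire_all();
	return 0;
}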
-
-static int
-stt_check_timers(unsigned long *last)
-{
-	int expired = 0;
-	time64_t now;
-	unsigned long this_slot;
-
-	now = ktime_get_real_seconds();
-	this_slot = now & STTIMER_SLOTTIMEMASK;
-
-	spin_lock(&stt_data.stt_lock);
-
-	while (time_after_eq(this_slot, *last)) {
-		expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
-		this_slot = this_slot - STTIMER_SLOTTIME;
-	}
-
-	*last = now & STTIMER_SLOTTIMEMASK;
-	spin_unlock(&stt_data.stt_lock);
-	return expired;
-}
-
-static int
-stt_timer_main(void *arg)
-{
-	int rc = 0;
-
-	while (!stt_data.stt_shuttingdown) {
-		stt_check_timers(&stt_data.stt_prev_slot);
-
-		rc = wait_event_timeout(stt_data.stt_waitq,
-					stt_data.stt_shuttingdown,
-					STTIMER_SLOTTIME * HZ);
-	}
-
-	spin_lock(&stt_data.stt_lock);
-	stt_data.stt_nthreads--;
-	spin_unlock(&stt_data.stt_lock);
-	return rc;
-}
-
-static int
-stt_start_timer_thread(void)
-{
-	struct task_struct *task;
-
-	LASSERT(!stt_data.stt_shuttingdown);
-
-	task = kthread_run(stt_timer_main, NULL, "st_timer");
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-
-	spin_lock(&stt_data.stt_lock);
-	stt_data.stt_nthreads++;
-	spin_unlock(&stt_data.stt_lock);
-	return 0;
-}
-
-int
-stt_startup(void)
-{
-	int rc = 0;
-	int i;
-
-	stt_data.stt_shuttingdown = 0;
-	stt_data.stt_prev_slot = ktime_get_real_seconds() & STTIMER_SLOTTIMEMASK;
-
-	spin_lock_init(&stt_data.stt_lock);
-	for (i = 0; i < STTIMER_NSLOTS; i++)
-		INIT_LIST_HEAD(&stt_data.stt_hash[i]);
-
-	stt_data.stt_nthreads = 0;
-	init_waitqueue_head(&stt_data.stt_waitq);
-	rc = stt_start_timer_thread();
-	if (rc)
-		CERROR("Can't spawn timer thread: %d\n", rc);
-
-	return rc;
-}
-
-void
-stt_shutdown(void)
-{
-	int i;
-
-	spin_lock(&stt_data.stt_lock);
-
-	for (i = 0; i < STTIMER_NSLOTS; i++)
-		LASSERT(list_empty(&stt_data.stt_hash[i]));
-
-	stt_data.stt_shuttingdown = 1;
-
-	wake_up(&stt_data.stt_waitq);
-	lst_wait_until(!stt_data.stt_nthreads, stt_data.stt_lock,
-		       "waiting for %d threads to terminate\n",
-		       stt_data.stt_nthreads);
-
-	spin_unlock(&stt_data.stt_lock);
-}

Some files were not shown because too many files changed in this diff