
Merge branch 'work.aio-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull aio updates from Al Viro:
 "Majority of AIO stuff this cycle. aio-fsync and aio-poll, mostly.

  The only thing I'm holding back for a day or so is Adam's aio ioprio -
  his last-minute fixup is trivial (missing stub in !CONFIG_BLOCK case),
  but let it sit in -next for decency sake..."

* 'work.aio-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (46 commits)
  aio: sanitize the limit checking in io_submit(2)
  aio: fold do_io_submit() into callers
  aio: shift copyin of iocb into io_submit_one()
  aio_read_events_ring(): make a bit more readable
  aio: all callers of aio_{read,write,fsync,poll} treat 0 and -EIOCBQUEUED the same way
  aio: take list removal to (some) callers of aio_complete()
  aio: add missing break for the IOCB_CMD_FDSYNC case
  random: convert to ->poll_mask
  timerfd: convert to ->poll_mask
  eventfd: switch to ->poll_mask
  pipe: convert to ->poll_mask
  crypto: af_alg: convert to ->poll_mask
  net/rxrpc: convert to ->poll_mask
  net/iucv: convert to ->poll_mask
  net/phonet: convert to ->poll_mask
  net/nfc: convert to ->poll_mask
  net/caif: convert to ->poll_mask
  net/bluetooth: convert to ->poll_mask
  net/sctp: convert to ->poll_mask
  net/tipc: convert to ->poll_mask
  ...
Linus Torvalds, 7 years ago
commit 408afb8d78
99 files changed, 849 insertions(+), 595 deletions(-)
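The aio-poll and aio-fsync work described above adds new iocb opcodes (IOCB_CMD_POLL, plus the now-wired-up IOCB_CMD_FSYNC/IOCB_CMD_FDSYNC) that go through the existing io_submit(2) path. As a rough illustration only (not part of the commit), the sketch below shows how userspace could queue a poll request on stdin and reap its completion; raw syscalls are used because glibc does not wrap the AIO system calls, the uapi headers are assumed to be updated to this series, and error handling is omitted.

/*
 * Illustrative sketch: submit an IOCB_CMD_POLL iocb with io_submit(2)
 * and reap the completion with io_getevents(2).
 */
#include <linux/aio_abi.h>
#include <poll.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	aio_context_t ctx = 0;
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };
	struct io_event ev;

	syscall(SYS_io_setup, 1, &ctx);

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_POLL;	/* new opcode added by this series */
	cb.aio_fildes = 0;			/* wait for stdin to become readable */
	cb.aio_buf = POLLIN;			/* requested events are carried in aio_buf */

	syscall(SYS_io_submit, ctx, 1, cbs);
	/* ev.res carries the resulting POLL* readiness mask */
	syscall(SYS_io_getevents, ctx, 1, 1, &ev, NULL);

	syscall(SYS_io_destroy, ctx);
	return 0;
}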
  1. Documentation/filesystems/Locking (+7, -2)
  2. Documentation/filesystems/vfs.txt (+14, -1)
  3. arch/x86/entry/syscalls/syscall_32.tbl (+1, -0)
  4. arch/x86/entry/syscalls/syscall_64.tbl (+1, -0)
  5. crypto/af_alg.c (+3, -11)
  6. crypto/algif_aead.c (+2, -2)
  7. crypto/algif_hash.c (+0, -2)
  8. crypto/algif_rng.c (+0, -1)
  9. crypto/algif_skcipher.c (+2, -2)
 10. drivers/char/random.c (+16, -13)
 11. drivers/isdn/mISDN/socket.c (+1, -2)
 12. drivers/net/ppp/pppoe.c (+1, -1)
 13. drivers/net/ppp/pptp.c (+0, -1)
 14. drivers/staging/comedi/drivers/serial2002.c (+2, -2)
 15. drivers/staging/ipx/af_ipx.c (+1, -1)
 16. drivers/vfio/virqfd.c (+1, -1)
 17. drivers/vhost/vhost.c (+1, -1)
 18. fs/aio.c (+492, -239)
 19. fs/eventfd.c (+11, -4)
 20. fs/eventpoll.c (+2, -3)
 21. fs/pipe.c (+13, -9)
 22. fs/select.c (+49, -36)
 23. fs/timerfd.c (+11, -11)
 24. include/crypto/if_alg.h (+1, -2)
 25. include/linux/aio.h (+0, -2)
 26. include/linux/compat.h (+7, -0)
 27. include/linux/fs.h (+2, -0)
 28. include/linux/net.h (+1, -0)
 29. include/linux/poll.h (+12, -2)
 30. include/linux/skbuff.h (+1, -2)
 31. include/linux/syscalls.h (+6, -0)
 32. include/net/bluetooth/bluetooth.h (+1, -1)
 33. include/net/busy_poll.h (+15, -0)
 34. include/net/iucv/af_iucv.h (+0, -2)
 35. include/net/sctp/sctp.h (+1, -2)
 36. include/net/sock.h (+0, -2)
 37. include/net/tcp.h (+1, -2)
 38. include/net/udp.h (+1, -1)
 39. include/uapi/asm-generic/unistd.h (+3, -1)
 40. include/uapi/linux/aio_abi.h (+8, -4)
 41. include/uapi/linux/types.h (+0, -4)
 42. kernel/sys_ni.c (+2, -0)
 43. mm/memcontrol.c (+1, -1)
 44. net/9p/trans_fd.c (+4, -14)
 45. net/appletalk/ddp.c (+1, -1)
 46. net/atm/common.c (+3, -8)
 47. net/atm/common.h (+1, -1)
 48. net/atm/pvc.c (+1, -1)
 49. net/atm/svc.c (+1, -1)
 50. net/ax25/af_ax25.c (+1, -1)
 51. net/bluetooth/af_bluetooth.c (+2, -5)
 52. net/bluetooth/bnep/sock.c (+0, -1)
 53. net/bluetooth/cmtp/sock.c (+0, -1)
 54. net/bluetooth/hci_sock.c (+1, -1)
 55. net/bluetooth/hidp/sock.c (+0, -1)
 56. net/bluetooth/l2cap_sock.c (+1, -1)
 57. net/bluetooth/rfcomm/sock.c (+1, -1)
 58. net/bluetooth/sco.c (+1, -1)
 59. net/caif/caif_socket.c (+4, -8)
 60. net/can/bcm.c (+1, -1)
 61. net/can/raw.c (+1, -1)
 62. net/core/datagram.c (+4, -9)
 63. net/core/sock.c (+0, -6)
 64. net/dccp/dccp.h (+1, -2)
 65. net/dccp/ipv4.c (+1, -1)
 66. net/dccp/ipv6.c (+1, -1)
 67. net/dccp/proto.c (+2, -11)
 68. net/decnet/af_decnet.c (+3, -3)
 69. net/ieee802154/socket.c (+2, -2)
 70. net/ipv4/af_inet.c (+4, -4)
 71. net/ipv4/tcp.c (+6, -17)
 72. net/ipv4/udp.c (+5, -5)
 73. net/ipv6/af_inet6.c (+2, -2)
 74. net/ipv6/raw.c (+2, -2)
 75. net/iucv/af_iucv.c (+2, -5)
 76. net/kcm/kcmsock.c (+5, -5)
 77. net/key/af_key.c (+1, -1)
 78. net/l2tp/l2tp_ip.c (+1, -1)
 79. net/l2tp/l2tp_ip6.c (+1, -1)
 80. net/l2tp/l2tp_ppp.c (+1, -1)
 81. net/llc/af_llc.c (+1, -1)
 82. net/netlink/af_netlink.c (+1, -1)
 83. net/netrom/af_netrom.c (+1, -1)
 84. net/nfc/llcp_sock.c (+3, -6)
 85. net/nfc/rawsock.c (+2, -2)
 86. net/packet/af_packet.c (+4, -5)
 87. net/phonet/socket.c (+3, -6)
 88. net/qrtr/qrtr.c (+1, -1)
 89. net/rose/af_rose.c (+1, -1)
 90. net/rxrpc/af_rxrpc.c (+3, -7)
 91. net/sctp/ipv6.c (+1, -1)
 92. net/sctp/protocol.c (+1, -1)
 93. net/sctp/socket.c (+1, -3)
 94. net/socket.c (+40, -15)
 95. net/tipc/socket.c (+5, -9)
 96. net/unix/af_unix.c (+11, -19)
 97. net/vmw_vsock/af_vsock.c (+6, -13)
 98. net/x25/af_x25.c (+1, -1)
 99. virt/kvm/eventfd.c (+1, -1)

+ 7 - 2
Documentation/filesystems/Locking

@@ -440,7 +440,9 @@ prototypes:
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
-	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
@@ -471,7 +473,7 @@ prototypes:
 };
 
 locking rules:
-	All may block.
+	All except for ->poll_mask may block.
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
@@ -503,6 +505,9 @@ in sys_read() and friends.
 the lease within the individual filesystem to record the result of the
 operation
 
+->poll_mask can be called with or without the waitqueue lock for the waitqueue
+returned from ->get_poll_head.
+
 --------------------------- dquot_operations -------------------------------
 prototypes:
 	int (*write_dquot) (struct dquot *);

+ 14 - 1
Documentation/filesystems/vfs.txt

@@ -856,7 +856,9 @@ struct file_operations {
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
-	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
@@ -901,6 +903,17 @@ otherwise noted.
 	activity on this file and (optionally) go to sleep until there
 	is activity. Called by the select(2) and poll(2) system calls
 
+  get_poll_head: Returns the struct wait_queue_head that callers can
+  wait on.  Callers need to check the returned events using ->poll_mask
+  once woken.  Can return NULL to indicate polling is not supported,
+  or any error code using the ERR_PTR convention to indicate that a
+  grave error occured and ->poll_mask shall not be called.
+
+  poll_mask: return the mask of EPOLL* values describing the file descriptor
+  state.  Called either before going to sleep on the waitqueue returned by
+  get_poll_head, or after it has been woken.  If ->get_poll_head and
+  ->poll_mask are implemented ->poll does not need to be implement.
+
   unlocked_ioctl: called by the ioctl(2) system call.
 
   compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
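To make the contract above concrete, a hypothetical driver needs only to hand back its waitqueue from ->get_poll_head and report current readiness from ->poll_mask without blocking. The sketch below is illustrative only; the mydev_* names are invented and do not appear in this commit.

#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(mydev_wait);
static bool mydev_has_data;	/* set by the producer/interrupt path */

static struct wait_queue_head *mydev_get_poll_head(struct file *file,
						   __poll_t events)
{
	/* a single waitqueue serves both readers and writers */
	return &mydev_wait;
}

static __poll_t mydev_poll_mask(struct file *file, __poll_t events)
{
	/* must not block: just report the current readiness state */
	__poll_t mask = EPOLLOUT | EPOLLWRNORM;	/* always writable in this sketch */

	if (READ_ONCE(mydev_has_data))
		mask |= EPOLLIN | EPOLLRDNORM;
	return mask;
}

static const struct file_operations mydev_fops = {
	.get_poll_head	= mydev_get_poll_head,
	.poll_mask	= mydev_poll_mask,
	/* no ->poll method is needed once both of the above are provided */
};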

+ 1 - 0
arch/x86/entry/syscalls/syscall_32.tbl

@@ -396,3 +396,4 @@
 382	i386	pkey_free		sys_pkey_free			__ia32_sys_pkey_free
 383	i386	statx			sys_statx			__ia32_sys_statx
 384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
+385	i386	io_pgetevents		sys_io_pgetevents		__ia32_compat_sys_io_pgetevents

+ 1 - 0
arch/x86/entry/syscalls/syscall_64.tbl

@@ -341,6 +341,7 @@
 330	common	pkey_alloc		__x64_sys_pkey_alloc
 331	common	pkey_free		__x64_sys_pkey_free
 332	common	statx			__x64_sys_statx
+333	common	io_pgetevents		__x64_sys_io_pgetevents
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact

+ 3 - 11
crypto/af_alg.c

@@ -347,7 +347,6 @@ static const struct proto_ops alg_proto_ops = {
 	.sendpage	=	sock_no_sendpage,
 	.sendpage	=	sock_no_sendpage,
 	.sendmsg	=	sock_no_sendmsg,
 	.sendmsg	=	sock_no_sendmsg,
 	.recvmsg	=	sock_no_recvmsg,
 	.recvmsg	=	sock_no_recvmsg,
-	.poll		=	sock_no_poll,
 
 
 	.bind		=	alg_bind,
 	.bind		=	alg_bind,
 	.release	=	af_alg_release,
 	.release	=	af_alg_release,
@@ -1061,19 +1060,12 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err)
 }
 }
 EXPORT_SYMBOL_GPL(af_alg_async_cb);
 EXPORT_SYMBOL_GPL(af_alg_async_cb);
 
 
-/**
- * af_alg_poll - poll system call handler
- */
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
-			 poll_table *wait)
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct alg_sock *ask = alg_sk(sk);
 	struct af_alg_ctx *ctx = ask->private;
 	struct af_alg_ctx *ctx = ask->private;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 
 	if (!ctx->more || ctx->used)
 	if (!ctx->more || ctx->used)
 		mask |= EPOLLIN | EPOLLRDNORM;
 		mask |= EPOLLIN | EPOLLRDNORM;
@@ -1083,7 +1075,7 @@ __poll_t af_alg_poll(struct file *file, struct socket *sock,
 
 
 	return mask;
 	return mask;
 }
 }
-EXPORT_SYMBOL_GPL(af_alg_poll);
+EXPORT_SYMBOL_GPL(af_alg_poll_mask);
 
 
 /**
 /**
  * af_alg_alloc_areq - allocate struct af_alg_async_req
  * af_alg_alloc_areq - allocate struct af_alg_async_req

+ 2 - 2
crypto/algif_aead.c

@@ -375,7 +375,7 @@ static struct proto_ops algif_aead_ops = {
 	.sendmsg	=	aead_sendmsg,
 	.sendmsg	=	aead_sendmsg,
 	.sendpage	=	af_alg_sendpage,
 	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	aead_recvmsg,
 	.recvmsg	=	aead_recvmsg,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 };
 
 
 static int aead_check_key(struct socket *sock)
 static int aead_check_key(struct socket *sock)
@@ -471,7 +471,7 @@ static struct proto_ops algif_aead_ops_nokey = {
 	.sendmsg	=	aead_sendmsg_nokey,
 	.sendmsg	=	aead_sendmsg_nokey,
 	.sendpage	=	aead_sendpage_nokey,
 	.sendpage	=	aead_sendpage_nokey,
 	.recvmsg	=	aead_recvmsg_nokey,
 	.recvmsg	=	aead_recvmsg_nokey,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 };
 
 
 static void *aead_bind(const char *name, u32 type, u32 mask)
 static void *aead_bind(const char *name, u32 type, u32 mask)

+ 0 - 2
crypto/algif_hash.c

@@ -288,7 +288,6 @@ static struct proto_ops algif_hash_ops = {
 	.mmap		=	sock_no_mmap,
 	.mmap		=	sock_no_mmap,
 	.bind		=	sock_no_bind,
 	.bind		=	sock_no_bind,
 	.setsockopt	=	sock_no_setsockopt,
 	.setsockopt	=	sock_no_setsockopt,
-	.poll		=	sock_no_poll,
 
 
 	.release	=	af_alg_release,
 	.release	=	af_alg_release,
 	.sendmsg	=	hash_sendmsg,
 	.sendmsg	=	hash_sendmsg,
@@ -396,7 +395,6 @@ static struct proto_ops algif_hash_ops_nokey = {
 	.mmap		=	sock_no_mmap,
 	.mmap		=	sock_no_mmap,
 	.bind		=	sock_no_bind,
 	.bind		=	sock_no_bind,
 	.setsockopt	=	sock_no_setsockopt,
 	.setsockopt	=	sock_no_setsockopt,
-	.poll		=	sock_no_poll,
 
 
 	.release	=	af_alg_release,
 	.release	=	af_alg_release,
 	.sendmsg	=	hash_sendmsg_nokey,
 	.sendmsg	=	hash_sendmsg_nokey,

+ 0 - 1
crypto/algif_rng.c

@@ -106,7 +106,6 @@ static struct proto_ops algif_rng_ops = {
 	.bind		=	sock_no_bind,
 	.bind		=	sock_no_bind,
 	.accept		=	sock_no_accept,
 	.accept		=	sock_no_accept,
 	.setsockopt	=	sock_no_setsockopt,
 	.setsockopt	=	sock_no_setsockopt,
-	.poll		=	sock_no_poll,
 	.sendmsg	=	sock_no_sendmsg,
 	.sendmsg	=	sock_no_sendmsg,
 	.sendpage	=	sock_no_sendpage,
 	.sendpage	=	sock_no_sendpage,
 
 

+ 2 - 2
crypto/algif_skcipher.c

@@ -205,7 +205,7 @@ static struct proto_ops algif_skcipher_ops = {
 	.sendmsg	=	skcipher_sendmsg,
 	.sendmsg	=	skcipher_sendmsg,
 	.sendpage	=	af_alg_sendpage,
 	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	skcipher_recvmsg,
 	.recvmsg	=	skcipher_recvmsg,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 };
 
 
 static int skcipher_check_key(struct socket *sock)
 static int skcipher_check_key(struct socket *sock)
@@ -301,7 +301,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
 	.sendmsg	=	skcipher_sendmsg_nokey,
 	.sendmsg	=	skcipher_sendmsg_nokey,
 	.sendpage	=	skcipher_sendpage_nokey,
 	.sendpage	=	skcipher_sendpage_nokey,
 	.recvmsg	=	skcipher_recvmsg_nokey,
 	.recvmsg	=	skcipher_recvmsg_nokey,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 };
 
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 static void *skcipher_bind(const char *name, u32 type, u32 mask)

+ 16 - 13
drivers/char/random.c

@@ -402,8 +402,7 @@ static struct poolinfo {
 /*
 /*
  * Static global variables
  * Static global variables
  */
  */
-static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+static DECLARE_WAIT_QUEUE_HEAD(random_wait);
 static struct fasync_struct *fasync;
 static struct fasync_struct *fasync;
 
 
 static DEFINE_SPINLOCK(random_ready_list_lock);
 static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -722,8 +721,8 @@ retry:
 
 
 		/* should we wake readers? */
 		/* should we wake readers? */
 		if (entropy_bits >= random_read_wakeup_bits &&
 		if (entropy_bits >= random_read_wakeup_bits &&
-		    wq_has_sleeper(&random_read_wait)) {
-			wake_up_interruptible(&random_read_wait);
+		    wq_has_sleeper(&random_wait)) {
+			wake_up_interruptible_poll(&random_wait, POLLIN);
 			kill_fasync(&fasync, SIGIO, POLL_IN);
 			kill_fasync(&fasync, SIGIO, POLL_IN);
 		}
 		}
 		/* If the input pool is getting full, send some
 		/* If the input pool is getting full, send some
@@ -1397,7 +1396,7 @@ retry:
 	trace_debit_entropy(r->name, 8 * ibytes);
 	trace_debit_entropy(r->name, 8 * ibytes);
 	if (ibytes &&
 	if (ibytes &&
 	    (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) {
 	    (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) {
-		wake_up_interruptible(&random_write_wait);
+		wake_up_interruptible_poll(&random_wait, POLLOUT);
 		kill_fasync(&fasync, SIGIO, POLL_OUT);
 		kill_fasync(&fasync, SIGIO, POLL_OUT);
 	}
 	}
 
 
@@ -1839,7 +1838,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes)
 		if (nonblock)
 		if (nonblock)
 			return -EAGAIN;
 			return -EAGAIN;
 
 
-		wait_event_interruptible(random_read_wait,
+		wait_event_interruptible(random_wait,
 			ENTROPY_BITS(&input_pool) >=
 			ENTROPY_BITS(&input_pool) >=
 			random_read_wakeup_bits);
 			random_read_wakeup_bits);
 		if (signal_pending(current))
 		if (signal_pending(current))
@@ -1876,14 +1875,17 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 	return ret;
 	return ret;
 }
 }
 
 
+static struct wait_queue_head *
+random_get_poll_head(struct file *file, __poll_t events)
+{
+	return &random_wait;
+}
+
 static __poll_t
 static __poll_t
-random_poll(struct file *file, poll_table * wait)
+random_poll_mask(struct file *file, __poll_t events)
 {
 {
-	__poll_t mask;
+	__poll_t mask = 0;
 
 
-	poll_wait(file, &random_read_wait, wait);
-	poll_wait(file, &random_write_wait, wait);
-	mask = 0;
 	if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
 	if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
 		mask |= EPOLLIN | EPOLLRDNORM;
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
 	if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
@@ -1990,7 +1992,8 @@ static int random_fasync(int fd, struct file *filp, int on)
 const struct file_operations random_fops = {
 const struct file_operations random_fops = {
 	.read  = random_read,
 	.read  = random_read,
 	.write = random_write,
 	.write = random_write,
-	.poll  = random_poll,
+	.get_poll_head  = random_get_poll_head,
+	.poll_mask  = random_poll_mask,
 	.unlocked_ioctl = random_ioctl,
 	.unlocked_ioctl = random_ioctl,
 	.fasync = random_fasync,
 	.fasync = random_fasync,
 	.llseek = noop_llseek,
 	.llseek = noop_llseek,
@@ -2323,7 +2326,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
 	 * We'll be woken up again once below random_write_wakeup_thresh,
 	 * We'll be woken up again once below random_write_wakeup_thresh,
 	 * or when the calling thread is about to terminate.
 	 * or when the calling thread is about to terminate.
 	 */
 	 */
-	wait_event_interruptible(random_write_wait, kthread_should_stop() ||
+	wait_event_interruptible(random_wait, kthread_should_stop() ||
 			ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
 			ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
 	mix_pool_bytes(poolp, buffer, count);
 	mix_pool_bytes(poolp, buffer, count);
 	credit_entropy_bits(poolp, entropy);
 	credit_entropy_bits(poolp, entropy);
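The conversion above folds the separate random_read_wait and random_write_wait queues into a single random_wait and switches the wakeup sites to wake_up_interruptible_poll(), which passes the readiness bits as the wakeup key. Waiters whose wake function filters on that key (as the aio poll code added in fs/aio.c below does) are then not woken for events they did not ask for. A minimal, assumed sketch of such a key-filtering wake function, not taken from the patch:

#include <linux/poll.h>
#include <linux/wait.h>

/* hypothetical waiter that only cares about readability */
static int readable_only_wake(struct wait_queue_entry *wait, unsigned mode,
			      int sync, void *key)
{
	__poll_t mask = key_to_poll(key);

	/* ignore keyed wakeups for events this waiter did not request */
	if (mask && !(mask & (EPOLLIN | EPOLLRDNORM)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/* producer side, e.g. wake_up_interruptible_poll(&random_wait, POLLIN); */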

+ 1 - 2
drivers/isdn/mISDN/socket.c

@@ -588,7 +588,7 @@ static const struct proto_ops data_sock_ops = {
 	.getname	= data_sock_getname,
 	.getname	= data_sock_getname,
 	.sendmsg	= mISDN_sock_sendmsg,
 	.sendmsg	= mISDN_sock_sendmsg,
 	.recvmsg	= mISDN_sock_recvmsg,
 	.recvmsg	= mISDN_sock_recvmsg,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= data_sock_setsockopt,
 	.setsockopt	= data_sock_setsockopt,
@@ -745,7 +745,6 @@ static const struct proto_ops base_sock_ops = {
 	.getname	= sock_no_getname,
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
 	.setsockopt	= sock_no_setsockopt,

+ 1 - 1
drivers/net/ppp/pppoe.c

@@ -1107,7 +1107,7 @@ static const struct proto_ops pppoe_ops = {
 	.socketpair	= sock_no_socketpair,
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.accept		= sock_no_accept,
 	.getname	= pppoe_getname,
 	.getname	= pppoe_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
 	.setsockopt	= sock_no_setsockopt,

+ 0 - 1
drivers/net/ppp/pptp.c

@@ -624,7 +624,6 @@ static const struct proto_ops pptp_ops = {
 	.socketpair = sock_no_socketpair,
 	.socketpair = sock_no_socketpair,
 	.accept     = sock_no_accept,
 	.accept     = sock_no_accept,
 	.getname    = pptp_getname,
 	.getname    = pptp_getname,
-	.poll       = sock_no_poll,
 	.listen     = sock_no_listen,
 	.listen     = sock_no_listen,
 	.shutdown   = sock_no_shutdown,
 	.shutdown   = sock_no_shutdown,
 	.setsockopt = sock_no_setsockopt,
 	.setsockopt = sock_no_setsockopt,

+ 2 - 2
drivers/staging/comedi/drivers/serial2002.c

@@ -113,7 +113,7 @@ static void serial2002_tty_read_poll_wait(struct file *f, int timeout)
 		long elapsed;
 		long elapsed;
 		__poll_t mask;
 		__poll_t mask;
 
 
-		mask = f->f_op->poll(f, &table.pt);
+		mask = vfs_poll(f, &table.pt);
 		if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
 		if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
 			    EPOLLHUP | EPOLLERR)) {
 			    EPOLLHUP | EPOLLERR)) {
 			break;
 			break;
@@ -136,7 +136,7 @@ static int serial2002_tty_read(struct file *f, int timeout)
 
 
 	result = -1;
 	result = -1;
 	if (!IS_ERR(f)) {
 	if (!IS_ERR(f)) {
-		if (f->f_op->poll) {
+		if (file_can_poll(f)) {
 			serial2002_tty_read_poll_wait(f, timeout);
 			serial2002_tty_read_poll_wait(f, timeout);
 
 
 			if (kernel_read(f, &ch, 1, &pos) == 1)
 			if (kernel_read(f, &ch, 1, &pos) == 1)

+ 1 - 1
drivers/staging/ipx/af_ipx.c

@@ -1965,7 +1965,7 @@ static const struct proto_ops ipx_dgram_ops = {
 	.socketpair	= sock_no_socketpair,
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.accept		= sock_no_accept,
 	.getname	= ipx_getname,
 	.getname	= ipx_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= ipx_ioctl,
 	.ioctl		= ipx_ioctl,
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ipx_compat_ioctl,
 	.compat_ioctl	= ipx_compat_ioctl,

+ 1 - 1
drivers/vfio/virqfd.c

@@ -166,7 +166,7 @@ int vfio_virqfd_enable(void *opaque,
 	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
 	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
 	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
 	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
 
 
-	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
+	events = vfs_poll(irqfd.file, &virqfd->pt);
 
 
 	/*
 	/*
 	 * Check if there was an event already pending on the eventfd
 	 * Check if there was an event already pending on the eventfd

+ 1 - 1
drivers/vhost/vhost.c

@@ -208,7 +208,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
 	if (poll->wqh)
 	if (poll->wqh)
 		return 0;
 		return 0;
 
 
-	mask = file->f_op->poll(file, &poll->table);
+	mask = vfs_poll(file, &poll->table);
 	if (mask)
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
 		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
 	if (mask & EPOLLERR) {
 	if (mask & EPOLLERR) {
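The serial2002, vfio and vhost changes above replace open-coded file->f_op->poll() calls with the new vfs_poll() and file_can_poll() helpers (added to include/linux/poll.h and fs/select.c in this series; those hunks are not shown in this excerpt), so in-kernel pollers keep working for files that only provide ->get_poll_head/->poll_mask. Roughly, and only as a simplified sketch (the real definitions differ in detail), the helpers behave like this:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>

/* simplified: does this file support polling at all? */
static inline bool file_can_poll_sketch(struct file *file)
{
	return file->f_op->poll ||
	       (file->f_op->get_poll_head && file->f_op->poll_mask);
}

/* simplified: poll a file through whichever interface it implements */
static __poll_t vfs_poll_sketch(struct file *file, struct poll_table_struct *pt)
{
	if (file->f_op->poll)
		return file->f_op->poll(file, pt);

	if (file->f_op->get_poll_head && file->f_op->poll_mask) {
		struct wait_queue_head *head;

		head = file->f_op->get_poll_head(file, ~(__poll_t)0);
		if (!head)
			return DEFAULT_POLLMASK;
		if (IS_ERR(head))
			return EPOLLERR;
		poll_wait(file, head, pt);
		return file->f_op->poll_mask(file, ~(__poll_t)0);
	}

	return DEFAULT_POLLMASK;
}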

+ 492 - 239
fs/aio.c

@@ -5,6 +5,7 @@
  *	Implements an efficient asynchronous io interface.
  *	Implements an efficient asynchronous io interface.
  *
  *
  *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
  *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
+ *	Copyright 2018 Christoph Hellwig.
  *
  *
  *	See ../COPYING for licensing terms.
  *	See ../COPYING for licensing terms.
  */
  */
@@ -46,6 +47,8 @@
 
 
 #include "internal.h"
 #include "internal.h"
 
 
+#define KIOCB_KEY		0
+
 #define AIO_RING_MAGIC			0xa10a10a1
 #define AIO_RING_MAGIC			0xa10a10a1
 #define AIO_RING_COMPAT_FEATURES	1
 #define AIO_RING_COMPAT_FEATURES	1
 #define AIO_RING_INCOMPAT_FEATURES	0
 #define AIO_RING_INCOMPAT_FEATURES	0
@@ -156,21 +159,29 @@ struct kioctx {
 	unsigned		id;
 	unsigned		id;
 };
 };
 
 
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED		((void *) (~0ULL))
+struct fsync_iocb {
+	struct work_struct	work;
+	struct file		*file;
+	bool			datasync;
+};
+
+struct poll_iocb {
+	struct file		*file;
+	__poll_t		events;
+	struct wait_queue_head	*head;
+
+	union {
+		struct wait_queue_entry	wait;
+		struct work_struct	work;
+	};
+};
 
 
 struct aio_kiocb {
 struct aio_kiocb {
-	struct kiocb		common;
+	union {
+		struct kiocb		rw;
+		struct fsync_iocb	fsync;
+		struct poll_iocb	poll;
+	};
 
 
 	struct kioctx		*ki_ctx;
 	struct kioctx		*ki_ctx;
 	kiocb_cancel_fn		*ki_cancel;
 	kiocb_cancel_fn		*ki_cancel;
@@ -264,9 +275,6 @@ static int __init aio_setup(void)
 
 
 	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
-
-	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
-
 	return 0;
 	return 0;
 }
 }
 __initcall(aio_setup);
 __initcall(aio_setup);
@@ -552,42 +560,20 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 
 
 void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
 {
-	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
 	struct kioctx *ctx = req->ki_ctx;
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 	unsigned long flags;
 
 
-	spin_lock_irqsave(&ctx->ctx_lock, flags);
-
-	if (!req->ki_list.next)
-		list_add(&req->ki_list, &ctx->active_reqs);
+	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
+		return;
 
 
+	spin_lock_irqsave(&ctx->ctx_lock, flags);
+	list_add_tail(&req->ki_list, &ctx->active_reqs);
 	req->ki_cancel = cancel;
 	req->ki_cancel = cancel;
-
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
 
-static int kiocb_cancel(struct aio_kiocb *kiocb)
-{
-	kiocb_cancel_fn *old, *cancel;
-
-	/*
-	 * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
-	 * actually has a cancel function, hence the cmpxchg()
-	 */
-
-	cancel = READ_ONCE(kiocb->ki_cancel);
-	do {
-		if (!cancel || cancel == KIOCB_CANCELLED)
-			return -EINVAL;
-
-		old = cancel;
-		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
-	} while (cancel != old);
-
-	return cancel(&kiocb->common);
-}
-
 /*
 /*
  * free_ioctx() should be RCU delayed to synchronize against the RCU
  * free_ioctx() should be RCU delayed to synchronize against the RCU
  * protected lookup_ioctx() and also needs process context to call
  * protected lookup_ioctx() and also needs process context to call
@@ -634,7 +620,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
 	while (!list_empty(&ctx->active_reqs)) {
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
 		req = list_first_entry(&ctx->active_reqs,
 				       struct aio_kiocb, ki_list);
 				       struct aio_kiocb, ki_list);
-		kiocb_cancel(req);
+		req->ki_cancel(&req->rw);
 		list_del_init(&req->ki_list);
 		list_del_init(&req->ki_list);
 	}
 	}
 
 
@@ -1041,7 +1027,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 		goto out_put;
 		goto out_put;
 
 
 	percpu_ref_get(&ctx->reqs);
 	percpu_ref_get(&ctx->reqs);
-
+	INIT_LIST_HEAD(&req->ki_list);
 	req->ki_ctx = ctx;
 	req->ki_ctx = ctx;
 	return req;
 	return req;
 out_put:
 out_put:
@@ -1049,15 +1035,6 @@ out_put:
 	return NULL;
 	return NULL;
 }
 }
 
 
-static void kiocb_free(struct aio_kiocb *req)
-{
-	if (req->common.ki_filp)
-		fput(req->common.ki_filp);
-	if (req->ki_eventfd != NULL)
-		eventfd_ctx_put(req->ki_eventfd);
-	kmem_cache_free(kiocb_cachep, req);
-}
-
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
 {
 	struct aio_ring __user *ring  = (void __user *)ctx_id;
 	struct aio_ring __user *ring  = (void __user *)ctx_id;
@@ -1088,44 +1065,14 @@ out:
 /* aio_complete
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  *	Called when the io request on the given iocb is complete.
  */
  */
-static void aio_complete(struct kiocb *kiocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 {
 {
-	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
 	struct io_event	*ev_page, *event;
 	unsigned tail, pos, head;
 	unsigned tail, pos, head;
 	unsigned long	flags;
 	unsigned long	flags;
 
 
-	if (kiocb->ki_flags & IOCB_WRITE) {
-		struct file *file = kiocb->ki_filp;
-
-		/*
-		 * Tell lockdep we inherited freeze protection from submission
-		 * thread.
-		 */
-		if (S_ISREG(file_inode(file)->i_mode))
-			__sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
-		file_end_write(file);
-	}
-
-	/*
-	 * Special case handling for sync iocbs:
-	 *  - events go directly into the iocb for fast handling
-	 *  - the sync task with the iocb in its stack holds the single iocb
-	 *    ref, no other paths have a way to get another ref
-	 *  - the sync task helpfully left a reference to itself in the iocb
-	 */
-	BUG_ON(is_sync_kiocb(kiocb));
-
-	if (iocb->ki_list.next) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&ctx->ctx_lock, flags);
-		list_del(&iocb->ki_list);
-		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-	}
-
 	/*
 	/*
 	 * Add a completion event to the ring buffer. Must be done holding
 	 * Add a completion event to the ring buffer. Must be done holding
 	 * ctx->completion_lock to prevent other code from messing with the tail
 	 * ctx->completion_lock to prevent other code from messing with the tail
@@ -1179,11 +1126,12 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
 	 * eventfd. The eventfd_signal() function is safe to be called
 	 * eventfd. The eventfd_signal() function is safe to be called
 	 * from IRQ context.
 	 * from IRQ context.
 	 */
 	 */
-	if (iocb->ki_eventfd != NULL)
+	if (iocb->ki_eventfd) {
 		eventfd_signal(iocb->ki_eventfd, 1);
 		eventfd_signal(iocb->ki_eventfd, 1);
+		eventfd_ctx_put(iocb->ki_eventfd);
+	}
 
 
-	/* everything turned out well, dispose of the aiocb. */
-	kiocb_free(iocb);
+	kmem_cache_free(kiocb_cachep, iocb);
 
 
 	/*
 	/*
 	 * We have to order our ring_info tail store above and test
 	 * We have to order our ring_info tail store above and test
@@ -1249,14 +1197,13 @@ static long aio_read_events_ring(struct kioctx *ctx,
 		if (head == tail)
 		if (head == tail)
 			break;
 			break;
 
 
-		avail = min(avail, nr - ret);
-		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
-			    ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
-
 		pos = head + AIO_EVENTS_OFFSET;
 		pos = head + AIO_EVENTS_OFFSET;
 		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
 		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
 		pos %= AIO_EVENTS_PER_PAGE;
 		pos %= AIO_EVENTS_PER_PAGE;
 
 
+		avail = min(avail, nr - ret);
+		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
+
 		ev = kmap(page);
 		ev = kmap(page);
 		copy_ret = copy_to_user(event + ret, ev + pos,
 		copy_ret = copy_to_user(event + ret, ev + pos,
 					sizeof(*ev) * avail);
 					sizeof(*ev) * avail);
@@ -1327,10 +1274,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 		wait_event_interruptible_hrtimeout(ctx->wait,
 		wait_event_interruptible_hrtimeout(ctx->wait,
 				aio_read_events(ctx, min_nr, nr, event, &ret),
 				aio_read_events(ctx, min_nr, nr, event, &ret),
 				until);
 				until);
-
-	if (!ret && signal_pending(current))
-		ret = -EINTR;
-
 	return ret;
 	return ret;
 }
 }
 
 
@@ -1446,6 +1389,58 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 
+static void aio_remove_iocb(struct aio_kiocb *iocb)
+{
+	struct kioctx *ctx = iocb->ki_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->ctx_lock, flags);
+	list_del(&iocb->ki_list);
+	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+}
+
+static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
+{
+	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+
+	if (!list_empty_careful(&iocb->ki_list))
+		aio_remove_iocb(iocb);
+
+	if (kiocb->ki_flags & IOCB_WRITE) {
+		struct inode *inode = file_inode(kiocb->ki_filp);
+
+		/*
+		 * Tell lockdep we inherited freeze protection from submission
+		 * thread.
+		 */
+		if (S_ISREG(inode->i_mode))
+			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+		file_end_write(kiocb->ki_filp);
+	}
+
+	fput(kiocb->ki_filp);
+	aio_complete(iocb, res, res2);
+}
+
+static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+{
+	int ret;
+
+	req->ki_filp = fget(iocb->aio_fildes);
+	if (unlikely(!req->ki_filp))
+		return -EBADF;
+	req->ki_complete = aio_complete_rw;
+	req->ki_pos = iocb->aio_offset;
+	req->ki_flags = iocb_flags(req->ki_filp);
+	if (iocb->aio_flags & IOCB_FLAG_RESFD)
+		req->ki_flags |= IOCB_EVENTFD;
+	req->ki_hint = file_write_hint(req->ki_filp);
+	ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
+	if (unlikely(ret))
+		fput(req->ki_filp);
+	return ret;
+}
+
 static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
 static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
 		bool vectored, bool compat, struct iov_iter *iter)
 		bool vectored, bool compat, struct iov_iter *iter)
 {
 {
@@ -1465,11 +1460,11 @@ static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
 	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
 	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
 }
 }
 
 
-static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
+static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
 {
 {
 	switch (ret) {
 	switch (ret) {
 	case -EIOCBQUEUED:
 	case -EIOCBQUEUED:
-		return ret;
+		break;
 	case -ERESTARTSYS:
 	case -ERESTARTSYS:
 	case -ERESTARTNOINTR:
 	case -ERESTARTNOINTR:
 	case -ERESTARTNOHAND:
 	case -ERESTARTNOHAND:
@@ -1481,85 +1476,270 @@ static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
 		ret = -EINTR;
 		ret = -EINTR;
 		/*FALLTHRU*/
 		/*FALLTHRU*/
 	default:
 	default:
-		aio_complete(req, ret, 0);
-		return 0;
+		aio_complete_rw(req, ret, 0);
 	}
 	}
 }
 }
 
 
 static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
 static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
 		bool compat)
 		bool compat)
 {
 {
-	struct file *file = req->ki_filp;
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
 	struct iov_iter iter;
+	struct file *file;
 	ssize_t ret;
 	ssize_t ret;
 
 
+	ret = aio_prep_rw(req, iocb);
+	if (ret)
+		return ret;
+	file = req->ki_filp;
+
+	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_READ)))
 	if (unlikely(!(file->f_mode & FMODE_READ)))
-		return -EBADF;
+		goto out_fput;
+	ret = -EINVAL;
 	if (unlikely(!file->f_op->read_iter))
 	if (unlikely(!file->f_op->read_iter))
-		return -EINVAL;
+		goto out_fput;
 
 
 	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
 	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
 	if (ret)
 	if (ret)
-		return ret;
+		goto out_fput;
 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret)
 	if (!ret)
-		ret = aio_ret(req, call_read_iter(file, req, &iter));
+		aio_rw_done(req, call_read_iter(file, req, &iter));
 	kfree(iovec);
 	kfree(iovec);
+out_fput:
+	if (unlikely(ret))
+		fput(file);
 	return ret;
 	return ret;
 }
 }
 
 
 static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 		bool compat)
 		bool compat)
 {
 {
-	struct file *file = req->ki_filp;
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
 	struct iov_iter iter;
+	struct file *file;
 	ssize_t ret;
 	ssize_t ret;
 
 
+	ret = aio_prep_rw(req, iocb);
+	if (ret)
+		return ret;
+	file = req->ki_filp;
+
+	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
-		return -EBADF;
+		goto out_fput;
+	ret = -EINVAL;
 	if (unlikely(!file->f_op->write_iter))
 	if (unlikely(!file->f_op->write_iter))
-		return -EINVAL;
+		goto out_fput;
 
 
 	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
 	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
 	if (ret)
 	if (ret)
-		return ret;
+		goto out_fput;
 	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
 	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret) {
 	if (!ret) {
-		req->ki_flags |= IOCB_WRITE;
-		file_start_write(file);
-		ret = aio_ret(req, call_write_iter(file, req, &iter));
 		/*
 		/*
-		 * We release freeze protection in aio_complete().  Fool lockdep
-		 * by telling it the lock got released so that it doesn't
-		 * complain about held lock when we return to userspace.
+		 * Open-code file_start_write here to grab freeze protection,
+		 * which will be released by another thread in
+		 * aio_complete_rw().  Fool lockdep by telling it the lock got
+		 * released so that it doesn't complain about the held lock when
+		 * we return to userspace.
 		 */
 		 */
-		if (S_ISREG(file_inode(file)->i_mode))
+		if (S_ISREG(file_inode(file)->i_mode)) {
+			__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
 			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+		}
+		req->ki_flags |= IOCB_WRITE;
+		aio_rw_done(req, call_write_iter(file, req, &iter));
 	}
 	}
 	kfree(iovec);
 	kfree(iovec);
+out_fput:
+	if (unlikely(ret))
+		fput(file);
 	return ret;
 	return ret;
 }
 }
 
 
+static void aio_fsync_work(struct work_struct *work)
+{
+	struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+	int ret;
+
+	ret = vfs_fsync(req->file, req->datasync);
+	fput(req->file);
+	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+}
+
+static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+{
+	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
+			iocb->aio_rw_flags))
+		return -EINVAL;
+	req->file = fget(iocb->aio_fildes);
+	if (unlikely(!req->file))
+		return -EBADF;
+	if (unlikely(!req->file->f_op->fsync)) {
+		fput(req->file);
+		return -EINVAL;
+	}
+
+	req->datasync = datasync;
+	INIT_WORK(&req->work, aio_fsync_work);
+	schedule_work(&req->work);
+	return 0;
+}
+
+/* need to use list_del_init so we can check if item was present */
+static inline bool __aio_poll_remove(struct poll_iocb *req)
+{
+	if (list_empty(&req->wait.entry))
+		return false;
+	list_del_init(&req->wait.entry);
+	return true;
+}
+
+static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+	fput(iocb->poll.file);
+	aio_complete(iocb, mangle_poll(mask), 0);
+}
+
+static void aio_poll_work(struct work_struct *work)
+{
+	struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
+
+	if (!list_empty_careful(&iocb->ki_list))
+		aio_remove_iocb(iocb);
+	__aio_poll_complete(iocb, iocb->poll.events);
+}
+
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+	struct poll_iocb *req = &aiocb->poll;
+	struct wait_queue_head *head = req->head;
+	bool found = false;
+
+	spin_lock(&head->lock);
+	found = __aio_poll_remove(req);
+	spin_unlock(&head->lock);
+
+	if (found) {
+		req->events = 0;
+		INIT_WORK(&req->work, aio_poll_work);
+		schedule_work(&req->work);
+	}
+	return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+		void *key)
+{
+	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+	struct file *file = req->file;
+	__poll_t mask = key_to_poll(key);
+
+	assert_spin_locked(&req->head->lock);
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & req->events))
+		return 0;
+
+	mask = file->f_op->poll_mask(file, req->events);
+	if (!mask)
+		return 0;
+
+	__aio_poll_remove(req);
+
+	/*
+	 * Try completing without a context switch if we can acquire ctx_lock
+	 * without spinning.  Otherwise we need to defer to a workqueue to
+	 * avoid a deadlock due to the lock order.
+	 */
+	if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+		list_del_init(&iocb->ki_list);
+		spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+		__aio_poll_complete(iocb, mask);
+	} else {
+		req->events = mask;
+		INIT_WORK(&req->work, aio_poll_work);
+		schedule_work(&req->work);
+	}
+
+	return 1;
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+	struct kioctx *ctx = aiocb->ki_ctx;
+	struct poll_iocb *req = &aiocb->poll;
+	__poll_t mask;
+
+	/* reject any unknown events outside the normal event mask. */
+	if ((u16)iocb->aio_buf != iocb->aio_buf)
+		return -EINVAL;
+	/* reject fields that are not defined for poll */
+	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+		return -EINVAL;
+
+	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+	req->file = fget(iocb->aio_fildes);
+	if (unlikely(!req->file))
+		return -EBADF;
+	if (!file_has_poll_mask(req->file))
+		goto out_fail;
+
+	req->head = req->file->f_op->get_poll_head(req->file, req->events);
+	if (!req->head)
+		goto out_fail;
+	if (IS_ERR(req->head)) {
+		mask = EPOLLERR;
+		goto done;
+	}
+
+	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+	aiocb->ki_cancel = aio_poll_cancel;
+
+	spin_lock_irq(&ctx->ctx_lock);
+	spin_lock(&req->head->lock);
+	mask = req->file->f_op->poll_mask(req->file, req->events);
+	if (!mask) {
+		__add_wait_queue(req->head, &req->wait);
+		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+	}
+	spin_unlock(&req->head->lock);
+	spin_unlock_irq(&ctx->ctx_lock);
+done:
+	if (mask)
+		__aio_poll_complete(aiocb, mask);
+	return 0;
+out_fail:
+	fput(req->file);
+	return -EINVAL; /* same as no support for IOCB_CMD_POLL */
+}
+
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb, bool compat)
+			 bool compat)
 {
 {
 	struct aio_kiocb *req;
 	struct aio_kiocb *req;
-	struct file *file;
+	struct iocb iocb;
 	ssize_t ret;
 	ssize_t ret;
 
 
+	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+		return -EFAULT;
+
 	/* enforce forwards compatibility on users */
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb->aio_reserved2)) {
+	if (unlikely(iocb.aio_reserved2)) {
 		pr_debug("EINVAL: reserve field set\n");
 		pr_debug("EINVAL: reserve field set\n");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
 	/* prevent overflows */
 	/* prevent overflows */
 	if (unlikely(
 	if (unlikely(
-	    (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
-	    (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
-	    ((ssize_t)iocb->aio_nbytes < 0)
+	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
+	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
+	    ((ssize_t)iocb.aio_nbytes < 0)
 	   )) {
 	   )) {
 		pr_debug("EINVAL: overflow check\n");
 		pr_debug("EINVAL: overflow check\n");
 		return -EINVAL;
 		return -EINVAL;
@@ -1569,37 +1749,19 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	if (unlikely(!req))
 	if (unlikely(!req))
 		return -EAGAIN;
 		return -EAGAIN;
 
 
-	req->common.ki_filp = file = fget(iocb->aio_fildes);
-	if (unlikely(!req->common.ki_filp)) {
-		ret = -EBADF;
-		goto out_put_req;
-	}
-	req->common.ki_pos = iocb->aio_offset;
-	req->common.ki_complete = aio_complete;
-	req->common.ki_flags = iocb_flags(req->common.ki_filp);
-	req->common.ki_hint = file_write_hint(file);
-
-	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
+	if (iocb.aio_flags & IOCB_FLAG_RESFD) {
 		/*
 		/*
 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
 		 * instance of the file* now. The file descriptor must be
 		 * instance of the file* now. The file descriptor must be
 		 * an eventfd() fd, and will be signaled for each completed
 		 * an eventfd() fd, and will be signaled for each completed
 		 * event using the eventfd_signal() function.
 		 * event using the eventfd_signal() function.
 		 */
 		 */
-		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
+		req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
 			ret = PTR_ERR(req->ki_eventfd);
 			req->ki_eventfd = NULL;
 			req->ki_eventfd = NULL;
 			goto out_put_req;
 			goto out_put_req;
 		}
 		}
-
-		req->common.ki_flags |= IOCB_EVENTFD;
-	}
-
-	ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
-	if (unlikely(ret)) {
-		pr_debug("EINVAL: aio_rw_flags\n");
-		goto out_put_req;
 	}
 	}
 
 
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1609,41 +1771,67 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	}
 	}
 
 
 	req->ki_user_iocb = user_iocb;
 	req->ki_user_iocb = user_iocb;
-	req->ki_user_data = iocb->aio_data;
+	req->ki_user_data = iocb.aio_data;
 
 
-	get_file(file);
-	switch (iocb->aio_lio_opcode) {
+	switch (iocb.aio_lio_opcode) {
 	case IOCB_CMD_PREAD:
 	case IOCB_CMD_PREAD:
-		ret = aio_read(&req->common, iocb, false, compat);
+		ret = aio_read(&req->rw, &iocb, false, compat);
 		break;
 		break;
 	case IOCB_CMD_PWRITE:
 	case IOCB_CMD_PWRITE:
-		ret = aio_write(&req->common, iocb, false, compat);
+		ret = aio_write(&req->rw, &iocb, false, compat);
 		break;
 		break;
 	case IOCB_CMD_PREADV:
 	case IOCB_CMD_PREADV:
-		ret = aio_read(&req->common, iocb, true, compat);
+		ret = aio_read(&req->rw, &iocb, true, compat);
 		break;
 		break;
 	case IOCB_CMD_PWRITEV:
 	case IOCB_CMD_PWRITEV:
-		ret = aio_write(&req->common, iocb, true, compat);
+		ret = aio_write(&req->rw, &iocb, true, compat);
+		break;
+	case IOCB_CMD_FSYNC:
+		ret = aio_fsync(&req->fsync, &iocb, false);
+		break;
+	case IOCB_CMD_FDSYNC:
+		ret = aio_fsync(&req->fsync, &iocb, true);
+		break;
+	case IOCB_CMD_POLL:
+		ret = aio_poll(req, &iocb);
 		break;
 		break;
 	default:
 	default:
-		pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
+		pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
 		ret = -EINVAL;
 		ret = -EINVAL;
 		break;
 		break;
 	}
 	}
-	fput(file);
 
 
-	if (ret && ret != -EIOCBQUEUED)
+	/*
+	 * If ret is 0, we'd either done aio_complete() ourselves or have
+	 * arranged for that to be done asynchronously.  Anything non-zero
+	 * means that we need to destroy req ourselves.
+	 */
+	if (ret)
 		goto out_put_req;
 		goto out_put_req;
 	return 0;
 	return 0;
 out_put_req:
 out_put_req:
 	put_reqs_available(ctx, 1);
 	put_reqs_available(ctx, 1);
 	percpu_ref_put(&ctx->reqs);
 	percpu_ref_put(&ctx->reqs);
-	kiocb_free(req);
+	if (req->ki_eventfd)
+		eventfd_ctx_put(req->ki_eventfd);
+	kmem_cache_free(kiocb_cachep, req);
 	return ret;
 	return ret;
 }
 }
 
 
-static long do_io_submit(aio_context_t ctx_id, long nr,
-			  struct iocb __user *__user *iocbpp, bool compat)
+/* sys_io_submit:
+ *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
+ *	the number of iocbs queued.  May return -EINVAL if the aio_context
+ *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
+ *	*iocbpp[0] is not properly initialized, if the operation specified
+ *	is invalid for the file descriptor in the iocb.  May fail with
+ *	-EFAULT if any of the data structures point to invalid data.  May
+ *	fail with -EBADF if the file descriptor specified in the first
+ *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
+ *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
+ *	fail with -ENOSYS if not implemented.
+ */
+SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
+		struct iocb __user * __user *, iocbpp)
 {
 {
 	struct kioctx *ctx;
 	struct kioctx *ctx;
 	long ret = 0;
 	long ret = 0;
@@ -1653,39 +1841,25 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
 	if (unlikely(nr < 0))
 	if (unlikely(nr < 0))
 		return -EINVAL;
 		return -EINVAL;
 
 
-	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
-		nr = LONG_MAX/sizeof(*iocbpp);
-
-	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
-		return -EFAULT;
-
 	ctx = lookup_ioctx(ctx_id);
 	ctx = lookup_ioctx(ctx_id);
 	if (unlikely(!ctx)) {
 	if (unlikely(!ctx)) {
 		pr_debug("EINVAL: invalid context id\n");
 		pr_debug("EINVAL: invalid context id\n");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	blk_start_plug(&plug);
+	if (nr > ctx->nr_events)
+		nr = ctx->nr_events;
 
 
-	/*
-	 * AKPM: should this return a partial result if some of the IOs were
-	 * successfully submitted?
-	 */
-	for (i=0; i<nr; i++) {
+	blk_start_plug(&plug);
+	for (i = 0; i < nr; i++) {
 		struct iocb __user *user_iocb;
 		struct iocb __user *user_iocb;
-		struct iocb tmp;
 
 
-		if (unlikely(__get_user(user_iocb, iocbpp + i))) {
+		if (unlikely(get_user(user_iocb, iocbpp + i))) {
 			ret = -EFAULT;
 			ret = -EFAULT;
 			break;
 			break;
 		}
 		}
 
 
-		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
-			ret = -EFAULT;
-			break;
-		}
-
-		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+		ret = io_submit_one(ctx, user_iocb, false);
 		if (ret)
 		if (ret)
 			break;
 			break;
 	}
 	}
@@ -1695,59 +1869,44 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
 	return i ? i : ret;
 	return i ? i : ret;
 }
 }
 
 
-/* sys_io_submit:
- *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
- *	the number of iocbs queued.  May return -EINVAL if the aio_context
- *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
- *	*iocbpp[0] is not properly initialized, if the operation specified
- *	is invalid for the file descriptor in the iocb.  May fail with
- *	-EFAULT if any of the data structures point to invalid data.  May
- *	fail with -EBADF if the file descriptor specified in the first
- *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
- *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
- *	fail with -ENOSYS if not implemented.
- */
-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
-		struct iocb __user * __user *, iocbpp)
-{
-	return do_io_submit(ctx_id, nr, iocbpp, 0);
-}
-
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_COMPAT
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+		       int, nr, compat_uptr_t __user *, iocbpp)
 {
 {
-	compat_uptr_t uptr;
-	int i;
+	struct kioctx *ctx;
+	long ret = 0;
+	int i = 0;
+	struct blk_plug plug;
 
 
-	for (i = 0; i < nr; ++i) {
-		if (get_user(uptr, ptr32 + i))
-			return -EFAULT;
-		if (put_user(compat_ptr(uptr), ptr64 + i))
-			return -EFAULT;
+	if (unlikely(nr < 0))
+		return -EINVAL;
+
+	ctx = lookup_ioctx(ctx_id);
+	if (unlikely(!ctx)) {
+		pr_debug("EINVAL: invalid context id\n");
+		return -EINVAL;
 	}
 	}
-	return 0;
-}
 
 
-#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
+	if (nr > ctx->nr_events)
+		nr = ctx->nr_events;
 
 
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
-		       int, nr, u32 __user *, iocb)
-{
-	struct iocb __user * __user *iocb64;
-	long ret;
+	blk_start_plug(&plug);
+	for (i = 0; i < nr; i++) {
+		compat_uptr_t user_iocb;
 
 
-	if (unlikely(nr < 0))
-		return -EINVAL;
+		if (unlikely(get_user(user_iocb, iocbpp + i))) {
+			ret = -EFAULT;
+			break;
+		}
 
 
-	if (nr > MAX_AIO_SUBMITS)
-		nr = MAX_AIO_SUBMITS;
+		ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
+		if (ret)
+			break;
+	}
+	blk_finish_plug(&plug);
 
 
-	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
-	ret = copy_iocb(nr, iocb, iocb64);
-	if (!ret)
-		ret = do_io_submit(ctx_id, nr, iocb64, 1);
-	return ret;
+	percpu_ref_put(&ctx->users);
+	return i ? i : ret;
 }
 }
 #endif
 #endif
 
 
@@ -1755,15 +1914,12 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
  *	Finds a given iocb for cancellation.
  *	Finds a given iocb for cancellation.
  */
  */
 static struct aio_kiocb *
 static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
 {
 {
 	struct aio_kiocb *kiocb;
 	struct aio_kiocb *kiocb;
 
 
 	assert_spin_locked(&ctx->ctx_lock);
 	assert_spin_locked(&ctx->ctx_lock);
 
 
-	if (key != KIOCB_KEY)
-		return NULL;
-
 	/* TODO: use a hash or array, this sucks. */
 	/* TODO: use a hash or array, this sucks. */
 	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
 	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
 		if (kiocb->ki_user_iocb == iocb)
 		if (kiocb->ki_user_iocb == iocb)
@@ -1787,25 +1943,24 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 {
 {
 	struct kioctx *ctx;
 	struct kioctx *ctx;
 	struct aio_kiocb *kiocb;
 	struct aio_kiocb *kiocb;
+	int ret = -EINVAL;
 	u32 key;
 	u32 key;
-	int ret;
 
 
-	ret = get_user(key, &iocb->aio_key);
-	if (unlikely(ret))
+	if (unlikely(get_user(key, &iocb->aio_key)))
 		return -EFAULT;
 		return -EFAULT;
+	if (unlikely(key != KIOCB_KEY))
+		return -EINVAL;
 
 
 	ctx = lookup_ioctx(ctx_id);
 	ctx = lookup_ioctx(ctx_id);
 	if (unlikely(!ctx))
 	if (unlikely(!ctx))
 		return -EINVAL;
 		return -EINVAL;
 
 
 	spin_lock_irq(&ctx->ctx_lock);
 	spin_lock_irq(&ctx->ctx_lock);
-
-	kiocb = lookup_kiocb(ctx, iocb, key);
-	if (kiocb)
-		ret = kiocb_cancel(kiocb);
-	else
-		ret = -EINVAL;
-
+	kiocb = lookup_kiocb(ctx, iocb);
+	if (kiocb) {
+		ret = kiocb->ki_cancel(&kiocb->rw);
+		list_del_init(&kiocb->ki_list);
+	}
 	spin_unlock_irq(&ctx->ctx_lock);
 	spin_unlock_irq(&ctx->ctx_lock);
 
 
 	if (!ret) {
 	if (!ret) {
@@ -1860,13 +2015,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
 		struct timespec __user *, timeout)
 		struct timespec __user *, timeout)
 {
 {
 	struct timespec64	ts;
 	struct timespec64	ts;
+	int			ret;
+
+	if (timeout && unlikely(get_timespec64(&ts, timeout)))
+		return -EFAULT;
+
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+	if (!ret && signal_pending(current))
+		ret = -EINTR;
+	return ret;
+}
+
+SYSCALL_DEFINE6(io_pgetevents,
+		aio_context_t, ctx_id,
+		long, min_nr,
+		long, nr,
+		struct io_event __user *, events,
+		struct timespec __user *, timeout,
+		const struct __aio_sigset __user *, usig)
+{
+	struct __aio_sigset	ksig = { NULL, };
+	sigset_t		ksigmask, sigsaved;
+	struct timespec64	ts;
+	int ret;
+
+	if (timeout && unlikely(get_timespec64(&ts, timeout)))
+		return -EFAULT;
+
+	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+		return -EFAULT;
 
 
-	if (timeout) {
-		if (unlikely(get_timespec64(&ts, timeout)))
+	if (ksig.sigmask) {
+		if (ksig.sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
 			return -EFAULT;
 			return -EFAULT;
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+	if (signal_pending(current)) {
+		if (ksig.sigmask) {
+			current->saved_sigmask = sigsaved;
+			set_restore_sigmask();
+		}
+
+		if (!ret)
+			ret = -ERESTARTNOHAND;
+	} else {
+		if (ksig.sigmask)
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 	}
 	}
 
 
-	return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+	return ret;
 }
 }
 
 
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_COMPAT
@@ -1877,13 +2079,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
 		       struct compat_timespec __user *, timeout)
 		       struct compat_timespec __user *, timeout)
 {
 {
 	struct timespec64 t;
 	struct timespec64 t;
+	int ret;
+
+	if (timeout && compat_get_timespec64(&t, timeout))
+		return -EFAULT;
+
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+	if (!ret && signal_pending(current))
+		ret = -EINTR;
+	return ret;
+}
+
+
+struct __compat_aio_sigset {
+	compat_sigset_t __user	*sigmask;
+	compat_size_t		sigsetsize;
+};
+
+COMPAT_SYSCALL_DEFINE6(io_pgetevents,
+		compat_aio_context_t, ctx_id,
+		compat_long_t, min_nr,
+		compat_long_t, nr,
+		struct io_event __user *, events,
+		struct compat_timespec __user *, timeout,
+		const struct __compat_aio_sigset __user *, usig)
+{
+	struct __compat_aio_sigset ksig = { NULL, };
+	sigset_t ksigmask, sigsaved;
+	struct timespec64 t;
+	int ret;
+
+	if (timeout && compat_get_timespec64(&t, timeout))
+		return -EFAULT;
+
+	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+		return -EFAULT;
 
 
-	if (timeout) {
-		if (compat_get_timespec64(&t, timeout))
+	if (ksig.sigmask) {
+		if (ksig.sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (get_compat_sigset(&ksigmask, ksig.sigmask))
 			return -EFAULT;
 			return -EFAULT;
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
 
 
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+	if (signal_pending(current)) {
+		if (ksig.sigmask) {
+			current->saved_sigmask = sigsaved;
+			set_restore_sigmask();
+		}
+		if (!ret)
+			ret = -ERESTARTNOHAND;
+	} else {
+		if (ksig.sigmask)
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 	}
 	}
 
 
-	return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+	return ret;
 }
 }
 #endif
 #endif
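
The new syscall pairs an io_getevents-style wait with a temporary signal mask, analogous to pselect(2)/ppoll(2). Since neither glibc nor libaio wrapped io_pgetevents at this point, a raw invocation has to spell out the syscall number and the __aio_sigset layout itself. A minimal userspace sketch, assuming x86-64: the local struct aio_sigset mirrors the uapi struct __aio_sigset, the fallback syscall number 333 is the x86-64 slot (292 in the asm-generic table added below), and the 8-byte sigsetsize is the kernel's sigset size (_NSIG/8), not glibc's sizeof(sigset_t).

/* Sketch only: raw io_pgetevents(2) wrapper, assuming x86-64. */
#define _GNU_SOURCE
#include <signal.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef __NR_io_pgetevents
#define __NR_io_pgetevents 333		/* x86-64 slot; assumption, check your headers */
#endif

typedef unsigned long aio_context_t;			/* mirrors <linux/aio_abi.h> */
struct io_event { uint64_t data, obj; int64_t res, res2; };

struct aio_sigset {					/* mirrors struct __aio_sigset */
	const sigset_t	*sigmask;
	size_t		sigsetsize;
};

static long my_io_pgetevents(aio_context_t ctx, long min_nr, long nr,
			     struct io_event *events, struct timespec *timeout,
			     const sigset_t *sigmask)
{
	struct aio_sigset sig = {
		.sigmask    = sigmask,
		/* kernel sigset size (_NSIG/8), not glibc's sizeof(sigset_t) */
		.sigsetsize = 8,
	};

	return syscall(__NR_io_pgetevents, ctx, min_nr, nr, events, timeout,
		       sigmask ? &sig : NULL);
}

The usual pattern is the same as with ppoll(2): keep the signals of interest blocked for the rest of the program and pass an unblocked mask only for the duration of the wait, so delivery can happen only while the task sleeps inside the syscall.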

+ 11 - 4
fs/eventfd.c

@@ -101,14 +101,20 @@ static int eventfd_release(struct inode *inode, struct file *file)
 	return 0;
 	return 0;
 }
 }
 
 
-static __poll_t eventfd_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *
+eventfd_get_poll_head(struct file *file, __poll_t events)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+
+	return &ctx->wqh;
+}
+
+static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
 {
 {
 	struct eventfd_ctx *ctx = file->private_data;
 	struct eventfd_ctx *ctx = file->private_data;
 	__poll_t events = 0;
 	__poll_t events = 0;
 	u64 count;
 	u64 count;
 
 
-	poll_wait(file, &ctx->wqh, wait);
-
 	/*
 	/*
 	 * All writes to ctx->count occur within ctx->wqh.lock.  This read
 	 * All writes to ctx->count occur within ctx->wqh.lock.  This read
 	 * can be done outside ctx->wqh.lock because we know that poll_wait
 	 * can be done outside ctx->wqh.lock because we know that poll_wait
@@ -305,7 +311,8 @@ static const struct file_operations eventfd_fops = {
 	.show_fdinfo	= eventfd_show_fdinfo,
 	.show_fdinfo	= eventfd_show_fdinfo,
 #endif
 #endif
 	.release	= eventfd_release,
 	.release	= eventfd_release,
-	.poll		= eventfd_poll,
+	.get_poll_head	= eventfd_get_poll_head,
+	.poll_mask	= eventfd_poll_mask,
 	.read		= eventfd_read,
 	.read		= eventfd_read,
 	.write		= eventfd_write,
 	.write		= eventfd_write,
 	.llseek		= noop_llseek,
 	.llseek		= noop_llseek,
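
Nothing in this split is supposed to be visible from userspace: an eventfd reports readiness through poll/select/epoll exactly as before, only the in-kernel plumbing changed. A trivial smoke test (independent of the new internals, included just to show the observable behaviour):

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
	int efd = eventfd(0, EFD_NONBLOCK);
	struct pollfd pfd = { .fd = efd, .events = POLLIN };
	uint64_t v = 1;

	if (efd < 0)
		return 1;
	printf("before write: %d\n", poll(&pfd, 1, 0));	/* 0: not readable yet */
	if (write(efd, &v, sizeof(v)) < 0)
		return 1;
	printf("after write:  %d\n", poll(&pfd, 1, 0));	/* 1: POLLIN reported */
	close(efd);
	return 0;
}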

+ 2 - 3
fs/eventpoll.c

@@ -884,8 +884,7 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
 
 
 	pt->_key = epi->event.events;
 	pt->_key = epi->event.events;
 	if (!is_file_epoll(epi->ffd.file))
 	if (!is_file_epoll(epi->ffd.file))
-		return epi->ffd.file->f_op->poll(epi->ffd.file, pt) &
-		       epi->event.events;
+		return vfs_poll(epi->ffd.file, pt) & epi->event.events;
 
 
 	ep = epi->ffd.file->private_data;
 	ep = epi->ffd.file->private_data;
 	poll_wait(epi->ffd.file, &ep->poll_wait, pt);
 	poll_wait(epi->ffd.file, &ep->poll_wait, pt);
@@ -2025,7 +2024,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 
 
 	/* The target file descriptor must support poll */
 	/* The target file descriptor must support poll */
 	error = -EPERM;
 	error = -EPERM;
-	if (!tf.file->f_op->poll)
+	if (!file_can_poll(tf.file))
 		goto error_tgt_fput;
 		goto error_tgt_fput;
 
 
 	/* Check if EPOLLWAKEUP is allowed */
 	/* Check if EPOLLWAKEUP is allowed */

+ 13 - 9
fs/pipe.c

@@ -509,19 +509,22 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	}
 	}
 }
 }
 
 
-/* No kernel lock held - fine */
-static __poll_t
-pipe_poll(struct file *filp, poll_table *wait)
+static struct wait_queue_head *
+pipe_get_poll_head(struct file *filp, __poll_t events)
 {
 {
-	__poll_t mask;
 	struct pipe_inode_info *pipe = filp->private_data;
 	struct pipe_inode_info *pipe = filp->private_data;
-	int nrbufs;
 
 
-	poll_wait(filp, &pipe->wait, wait);
+	return &pipe->wait;
+}
+
+/* No kernel lock held - fine */
+static __poll_t pipe_poll_mask(struct file *filp, __poll_t events)
+{
+	struct pipe_inode_info *pipe = filp->private_data;
+	int nrbufs = pipe->nrbufs;
+	__poll_t mask = 0;
 
 
 	/* Reading only -- no need for acquiring the semaphore.  */
 	/* Reading only -- no need for acquiring the semaphore.  */
-	nrbufs = pipe->nrbufs;
-	mask = 0;
 	if (filp->f_mode & FMODE_READ) {
 	if (filp->f_mode & FMODE_READ) {
 		mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
 		mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
 		if (!pipe->writers && filp->f_version != pipe->w_counter)
 		if (!pipe->writers && filp->f_version != pipe->w_counter)
@@ -1020,7 +1023,8 @@ const struct file_operations pipefifo_fops = {
 	.llseek		= no_llseek,
 	.llseek		= no_llseek,
 	.read_iter	= pipe_read,
 	.read_iter	= pipe_read,
 	.write_iter	= pipe_write,
 	.write_iter	= pipe_write,
-	.poll		= pipe_poll,
+	.get_poll_head	= pipe_get_poll_head,
+	.poll_mask	= pipe_poll_mask,
 	.unlocked_ioctl	= pipe_ioctl,
 	.unlocked_ioctl	= pipe_ioctl,
 	.release	= pipe_release,
 	.release	= pipe_release,
 	.fasync		= pipe_fasync,
 	.fasync		= pipe_fasync,

+ 49 - 36
fs/select.c

@@ -34,6 +34,29 @@
 
 
 #include <linux/uaccess.h>
 #include <linux/uaccess.h>
 
 
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
+{
+	if (file->f_op->poll) {
+		return file->f_op->poll(file, pt);
+	} else if (file_has_poll_mask(file)) {
+		unsigned int events = poll_requested_events(pt);
+		struct wait_queue_head *head;
+
+		if (pt && pt->_qproc) {
+			head = file->f_op->get_poll_head(file, events);
+			if (!head)
+				return DEFAULT_POLLMASK;
+			if (IS_ERR(head))
+				return EPOLLERR;
+			pt->_qproc(file, head, pt);
+		}
+
+		return file->f_op->poll_mask(file, events);
+	} else {
+		return DEFAULT_POLLMASK;
+	}
+}
+EXPORT_SYMBOL_GPL(vfs_poll);
 
 
 /*
 /*
  * Estimate expected accuracy in ns from a timeval.
  * Estimate expected accuracy in ns from a timeval.
@@ -233,7 +256,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 	add_wait_queue(wait_address, &entry->wait);
 	add_wait_queue(wait_address, &entry->wait);
 }
 }
 
 
-int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
 			  ktime_t *expires, unsigned long slack)
 			  ktime_t *expires, unsigned long slack)
 {
 {
 	int rc = -EINTR;
 	int rc = -EINTR;
@@ -258,7 +281,6 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
 
 
 	return rc;
 	return rc;
 }
 }
-EXPORT_SYMBOL(poll_schedule_timeout);
 
 
 /**
 /**
  * poll_select_set_timeout - helper function to setup the timeout value
  * poll_select_set_timeout - helper function to setup the timeout value
@@ -503,14 +525,10 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
 					continue;
 					continue;
 				f = fdget(i);
 				f = fdget(i);
 				if (f.file) {
 				if (f.file) {
-					const struct file_operations *f_op;
-					f_op = f.file->f_op;
-					mask = DEFAULT_POLLMASK;
-					if (f_op->poll) {
-						wait_key_set(wait, in, out,
-							     bit, busy_flag);
-						mask = (*f_op->poll)(f.file, wait);
-					}
+					wait_key_set(wait, in, out, bit,
+						     busy_flag);
+					mask = vfs_poll(f.file, wait);
+
 					fdput(f);
 					fdput(f);
 					if ((mask & POLLIN_SET) && (in & bit)) {
 					if ((mask & POLLIN_SET) && (in & bit)) {
 						res_in |= bit;
 						res_in |= bit;
@@ -813,34 +831,29 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
 				     bool *can_busy_poll,
 				     bool *can_busy_poll,
 				     __poll_t busy_flag)
 				     __poll_t busy_flag)
 {
 {
-	__poll_t mask;
-	int fd;
-
-	mask = 0;
-	fd = pollfd->fd;
-	if (fd >= 0) {
-		struct fd f = fdget(fd);
-		mask = EPOLLNVAL;
-		if (f.file) {
-			/* userland u16 ->events contains POLL... bitmap */
-			__poll_t filter = demangle_poll(pollfd->events) |
-						EPOLLERR | EPOLLHUP;
-			mask = DEFAULT_POLLMASK;
-			if (f.file->f_op->poll) {
-				pwait->_key = filter;
-				pwait->_key |= busy_flag;
-				mask = f.file->f_op->poll(f.file, pwait);
-				if (mask & busy_flag)
-					*can_busy_poll = true;
-			}
-			/* Mask out unneeded events. */
-			mask &= filter;
-			fdput(f);
-		}
-	}
+	int fd = pollfd->fd;
+	__poll_t mask = 0, filter;
+	struct fd f;
+
+	if (fd < 0)
+		goto out;
+	mask = EPOLLNVAL;
+	f = fdget(fd);
+	if (!f.file)
+		goto out;
+
+	/* userland u16 ->events contains POLL... bitmap */
+	filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
+	pwait->_key = filter | busy_flag;
+	mask = vfs_poll(f.file, pwait);
+	if (mask & busy_flag)
+		*can_busy_poll = true;
+	mask &= filter;		/* Mask out unneeded events. */
+	fdput(f);
+
+out:
 	/* ... and so does ->revents */
 	/* ... and so does ->revents */
 	pollfd->revents = mangle_poll(mask);
 	pollfd->revents = mangle_poll(mask);
-
 	return mask;
 	return mask;
 }
 }
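
Callers elsewhere in the tree follow the same recipe as the epoll and memcg hunks in this series: probe with file_can_poll() where a hard "not pollable" answer is needed, and let vfs_poll() pick between ->poll and the new pair otherwise. A hedged sketch of a caller-side conversion (my_watch_file and my_wait are hypothetical; the poll_table is assumed to have been set up elsewhere, e.g. via poll_initwait()):

#include <linux/fs.h>
#include <linux/poll.h>

static int my_watch_file(struct file *file, poll_table *my_wait)
{
	__poll_t mask;

	/* neither ->poll nor ->get_poll_head/->poll_mask: reject, as epoll_ctl() does */
	if (!file_can_poll(file))
		return -EPERM;

	/* queues us on the file's waitqueue (if my_wait has a _qproc) and returns readiness */
	mask = vfs_poll(file, my_wait);

	return (mask & (EPOLLIN | EPOLLRDNORM)) ? 1 : 0;
}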
 
 

+ 11 - 11
fs/timerfd.c

@@ -226,21 +226,20 @@ static int timerfd_release(struct inode *inode, struct file *file)
 	kfree_rcu(ctx, rcu);
 	kfree_rcu(ctx, rcu);
 	return 0;
 	return 0;
 }
 }
-
-static __poll_t timerfd_poll(struct file *file, poll_table *wait)
+	
+
+static struct wait_queue_head *timerfd_get_poll_head(struct file *file,
+		__poll_t eventmask)
 {
 {
 	struct timerfd_ctx *ctx = file->private_data;
 	struct timerfd_ctx *ctx = file->private_data;
-	__poll_t events = 0;
-	unsigned long flags;
 
 
-	poll_wait(file, &ctx->wqh, wait);
+	return &ctx->wqh;
+}
 
 
-	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	if (ctx->ticks)
-		events |= EPOLLIN;
-	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask)
+{
+	struct timerfd_ctx *ctx = file->private_data;
 
 
-	return events;
+	return ctx->ticks ? EPOLLIN : 0;
 }
 }
 
 
 static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
@@ -364,7 +363,8 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 
 
 static const struct file_operations timerfd_fops = {
 static const struct file_operations timerfd_fops = {
 	.release	= timerfd_release,
 	.release	= timerfd_release,
-	.poll		= timerfd_poll,
+	.get_poll_head	= timerfd_get_poll_head,
+	.poll_mask	= timerfd_poll_mask,
 	.read		= timerfd_read,
 	.read		= timerfd_read,
 	.llseek		= noop_llseek,
 	.llseek		= noop_llseek,
 	.show_fdinfo	= timerfd_show,
 	.show_fdinfo	= timerfd_show,

+ 1 - 2
include/crypto/if_alg.h

@@ -245,8 +245,7 @@ ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
 			int offset, size_t size, int flags);
 			int offset, size_t size, int flags);
 void af_alg_free_resources(struct af_alg_async_req *areq);
 void af_alg_free_resources(struct af_alg_async_req *areq);
 void af_alg_async_cb(struct crypto_async_request *_req, int err);
 void af_alg_async_cb(struct crypto_async_request *_req, int err);
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
-			 poll_table *wait);
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events);
 struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
 struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
 					   unsigned int areqlen);
 					   unsigned int areqlen);
 int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,

+ 0 - 2
include/linux/aio.h

@@ -8,8 +8,6 @@ struct kioctx;
 struct kiocb;
 struct kiocb;
 struct mm_struct;
 struct mm_struct;
 
 
-#define KIOCB_KEY		0
-
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 
 
 /* prototypes */
 /* prototypes */

+ 7 - 0
include/linux/compat.h

@@ -330,6 +330,7 @@ extern int put_compat_rusage(const struct rusage *,
 			     struct compat_rusage __user *);
 			     struct compat_rusage __user *);
 
 
 struct compat_siginfo;
 struct compat_siginfo;
+struct __compat_aio_sigset;
 
 
 struct compat_dirent {
 struct compat_dirent {
 	u32		d_ino;
 	u32		d_ino;
@@ -553,6 +554,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
 					compat_long_t nr,
 					compat_long_t nr,
 					struct io_event __user *events,
 					struct io_event __user *events,
 					struct compat_timespec __user *timeout);
 					struct compat_timespec __user *timeout);
+asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
+					compat_long_t min_nr,
+					compat_long_t nr,
+					struct io_event __user *events,
+					struct compat_timespec __user *timeout,
+					const struct __compat_aio_sigset __user *usig);
 
 
 /* fs/cookies.c */
 /* fs/cookies.c */
 asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
 asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);

+ 2 - 0
include/linux/fs.h

@@ -1711,6 +1711,8 @@ struct file_operations {
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
 	__poll_t (*poll) (struct file *, struct poll_table_struct *);
 	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*mmap) (struct file *, struct vm_area_struct *);
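
For a file whose wakeups all come from one wait_queue_head, a conversion to the new pair looks like the eventfd, timerfd and pipe hunks in this series: ->get_poll_head hands the core the queue to sleep on, and ->poll_mask reports readiness without calling poll_wait(). A minimal sketch for a hypothetical character device (struct mydev, its wq and data_avail fields, and the fops table are all made up for illustration):

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/wait.h>

struct mydev {
	struct wait_queue_head	wq;		/* woken by the producer side */
	bool			data_avail;
};

static struct wait_queue_head *mydev_get_poll_head(struct file *file,
						   __poll_t events)
{
	struct mydev *dev = file->private_data;

	return &dev->wq;
}

static __poll_t mydev_poll_mask(struct file *file, __poll_t events)
{
	struct mydev *dev = file->private_data;

	/* no poll_wait() here: the caller already queued itself on the head above */
	return READ_ONCE(dev->data_avail) ? EPOLLIN | EPOLLRDNORM : 0;
}

static const struct file_operations mydev_fops = {
	.owner		= THIS_MODULE,
	.get_poll_head	= mydev_get_poll_head,
	.poll_mask	= mydev_poll_mask,
	/* .read, .write, .open, ... */
};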

+ 1 - 0
include/linux/net.h

@@ -147,6 +147,7 @@ struct proto_ops {
 	int		(*getname)   (struct socket *sock,
 	int		(*getname)   (struct socket *sock,
 				      struct sockaddr *addr,
 				      struct sockaddr *addr,
 				      int peer);
 				      int peer);
+	__poll_t	(*poll_mask) (struct socket *sock, __poll_t events);
 	__poll_t	(*poll)	     (struct file *file, struct socket *sock,
 	__poll_t	(*poll)	     (struct file *file, struct socket *sock,
 				      struct poll_table_struct *wait);
 				      struct poll_table_struct *wait);
 	int		(*ioctl)     (struct socket *sock, unsigned int cmd,
 	int		(*ioctl)     (struct socket *sock, unsigned int cmd,

+ 12 - 2
include/linux/poll.h

@@ -74,6 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 	pt->_key   = ~(__poll_t)0; /* all events enabled */
 	pt->_key   = ~(__poll_t)0; /* all events enabled */
 }
 }
 
 
+static inline bool file_has_poll_mask(struct file *file)
+{
+	return file->f_op->get_poll_head && file->f_op->poll_mask;
+}
+
+static inline bool file_can_poll(struct file *file)
+{
+	return file->f_op->poll || file_has_poll_mask(file);
+}
+
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt);
+
 struct poll_table_entry {
 struct poll_table_entry {
 	struct file *filp;
 	struct file *filp;
 	__poll_t key;
 	__poll_t key;
@@ -96,8 +108,6 @@ struct poll_wqueues {
 
 
 extern void poll_initwait(struct poll_wqueues *pwq);
 extern void poll_initwait(struct poll_wqueues *pwq);
 extern void poll_freewait(struct poll_wqueues *pwq);
 extern void poll_freewait(struct poll_wqueues *pwq);
-extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
-				 ktime_t *expires, unsigned long slack);
 extern u64 select_estimate_accuracy(struct timespec64 *tv);
 extern u64 select_estimate_accuracy(struct timespec64 *tv);
 
 
 #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
 #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)

+ 1 - 2
include/linux/skbuff.h

@@ -3250,8 +3250,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
 				    int *peeked, int *off, int *err);
 				    int *peeked, int *off, int *err);
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
 				  int *err);
 				  int *err);
-__poll_t datagram_poll(struct file *file, struct socket *sock,
-			   struct poll_table_struct *wait);
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events);
 int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
 int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
 			   struct iov_iter *to, int size);
 			   struct iov_iter *to, int size);
 static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
 static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,

+ 6 - 0
include/linux/syscalls.h

@@ -290,6 +290,12 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
 				long nr,
 				long nr,
 				struct io_event __user *events,
 				struct io_event __user *events,
 				struct timespec __user *timeout);
 				struct timespec __user *timeout);
+asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
+				long min_nr,
+				long nr,
+				struct io_event __user *events,
+				struct timespec __user *timeout,
+				const struct __aio_sigset *sig);
 
 
 /* fs/xattr.c */
 /* fs/xattr.c */
 asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
 asmlinkage long sys_setxattr(const char __user *path, const char __user *name,

+ 1 - 1
include/net/bluetooth/bluetooth.h

@@ -271,7 +271,7 @@ int  bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		     int flags);
 		     int flags);
 int  bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
 int  bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
 			    size_t len, int flags);
 			    size_t len, int flags);
-__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events);
 int  bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int  bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int  bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
 int  bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
 int  bt_sock_wait_ready(struct sock *sk, unsigned long flags);
 int  bt_sock_wait_ready(struct sock *sk, unsigned long flags);

+ 15 - 0
include/net/busy_poll.h

@@ -121,6 +121,21 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 #endif
 }
 }
 
 
+static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
+{
+	if (sk_can_busy_loop(sock->sk) &&
+	    events && (events & POLL_BUSY_LOOP)) {
+		/* once, only if requested by syscall */
+		sk_busy_loop(sock->sk, 1);
+	}
+}
+
+/* if this socket can poll_ll, tell the system call */
+static inline __poll_t sock_poll_busy_flag(struct socket *sock)
+{
+	return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
+}
+
 /* used in the NIC receive handler to mark the skb */
 /* used in the NIC receive handler to mark the skb */
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 				    struct napi_struct *napi)
 				    struct napi_struct *napi)
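
These two helpers exist so the socket-level poll entry point can keep its busy-poll behaviour while the per-protocol readiness check moves into ->poll_mask. The actual wiring lives in net/socket.c, which is not part of this excerpt; schematically (names and exact shape are a sketch, not the real function) it amounts to:

#include <linux/net.h>
#include <net/busy_poll.h>

/* Sketch only; see net/socket.c for the real dispatch. Assumes the protocol
 * has already been converted and provides ->poll_mask. */
static __poll_t sock_poll_mask_sketch(struct socket *sock, __poll_t events)
{
	/* run the NAPI busy loop once, but only if the caller asked for it */
	sock_poll_busy_loop(sock, events);

	/* protocol readiness, plus the flag advertising busy-poll capability */
	return sock->ops->poll_mask(sock, events) | sock_poll_busy_flag(sock);
}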

+ 0 - 2
include/net/iucv/af_iucv.h

@@ -153,8 +153,6 @@ struct iucv_sock_list {
 	atomic_t	  autobind_name;
 	atomic_t	  autobind_name;
 };
 };
 
 
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
-			    poll_table *wait);
 void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
 void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
 void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
 void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
 void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
 void iucv_accept_enqueue(struct sock *parent, struct sock *sk);

+ 1 - 2
include/net/sctp/sctp.h

@@ -109,8 +109,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 int sctp_inet_listen(struct socket *sock, int backlog);
 int sctp_inet_listen(struct socket *sock, int backlog);
 void sctp_write_space(struct sock *sk);
 void sctp_write_space(struct sock *sk);
 void sctp_data_ready(struct sock *sk);
 void sctp_data_ready(struct sock *sk);
-__poll_t sctp_poll(struct file *file, struct socket *sock,
-		poll_table *wait);
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events);
 void sctp_sock_rfree(struct sk_buff *skb);
 void sctp_sock_rfree(struct sk_buff *skb);
 void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 		    struct sctp_association *asoc);
 		    struct sctp_association *asoc);

+ 0 - 2
include/net/sock.h

@@ -1591,8 +1591,6 @@ int sock_no_connect(struct socket *, struct sockaddr *, int, int);
 int sock_no_socketpair(struct socket *, struct socket *);
 int sock_no_socketpair(struct socket *, struct socket *);
 int sock_no_accept(struct socket *, struct socket *, int, bool);
 int sock_no_accept(struct socket *, struct socket *, int, bool);
 int sock_no_getname(struct socket *, struct sockaddr *, int);
 int sock_no_getname(struct socket *, struct sockaddr *, int);
-__poll_t sock_no_poll(struct file *, struct socket *,
-			  struct poll_table_struct *);
 int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
 int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
 int sock_no_listen(struct socket *, int);
 int sock_no_listen(struct socket *, int);
 int sock_no_shutdown(struct socket *, int);
 int sock_no_shutdown(struct socket *, int);

+ 1 - 2
include/net/tcp.h

@@ -388,8 +388,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
 void tcp_init_sock(struct sock *sk);
 void tcp_init_transfer(struct sock *sk, int bpf_op);
 void tcp_init_transfer(struct sock *sk, int bpf_op);
-__poll_t tcp_poll(struct file *file, struct socket *sock,
-		      struct poll_table_struct *wait);
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
 int tcp_getsockopt(struct sock *sk, int level, int optname,
 		   char __user *optval, int __user *optlen);
 		   char __user *optval, int __user *optlen);
 int tcp_setsockopt(struct sock *sk, int level, int optname,
 int tcp_setsockopt(struct sock *sk, int level, int optname,

+ 1 - 1
include/net/udp.h

@@ -276,7 +276,7 @@ int udp_init_sock(struct sock *sk);
 int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int __udp_disconnect(struct sock *sk, int flags);
 int __udp_disconnect(struct sock *sk, int flags);
 int udp_disconnect(struct sock *sk, int flags);
 int udp_disconnect(struct sock *sk, int flags);
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events);
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 				       netdev_features_t features,
 				       netdev_features_t features,
 				       bool is_ipv6);
 				       bool is_ipv6);

+ 3 - 1
include/uapi/asm-generic/unistd.h

@@ -732,9 +732,11 @@ __SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
 __SYSCALL(__NR_pkey_free,     sys_pkey_free)
 __SYSCALL(__NR_pkey_free,     sys_pkey_free)
 #define __NR_statx 291
 #define __NR_statx 291
 __SYSCALL(__NR_statx,     sys_statx)
 __SYSCALL(__NR_statx,     sys_statx)
+#define __NR_io_pgetevents 292
+__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 
 
 #undef __NR_syscalls
 #undef __NR_syscalls
-#define __NR_syscalls 292
+#define __NR_syscalls 293
 
 
 /*
 /*
  * 32 bit systems traditionally used different
  * 32 bit systems traditionally used different

+ 8 - 4
include/uapi/linux/aio_abi.h

@@ -29,6 +29,7 @@
 
 
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/fs.h>
 #include <linux/fs.h>
+#include <linux/signal.h>
 #include <asm/byteorder.h>
 #include <asm/byteorder.h>
 
 
 typedef __kernel_ulong_t aio_context_t;
 typedef __kernel_ulong_t aio_context_t;
@@ -38,10 +39,8 @@ enum {
 	IOCB_CMD_PWRITE = 1,
 	IOCB_CMD_PWRITE = 1,
 	IOCB_CMD_FSYNC = 2,
 	IOCB_CMD_FSYNC = 2,
 	IOCB_CMD_FDSYNC = 3,
 	IOCB_CMD_FDSYNC = 3,
-	/* These two are experimental.
-	 * IOCB_CMD_PREADX = 4,
-	 * IOCB_CMD_POLL = 5,
-	 */
+	/* 4 was the experimental IOCB_CMD_PREADX */
+	IOCB_CMD_POLL = 5,
 	IOCB_CMD_NOOP = 6,
 	IOCB_CMD_NOOP = 6,
 	IOCB_CMD_PREADV = 7,
 	IOCB_CMD_PREADV = 7,
 	IOCB_CMD_PWRITEV = 8,
 	IOCB_CMD_PWRITEV = 8,
@@ -108,5 +107,10 @@ struct iocb {
 #undef IFBIG
 #undef IFBIG
 #undef IFLITTLE
 #undef IFLITTLE
 
 
+struct __aio_sigset {
+	sigset_t __user	*sigmask;
+	size_t		sigsetsize;
+};
+
 #endif /* __LINUX__AIO_ABI_H */
 #endif /* __LINUX__AIO_ABI_H */
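
With IOCB_CMD_POLL now official, a poll request is just another iocb: the requested event mask is passed in aio_buf and the resulting mask comes back in the completion's res field once the fd becomes ready (the request is one-shot). A userspace sketch using raw syscalls, assuming <linux/aio_abi.h> coexists with the libc headers used here; error handling is minimal, and 0x0001 is POLLIN spelled out so no extra header is needed:

#include <linux/aio_abi.h>	/* struct iocb, struct io_event, IOCB_CMD_POLL */
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int aio_poll_once(int fd)
{
	aio_context_t ctx = 0;
	struct iocb cb, *cbs[1] = { &cb };
	struct io_event ev;
	int ret = -1;

	if (syscall(__NR_io_setup, 8, &ctx) < 0)
		return -1;

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes     = fd;
	cb.aio_lio_opcode = IOCB_CMD_POLL;
	cb.aio_buf        = 0x0001;	/* POLLIN: requested events, poll(2) style */

	if (syscall(__NR_io_submit, ctx, 1, cbs) != 1)
		goto out;

	/* completes once fd is readable; ev.res carries the returned mask */
	if (syscall(__NR_io_getevents, ctx, 1, 1, &ev, NULL) == 1)
		ret = (int)ev.res;
out:
	syscall(__NR_io_destroy, ctx);
	return ret;
}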
 
 

+ 0 - 4
include/uapi/linux/types.h

@@ -49,11 +49,7 @@ typedef __u32 __bitwise __wsum;
 #define __aligned_be64 __be64 __attribute__((aligned(8)))
 #define __aligned_be64 __be64 __attribute__((aligned(8)))
 #define __aligned_le64 __le64 __attribute__((aligned(8)))
 #define __aligned_le64 __le64 __attribute__((aligned(8)))
 
 
-#ifdef __CHECK_POLL
 typedef unsigned __bitwise __poll_t;
 typedef unsigned __bitwise __poll_t;
-#else
-typedef unsigned __poll_t;
-#endif
 
 
 #endif /*  __ASSEMBLY__ */
 #endif /*  __ASSEMBLY__ */
 #endif /* _UAPI_LINUX_TYPES_H */
 #endif /* _UAPI_LINUX_TYPES_H */

+ 2 - 0
kernel/sys_ni.c

@@ -43,7 +43,9 @@ COND_SYSCALL(io_submit);
 COND_SYSCALL_COMPAT(io_submit);
 COND_SYSCALL_COMPAT(io_submit);
 COND_SYSCALL(io_cancel);
 COND_SYSCALL(io_cancel);
 COND_SYSCALL(io_getevents);
 COND_SYSCALL(io_getevents);
+COND_SYSCALL(io_pgetevents);
 COND_SYSCALL_COMPAT(io_getevents);
 COND_SYSCALL_COMPAT(io_getevents);
+COND_SYSCALL_COMPAT(io_pgetevents);
 
 
 /* fs/xattr.c */
 /* fs/xattr.c */
 
 

+ 1 - 1
mm/memcontrol.c

@@ -3849,7 +3849,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
 	if (ret)
 	if (ret)
 		goto out_put_css;
 		goto out_put_css;
 
 
-	efile.file->f_op->poll(efile.file, &event->pt);
+	vfs_poll(efile.file, &event->pt);
 
 
 	spin_lock(&memcg->event_list_lock);
 	spin_lock(&memcg->event_list_lock);
 	list_add(&event->list, &memcg->event_list);
 	list_add(&event->list, &memcg->event_list);

+ 4 - 14
net/9p/trans_fd.c

@@ -231,7 +231,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 static __poll_t
 static __poll_t
 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
 {
 {
-	__poll_t ret, n;
+	__poll_t ret;
 	struct p9_trans_fd *ts = NULL;
 	struct p9_trans_fd *ts = NULL;
 
 
 	if (client && client->status == Connected)
 	if (client && client->status == Connected)
@@ -243,19 +243,9 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
 		return EPOLLERR;
 		return EPOLLERR;
 	}
 	}
 
 
-	if (!ts->rd->f_op->poll)
-		ret = DEFAULT_POLLMASK;
-	else
-		ret = ts->rd->f_op->poll(ts->rd, pt);
-
-	if (ts->rd != ts->wr) {
-		if (!ts->wr->f_op->poll)
-			n = DEFAULT_POLLMASK;
-		else
-			n = ts->wr->f_op->poll(ts->wr, pt);
-		ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN);
-	}
-
+	ret = vfs_poll(ts->rd, pt);
+	if (ts->rd != ts->wr)
+		ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
 	return ret;
 	return ret;
 }
 }
 
 

+ 1 - 1
net/appletalk/ddp.c

@@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = {
 	.socketpair	= sock_no_socketpair,
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.accept		= sock_no_accept,
 	.getname	= atalk_getname,
 	.getname	= atalk_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= atalk_ioctl,
 	.ioctl		= atalk_ioctl,
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= atalk_compat_ioctl,
 	.compat_ioctl	= atalk_compat_ioctl,

+ 3 - 8
net/atm/common.c

@@ -648,16 +648,11 @@ out:
 	return error;
 	return error;
 }
 }
 
 
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
-	struct atm_vcc *vcc;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
-
-	vcc = ATM_SD(sock);
+	struct atm_vcc *vcc = ATM_SD(sock);
+	__poll_t mask = 0;
 
 
 	/* exceptional events */
 	/* exceptional events */
 	if (sk->sk_err)
 	if (sk->sk_err)

+ 1 - 1
net/atm/common.h

@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
 int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		int flags);
 		int flags);
 int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
 int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events);
 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_setsockopt(struct socket *sock, int level, int optname,
 int vcc_setsockopt(struct socket *sock, int level, int optname,

+ 1 - 1
net/atm/pvc.c

@@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = {
 	.socketpair =	sock_no_socketpair,
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.accept =	sock_no_accept,
 	.getname =	pvc_getname,
 	.getname =	pvc_getname,
-	.poll =		vcc_poll,
+	.poll_mask =	vcc_poll_mask,
 	.ioctl =	vcc_ioctl,
 	.ioctl =	vcc_ioctl,
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = vcc_compat_ioctl,
 	.compat_ioctl = vcc_compat_ioctl,

+ 1 - 1
net/atm/svc.c

@@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = {
 	.socketpair =	sock_no_socketpair,
 	.socketpair =	sock_no_socketpair,
 	.accept =	svc_accept,
 	.accept =	svc_accept,
 	.getname =	svc_getname,
 	.getname =	svc_getname,
-	.poll =		vcc_poll,
+	.poll_mask =	vcc_poll_mask,
 	.ioctl =	svc_ioctl,
 	.ioctl =	svc_ioctl,
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_COMPAT
 	.compat_ioctl =	svc_compat_ioctl,
 	.compat_ioctl =	svc_compat_ioctl,

+ 1 - 1
net/ax25/af_ax25.c

@@ -1941,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = {
 	.socketpair	= sock_no_socketpair,
 	.socketpair	= sock_no_socketpair,
 	.accept		= ax25_accept,
 	.accept		= ax25_accept,
 	.getname	= ax25_getname,
 	.getname	= ax25_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= ax25_ioctl,
 	.ioctl		= ax25_ioctl,
 	.listen		= ax25_listen,
 	.listen		= ax25_listen,
 	.shutdown	= ax25_shutdown,
 	.shutdown	= ax25_shutdown,

+ 2 - 5
net/bluetooth/af_bluetooth.c

@@ -437,16 +437,13 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
 	return 0;
 	return 0;
 }
 }
 
 
-__poll_t bt_sock_poll(struct file *file, struct socket *sock,
-			  poll_table *wait)
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
 	__poll_t mask = 0;
 
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 	BT_DBG("sock %p, sk %p", sock, sk);
 
 
-	poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == BT_LISTEN)
 	if (sk->sk_state == BT_LISTEN)
 		return bt_accept_poll(sk);
 		return bt_accept_poll(sk);
 
 
@@ -478,7 +475,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
 
 
 	return mask;
 	return mask;
 }
 }
-EXPORT_SYMBOL(bt_sock_poll);
+EXPORT_SYMBOL(bt_sock_poll_mask);
 
 
 int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 {

+ 0 - 1
net/bluetooth/bnep/sock.c

@@ -175,7 +175,6 @@ static const struct proto_ops bnep_sock_ops = {
 	.getname	= sock_no_getname,
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
 	.setsockopt	= sock_no_setsockopt,

+ 0 - 1
net/bluetooth/cmtp/sock.c

@@ -178,7 +178,6 @@ static const struct proto_ops cmtp_sock_ops = {
 	.getname	= sock_no_getname,
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
 	.setsockopt	= sock_no_setsockopt,

+ 1 - 1
net/bluetooth/hci_sock.c

@@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = {
 	.sendmsg	= hci_sock_sendmsg,
 	.sendmsg	= hci_sock_sendmsg,
 	.recvmsg	= hci_sock_recvmsg,
 	.recvmsg	= hci_sock_recvmsg,
 	.ioctl		= hci_sock_ioctl,
 	.ioctl		= hci_sock_ioctl,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= hci_sock_setsockopt,
 	.setsockopt	= hci_sock_setsockopt,

+ 0 - 1
net/bluetooth/hidp/sock.c

@@ -208,7 +208,6 @@ static const struct proto_ops hidp_sock_ops = {
 	.getname	= sock_no_getname,
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
 	.setsockopt	= sock_no_setsockopt,

+ 1 - 1
net/bluetooth/l2cap_sock.c

@@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = {
 	.getname	= l2cap_sock_getname,
 	.getname	= l2cap_sock_getname,
 	.sendmsg	= l2cap_sock_sendmsg,
 	.sendmsg	= l2cap_sock_sendmsg,
 	.recvmsg	= l2cap_sock_recvmsg,
 	.recvmsg	= l2cap_sock_recvmsg,
-	.poll		= bt_sock_poll,
+	.poll_mask	= bt_sock_poll_mask,
 	.ioctl		= bt_sock_ioctl,
 	.ioctl		= bt_sock_ioctl,
 	.mmap		= sock_no_mmap,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
 	.socketpair	= sock_no_socketpair,

+ 1 - 1
net/bluetooth/rfcomm/sock.c

@@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = {
 	.setsockopt	= rfcomm_sock_setsockopt,
 	.setsockopt	= rfcomm_sock_setsockopt,
 	.getsockopt	= rfcomm_sock_getsockopt,
 	.getsockopt	= rfcomm_sock_getsockopt,
 	.ioctl		= rfcomm_sock_ioctl,
 	.ioctl		= rfcomm_sock_ioctl,
-	.poll		= bt_sock_poll,
+	.poll_mask	= bt_sock_poll_mask,
 	.socketpair	= sock_no_socketpair,
 	.socketpair	= sock_no_socketpair,
 	.mmap		= sock_no_mmap
 	.mmap		= sock_no_mmap
 };
 };

+ 1 - 1
net/bluetooth/sco.c

@@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = {
 	.getname	= sco_sock_getname,
 	.getname	= sco_sock_getname,
 	.sendmsg	= sco_sock_sendmsg,
 	.sendmsg	= sco_sock_sendmsg,
 	.recvmsg	= sco_sock_recvmsg,
 	.recvmsg	= sco_sock_recvmsg,
-	.poll		= bt_sock_poll,
+	.poll_mask	= bt_sock_poll_mask,
 	.ioctl		= bt_sock_ioctl,
 	.ioctl		= bt_sock_ioctl,
 	.mmap		= sock_no_mmap,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
 	.socketpair	= sock_no_socketpair,

+ 4 - 8
net/caif/caif_socket.c

@@ -934,15 +934,11 @@ static int caif_release(struct socket *sock)
 }
 }
 
 
 /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
 /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static __poll_t caif_poll(struct file *file,
-			      struct socket *sock, poll_table *wait)
+static __poll_t caif_poll_mask(struct socket *sock, __poll_t events)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
-	__poll_t mask;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 
 	/* exceptional events? */
 	/* exceptional events? */
 	if (sk->sk_err)
 	if (sk->sk_err)
@@ -976,7 +972,7 @@ static const struct proto_ops caif_seqpacket_ops = {
 	.socketpair = sock_no_socketpair,
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.accept = sock_no_accept,
 	.getname = sock_no_getname,
 	.getname = sock_no_getname,
-	.poll = caif_poll,
+	.poll_mask = caif_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.listen = sock_no_listen,
 	.shutdown = sock_no_shutdown,
 	.shutdown = sock_no_shutdown,
@@ -997,7 +993,7 @@ static const struct proto_ops caif_stream_ops = {
 	.socketpair = sock_no_socketpair,
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.accept = sock_no_accept,
 	.getname = sock_no_getname,
 	.getname = sock_no_getname,
-	.poll = caif_poll,
+	.poll_mask = caif_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.listen = sock_no_listen,
 	.shutdown = sock_no_shutdown,
 	.shutdown = sock_no_shutdown,

+ 1 - 1
net/can/bcm.c

@@ -1657,7 +1657,7 @@ static const struct proto_ops bcm_ops = {
 	.socketpair    = sock_no_socketpair,
 	.socketpair    = sock_no_socketpair,
 	.accept        = sock_no_accept,
 	.accept        = sock_no_accept,
 	.getname       = sock_no_getname,
 	.getname       = sock_no_getname,
-	.poll          = datagram_poll,
+	.poll_mask     = datagram_poll_mask,
 	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.listen        = sock_no_listen,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
 	.shutdown      = sock_no_shutdown,

+ 1 - 1
net/can/raw.c

@@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = {
 	.socketpair    = sock_no_socketpair,
 	.socketpair    = sock_no_socketpair,
 	.accept        = sock_no_accept,
 	.accept        = sock_no_accept,
 	.getname       = raw_getname,
 	.getname       = raw_getname,
-	.poll          = datagram_poll,
+	.poll_mask     = datagram_poll_mask,
 	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.listen        = sock_no_listen,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
 	.shutdown      = sock_no_shutdown,

+ 4 - 9
net/core/datagram.c

@@ -819,9 +819,8 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
 
 
 /**
 /**
  * 	datagram_poll - generic datagram poll
  * 	datagram_poll - generic datagram poll
- *	@file: file struct
  *	@sock: socket
  *	@sock: socket
- *	@wait: poll table
+ *	@events - events to wait for
  *
  *
  *	Datagram poll: Again totally generic. This also handles
  *	Datagram poll: Again totally generic. This also handles
  *	sequenced packet sockets providing the socket receive queue
  *	sequenced packet sockets providing the socket receive queue
@@ -831,14 +830,10 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
  *	and you use a different write policy from sock_writeable()
  *	and you use a different write policy from sock_writeable()
  *	then please supply your own write_space callback.
  *	then please supply your own write_space callback.
  */
  */
-__poll_t datagram_poll(struct file *file, struct socket *sock,
-			   poll_table *wait)
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 
 	/* exceptional events? */
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -871,4 +866,4 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
 
 
 	return mask;
 	return mask;
 }
 }
-EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(datagram_poll_mask);
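
A protocol whose readiness is fully described by the generic datagram logic can now point .poll_mask straight at datagram_poll_mask, as many proto_ops hunks in this series do; one that needs extra bits wraps it, the way the DECnet and UDP conversions further down wrap it. A sketch of such a wrapper for a hypothetical protocol (the myproto_* names and the out-of-band queue are made up):

#include <linux/poll.h>
#include <linux/skbuff.h>
#include <net/sock.h>

struct myproto_sock {			/* hypothetical; struct sock must come first */
	struct sock		sk;
	struct sk_buff_head	oob_queue;
};

static __poll_t myproto_poll_mask(struct socket *sock, __poll_t events)
{
	struct myproto_sock *msk = (struct myproto_sock *)sock->sk;
	__poll_t mask = datagram_poll_mask(sock, events);	/* generic part */

	/* protocol-specific extra readiness, e.g. queued out-of-band data */
	if (!skb_queue_empty(&msk->oob_queue))
		mask |= EPOLLPRI;

	return mask;
}

The protocol's proto_ops then carries .poll_mask = myproto_poll_mask and drops the old .poll entry, as in every conversion in this series.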

+ 0 - 6
net/core/sock.c

@@ -2567,12 +2567,6 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
 }
 }
 EXPORT_SYMBOL(sock_no_getname);
 EXPORT_SYMBOL(sock_no_getname);
 
 
-__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
-{
-	return 0;
-}
-EXPORT_SYMBOL(sock_no_poll);
-
 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 {
 	return -EOPNOTSUPP;
 	return -EOPNOTSUPP;

+ 1 - 2
net/dccp/dccp.h

@@ -316,8 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		 int flags, int *addr_len);
 		 int flags, int *addr_len);
 void dccp_shutdown(struct sock *sk, int how);
 void dccp_shutdown(struct sock *sk, int how);
 int inet_dccp_listen(struct socket *sock, int backlog);
 int inet_dccp_listen(struct socket *sock, int backlog);
-__poll_t dccp_poll(struct file *file, struct socket *sock,
-		       poll_table *wait);
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events);
 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 void dccp_req_err(struct sock *sk, u64 seq);
 void dccp_req_err(struct sock *sk, u64 seq);
 
 

+ 1 - 1
net/dccp/ipv4.c

@@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = {
 	.accept		   = inet_accept,
 	.accept		   = inet_accept,
 	.getname	   = inet_getname,
 	.getname	   = inet_getname,
 	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
 	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
-	.poll		   = dccp_poll,
+	.poll_mask	   = dccp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.ioctl		   = inet_ioctl,
 	/* FIXME: work on inet_listen to rename it to sock_common_listen */
 	/* FIXME: work on inet_listen to rename it to sock_common_listen */
 	.listen		   = inet_dccp_listen,
 	.listen		   = inet_dccp_listen,

+ 1 - 1
net/dccp/ipv6.c

@@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.accept		   = inet_accept,
 	.getname	   = inet6_getname,
 	.getname	   = inet6_getname,
-	.poll		   = dccp_poll,
+	.poll_mask	   = dccp_poll_mask,
 	.ioctl		   = inet6_ioctl,
 	.ioctl		   = inet6_ioctl,
 	.listen		   = inet_dccp_listen,
 	.listen		   = inet_dccp_listen,
 	.shutdown	   = inet_shutdown,
 	.shutdown	   = inet_shutdown,

+ 2 - 11
net/dccp/proto.c

@@ -312,20 +312,11 @@ int dccp_disconnect(struct sock *sk, int flags)
 
 
 EXPORT_SYMBOL_GPL(dccp_disconnect);
 EXPORT_SYMBOL_GPL(dccp_disconnect);
 
 
-/*
- *	Wait for a DCCP event.
- *
- *	Note that we don't need to lock the socket, as the upper poll layers
- *	take care of normal races (between the test and the event) and we don't
- *	go look at any of the socket buffers directly.
- */
-__poll_t dccp_poll(struct file *file, struct socket *sock,
-		       poll_table *wait)
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
 {
 {
 	__poll_t mask;
 	__poll_t mask;
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
 
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
 	if (sk->sk_state == DCCP_LISTEN)
 	if (sk->sk_state == DCCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 		return inet_csk_listen_poll(sk);
 
 
@@ -367,7 +358,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
 	return mask;
 	return mask;
 }
 }
 
 
-EXPORT_SYMBOL_GPL(dccp_poll);
+EXPORT_SYMBOL_GPL(dccp_poll_mask);
 
 
 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 {

+ 3 - 3
net/decnet/af_decnet.c

@@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
 }
 }
 
 
 
 
-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table  *wait)
+static __poll_t dn_poll_mask(struct socket *sock, __poll_t events)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
 	struct dn_scp *scp = DN_SK(sk);
-	__poll_t mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll_mask(sock, events);
 
 
 	if (!skb_queue_empty(&scp->other_receive_queue))
 	if (!skb_queue_empty(&scp->other_receive_queue))
 		mask |= EPOLLRDBAND;
 		mask |= EPOLLRDBAND;
@@ -2331,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = {
 	.socketpair =	sock_no_socketpair,
 	.socketpair =	sock_no_socketpair,
 	.accept =	dn_accept,
 	.accept =	dn_accept,
 	.getname =	dn_getname,
 	.getname =	dn_getname,
-	.poll =		dn_poll,
+	.poll_mask =	dn_poll_mask,
 	.ioctl =	dn_ioctl,
 	.ioctl =	dn_ioctl,
 	.listen =	dn_listen,
 	.listen =	dn_listen,
 	.shutdown =	dn_shutdown,
 	.shutdown =	dn_shutdown,

+ 2 - 2
net/ieee802154/socket.c

@@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.accept		   = sock_no_accept,
 	.getname	   = sock_no_getname,
 	.getname	   = sock_no_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = ieee802154_sock_ioctl,
 	.ioctl		   = ieee802154_sock_ioctl,
 	.listen		   = sock_no_listen,
 	.listen		   = sock_no_listen,
 	.shutdown	   = sock_no_shutdown,
 	.shutdown	   = sock_no_shutdown,
@@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.accept		   = sock_no_accept,
 	.getname	   = sock_no_getname,
 	.getname	   = sock_no_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = ieee802154_sock_ioctl,
 	.ioctl		   = ieee802154_sock_ioctl,
 	.listen		   = sock_no_listen,
 	.listen		   = sock_no_listen,
 	.shutdown	   = sock_no_shutdown,
 	.shutdown	   = sock_no_shutdown,

+ 4 - 4
net/ipv4/af_inet.c

@@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.accept		   = inet_accept,
 	.getname	   = inet_getname,
 	.getname	   = inet_getname,
-	.poll		   = tcp_poll,
+	.poll_mask	   = tcp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.ioctl		   = inet_ioctl,
 	.listen		   = inet_listen,
 	.listen		   = inet_listen,
 	.shutdown	   = inet_shutdown,
 	.shutdown	   = inet_shutdown,
@@ -1018,7 +1018,7 @@ const struct proto_ops inet_dgram_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.accept		   = sock_no_accept,
 	.getname	   = inet_getname,
 	.getname	   = inet_getname,
-	.poll		   = udp_poll,
+	.poll_mask	   = udp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sock_no_listen,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.shutdown	   = inet_shutdown,
@@ -1039,7 +1039,7 @@ EXPORT_SYMBOL(inet_dgram_ops);
 
 
 /*
 /*
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
- * udp_poll
+ * udp_poll_mask
  */
  */
 static const struct proto_ops inet_sockraw_ops = {
 static const struct proto_ops inet_sockraw_ops = {
 	.family		   = PF_INET,
 	.family		   = PF_INET,
@@ -1050,7 +1050,7 @@ static const struct proto_ops inet_sockraw_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.accept		   = sock_no_accept,
 	.getname	   = inet_getname,
 	.getname	   = inet_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sock_no_listen,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.shutdown	   = inet_shutdown,

+ 6 - 17
net/ipv4/tcp.c

@@ -494,32 +494,21 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
 }
 }
 
 
 /*
 /*
- *	Wait for a TCP event.
- *
- *	Note that we don't need to lock the socket, as the upper poll layers
- *	take care of normal races (between the test and the event) and we don't
- *	go look at any of the socket buffers directly.
+ * Socket is not locked. We are protected from async events by poll logic and
+ * correct handling of state changes made by other threads is impossible in
+ * any case.
  */
  */
-__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
 {
 {
-	__poll_t mask;
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
+	__poll_t mask = 0;
 	int state;
 	int state;
 
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	state = inet_sk_state_load(sk);
 	state = inet_sk_state_load(sk);
 	if (state == TCP_LISTEN)
 	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 		return inet_csk_listen_poll(sk);
 
 
-	/* Socket is not locked. We are protected from async events
-	 * by poll logic and correct handling of state changes
-	 * made by other threads is impossible in any case.
-	 */
-
-	mask = 0;
-
 	/*
 	/*
 	 * EPOLLHUP is certainly not done right. But poll() doesn't
 	 * EPOLLHUP is certainly not done right. But poll() doesn't
 	 * have a notion of HUP in just one direction, and for a
 	 * have a notion of HUP in just one direction, and for a
@@ -600,7 +589,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 
 	return mask;
 	return mask;
 }
 }
-EXPORT_SYMBOL(tcp_poll);
+EXPORT_SYMBOL(tcp_poll_mask);
 
 
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 {

+ 5 - 5
net/ipv4/udp.c

@@ -2501,7 +2501,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
  * 	udp_poll - wait for a UDP event.
  * 	udp_poll - wait for a UDP event.
  *	@file - file struct
  *	@file - file struct
  *	@sock - socket
  *	@sock - socket
- *	@wait - poll table
+ *	@events - events to wait for
  *
  *
  *	This is same as datagram poll, except for the special case of
  *	This is same as datagram poll, except for the special case of
  *	blocking sockets. If application is using a blocking fd
  *	blocking sockets. If application is using a blocking fd
@@ -2510,23 +2510,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
  *	but then block when reading it. Add special case code
  *	but then block when reading it. Add special case code
  *	to work around these arguably broken applications.
  *	to work around these arguably broken applications.
  */
  */
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events)
 {
 {
-	__poll_t mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll_mask(sock, events);
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
 
 
 	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
 	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 
 	/* Check for false positives due to checksum errors */
 	/* Check for false positives due to checksum errors */
-	if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+	if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) &&
 	    !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
 	    !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
 		mask &= ~(EPOLLIN | EPOLLRDNORM);
 		mask &= ~(EPOLLIN | EPOLLRDNORM);
 
 
 	return mask;
 	return mask;
 
 
 }
 }
-EXPORT_SYMBOL(udp_poll);
+EXPORT_SYMBOL(udp_poll_mask);
 
 
 int udp_abort(struct sock *sk, int err)
 int udp_abort(struct sock *sk, int err)
 {
 {

+ 2 - 2
net/ipv6/af_inet6.c

@@ -571,7 +571,7 @@ const struct proto_ops inet6_stream_ops = {
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.accept		   = inet_accept,		/* ok		*/
 	.accept		   = inet_accept,		/* ok		*/
 	.getname	   = inet6_getname,
 	.getname	   = inet6_getname,
-	.poll		   = tcp_poll,			/* ok		*/
+	.poll_mask	   = tcp_poll_mask,		/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.listen		   = inet_listen,		/* ok		*/
 	.listen		   = inet_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
@@ -601,7 +601,7 @@ const struct proto_ops inet6_dgram_ops = {
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.accept		   = sock_no_accept,		/* a do nothing	*/
 	.accept		   = sock_no_accept,		/* a do nothing	*/
 	.getname	   = inet6_getname,
 	.getname	   = inet6_getname,
-	.poll		   = udp_poll,			/* ok		*/
+	.poll_mask	   = udp_poll_mask,		/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.listen		   = sock_no_listen,		/* ok		*/
 	.listen		   = sock_no_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/

+ 2 - 2
net/ipv6/raw.c

@@ -1334,7 +1334,7 @@ void raw6_proc_exit(void)
 }
 }
 #endif	/* CONFIG_PROC_FS */
 #endif	/* CONFIG_PROC_FS */
 
 
-/* Same as inet6_dgram_ops, sans udp_poll.  */
+/* Same as inet6_dgram_ops, sans udp_poll_mask.  */
 const struct proto_ops inet6_sockraw_ops = {
 const struct proto_ops inet6_sockraw_ops = {
 	.family		   = PF_INET6,
 	.family		   = PF_INET6,
 	.owner		   = THIS_MODULE,
 	.owner		   = THIS_MODULE,
@@ -1344,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = {
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.accept		   = sock_no_accept,		/* a do nothing	*/
 	.accept		   = sock_no_accept,		/* a do nothing	*/
 	.getname	   = inet6_getname,
 	.getname	   = inet6_getname,
-	.poll		   = datagram_poll,		/* ok		*/
+	.poll_mask	   = datagram_poll_mask,	/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.listen		   = sock_no_listen,		/* ok		*/
 	.listen		   = sock_no_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/

+ 2 - 5
net/iucv/af_iucv.c

@@ -1488,14 +1488,11 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
 	return 0;
 }

-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
-			    poll_table *wait)
+static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;

-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == IUCV_LISTEN)
 		return iucv_accept_poll(sk);

@@ -2388,7 +2385,7 @@ static const struct proto_ops iucv_sock_ops = {
 	.getname	= iucv_sock_getname,
 	.sendmsg	= iucv_sock_sendmsg,
 	.recvmsg	= iucv_sock_recvmsg,
-	.poll		= iucv_sock_poll,
+	.poll_mask	= iucv_sock_poll_mask,
 	.ioctl		= sock_no_ioctl,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,

+ 5 - 5
net/kcm/kcmsock.c

@@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
 	struct list_head *head;
 	int index = 0;

-	/* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
-	 * we set sk_state, otherwise epoll_wait always returns right away with
-	 * EPOLLHUP
+	/* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state,
+	 * so  we set sk_state, otherwise epoll_wait always returns right away
+	 * with EPOLLHUP
 	 */
 	kcm->sk.sk_state = TCP_ESTABLISHED;

@@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	sock_no_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	kcm_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
@@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	sock_no_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	kcm_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,

+ 1 - 1
net/key/af_key.c

@@ -3751,7 +3751,7 @@ static const struct proto_ops pfkey_ops = {

 	/* Now the operations that really occur. */
 	.release	=	pfkey_release,
-	.poll		=	datagram_poll,
+	.poll_mask	=	datagram_poll_mask,
 	.sendmsg	=	pfkey_sendmsg,
 	.recvmsg	=	pfkey_recvmsg,
 };

+ 1 - 1
net/l2tp/l2tp_ip.c

@@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = l2tp_ip_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,

+ 1 - 1
net/l2tp/l2tp_ip6.c

@@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = l2tp_ip6_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = inet6_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,

+ 1 - 1
net/l2tp/l2tp_ppp.c

@@ -1788,7 +1788,7 @@ static const struct proto_ops pppol2tp_ops = {
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= pppol2tp_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= pppol2tp_setsockopt,

+ 1 - 1
net/llc/af_llc.c

@@ -1192,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = {
 	.socketpair  = sock_no_socketpair,
 	.accept      = llc_ui_accept,
 	.getname     = llc_ui_getname,
-	.poll	     = datagram_poll,
+	.poll_mask   = datagram_poll_mask,
 	.ioctl       = llc_ui_ioctl,
 	.listen      = llc_ui_listen,
 	.shutdown    = llc_ui_shutdown,

+ 1 - 1
net/netlink/af_netlink.c

@@ -2658,7 +2658,7 @@ static const struct proto_ops netlink_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	netlink_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	netlink_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,

+ 1 - 1
net/netrom/af_netrom.c

@@ -1355,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = {
 	.socketpair	=	sock_no_socketpair,
 	.accept		=	nr_accept,
 	.getname	=	nr_getname,
-	.poll		=	datagram_poll,
+	.poll_mask	=	datagram_poll_mask,
 	.ioctl		=	nr_ioctl,
 	.listen		=	nr_listen,
 	.shutdown	=	sock_no_shutdown,

+ 3 - 6
net/nfc/llcp_sock.c

@@ -548,16 +548,13 @@ static inline __poll_t llcp_accept_poll(struct sock *parent)
 	return 0;
 }

-static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
-				   poll_table *wait)
+static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;

 	pr_debug("%p\n", sk);

-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == LLCP_LISTEN)
 		return llcp_accept_poll(sk);

@@ -899,7 +896,7 @@ static const struct proto_ops llcp_sock_ops = {
 	.socketpair     = sock_no_socketpair,
 	.accept         = llcp_sock_accept,
 	.getname        = llcp_sock_getname,
-	.poll           = llcp_sock_poll,
+	.poll_mask      = llcp_sock_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = llcp_sock_listen,
 	.shutdown       = sock_no_shutdown,
@@ -919,7 +916,7 @@ static const struct proto_ops llcp_rawsock_ops = {
 	.socketpair     = sock_no_socketpair,
 	.accept         = sock_no_accept,
 	.getname        = llcp_sock_getname,
-	.poll           = llcp_sock_poll,
+	.poll_mask      = llcp_sock_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = sock_no_listen,
 	.shutdown       = sock_no_shutdown,

+ 2 - 2
net/nfc/rawsock.c

@@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = {
 	.socketpair     = sock_no_socketpair,
 	.accept         = sock_no_accept,
 	.getname        = sock_no_getname,
-	.poll           = datagram_poll,
+	.poll_mask      = datagram_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = sock_no_listen,
 	.shutdown       = sock_no_shutdown,
@@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = {
 	.socketpair     = sock_no_socketpair,
 	.accept         = sock_no_accept,
 	.getname        = sock_no_getname,
-	.poll           = datagram_poll,
+	.poll_mask      = datagram_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = sock_no_listen,
 	.shutdown       = sock_no_shutdown,

+ 4 - 5
net/packet/af_packet.c

@@ -4110,12 +4110,11 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
 	return 0;
 }

-static __poll_t packet_poll(struct file *file, struct socket *sock,
-				poll_table *wait)
+static __poll_t packet_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
-	__poll_t mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll_mask(sock, events);

 	spin_lock_bh(&sk->sk_receive_queue.lock);
 	if (po->rx_ring.pg_vec) {
@@ -4457,7 +4456,7 @@ static const struct proto_ops packet_ops_spkt = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	packet_getname_spkt,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	packet_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
@@ -4478,7 +4477,7 @@ static const struct proto_ops packet_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	packet_getname,
-	.poll =		packet_poll,
+	.poll_mask =	packet_poll_mask,
 	.ioctl =	packet_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,

+ 3 - 6
net/phonet/socket.c

@@ -340,15 +340,12 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
 	return sizeof(struct sockaddr_pn);
 }

-static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
-					poll_table *wait)
+static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct pep_sock *pn = pep_sk(sk);
 	__poll_t mask = 0;

-	poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == TCP_CLOSE)
 		return EPOLLERR;
 	if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -448,7 +445,7 @@ const struct proto_ops phonet_dgram_ops = {
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= pn_socket_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= pn_socket_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
@@ -473,7 +470,7 @@ const struct proto_ops phonet_stream_ops = {
 	.socketpair	= sock_no_socketpair,
 	.accept		= pn_socket_accept,
 	.getname	= pn_socket_getname,
-	.poll		= pn_socket_poll,
+	.poll_mask	= pn_socket_poll_mask,
 	.ioctl		= pn_socket_ioctl,
 	.listen		= pn_socket_listen,
 	.shutdown	= sock_no_shutdown,

+ 1 - 1
net/qrtr/qrtr.c

@@ -1023,7 +1023,7 @@ static const struct proto_ops qrtr_proto_ops = {
 	.recvmsg	= qrtr_recvmsg,
 	.getname	= qrtr_getname,
 	.ioctl		= qrtr_ioctl,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
 	.getsockopt	= sock_no_getsockopt,

+ 1 - 1
net/rose/af_rose.c

@@ -1470,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = {
 	.socketpair	=	sock_no_socketpair,
 	.accept		=	rose_accept,
 	.getname	=	rose_getname,
-	.poll		=	datagram_poll,
+	.poll_mask	=	datagram_poll_mask,
 	.ioctl		=	rose_ioctl,
 	.listen		=	rose_listen,
 	.shutdown	=	sock_no_shutdown,

+ 3 - 7
net/rxrpc/af_rxrpc.c

@@ -734,15 +734,11 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
 /*
  * permit an RxRPC socket to be polled
  */
-static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
-			       poll_table *wait)
+static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;

 	/* the socket is readable if there are any messages waiting on the Rx
 	 * queue */
@@ -949,7 +945,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= sock_no_getname,
-	.poll		= rxrpc_poll,
+	.poll_mask	= rxrpc_poll_mask,
 	.ioctl		= sock_no_ioctl,
 	.listen		= rxrpc_listen,
 	.shutdown	= rxrpc_shutdown,

+ 1 - 1
net/sctp/ipv6.c

@@ -1010,7 +1010,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.getname	   = sctp_getname,
-	.poll		   = sctp_poll,
+	.poll_mask	   = sctp_poll_mask,
 	.ioctl		   = inet6_ioctl,
 	.listen		   = sctp_inet_listen,
 	.shutdown	   = inet_shutdown,

+ 1 - 1
net/sctp/protocol.c

@@ -1016,7 +1016,7 @@ static const struct proto_ops inet_seqpacket_ops = {
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.getname	   = inet_getname,	/* Semantics are different.  */
-	.poll		   = sctp_poll,
+	.poll_mask	   = sctp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sctp_inet_listen,
 	.shutdown	   = inet_shutdown,	/* Looks harmless.  */

+ 1 - 3
net/sctp/socket.c

@@ -7722,14 +7722,12 @@ out:
  * here, again, by modeling the current TCP/UDP code.  We don't have
  * a good way to test with it yet.
  */
-__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct sctp_sock *sp = sctp_sk(sk);
 	__poll_t mask;

-	poll_wait(file, sk_sleep(sk), wait);
-
 	sock_rps_record_flow(sk);

 	/* A TCP-style listening socket becomes readable when the accept queue

+ 40 - 15
net/socket.c

@@ -117,8 +117,10 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
 static int sock_mmap(struct file *file, struct vm_area_struct *vma);

 static int sock_close(struct inode *inode, struct file *file);
-static __poll_t sock_poll(struct file *file,
-			      struct poll_table_struct *wait);
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+		__poll_t events);
+static __poll_t sock_poll_mask(struct file *file, __poll_t);
+static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file,
@@ -141,6 +143,8 @@ static const struct file_operations socket_file_ops = {
 	.llseek =	no_llseek,
 	.read_iter =	sock_read_iter,
 	.write_iter =	sock_write_iter,
+	.get_poll_head = sock_get_poll_head,
+	.poll_mask =	sock_poll_mask,
 	.poll =		sock_poll,
 	.unlocked_ioctl = sock_ioctl,
 #ifdef CONFIG_COMPAT
@@ -1114,27 +1118,48 @@ out_release:
 }
 EXPORT_SYMBOL(sock_create_lite);

-/* No kernel lock held - perfect */
-static __poll_t sock_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+		__poll_t events)
 {
-	__poll_t busy_flag = 0;
-	struct socket *sock;
+	struct socket *sock = file->private_data;
+
+	if (!sock->ops->poll_mask)
+		return NULL;
+	sock_poll_busy_loop(sock, events);
+	return sk_sleep(sock->sk);
+}
+
+static __poll_t sock_poll_mask(struct file *file, __poll_t events)
+{
+	struct socket *sock = file->private_data;

 	/*
-	 *      We can't return errors to poll, so it's either yes or no.
+	 * We need to be sure we are in sync with the socket flags modification.
+	 *
+	 * This memory barrier is paired in the wq_has_sleeper.
 	 */
-	sock = file->private_data;
+	smp_mb();
+
+	/* this socket can poll_ll so tell the system call */
+	return sock->ops->poll_mask(sock, events) |
+		(sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
+}

-	if (sk_can_busy_loop(sock->sk)) {
-		/* this socket can poll_ll so tell the system call */
-		busy_flag = POLL_BUSY_LOOP;
+/* No kernel lock held - perfect */
+static __poll_t sock_poll(struct file *file, poll_table *wait)
+{
+	struct socket *sock = file->private_data;
+	__poll_t events = poll_requested_events(wait), mask = 0;

-		/* once, only if requested by syscall */
-		if (wait && (wait->_key & POLL_BUSY_LOOP))
-			sk_busy_loop(sock->sk, 1);
+	if (sock->ops->poll) {
+		sock_poll_busy_loop(sock, events);
+		mask = sock->ops->poll(file, sock, wait);
+	} else if (sock->ops->poll_mask) {
+		sock_poll_wait(file, sock_get_poll_head(file, events), wait);
+		mask = sock->ops->poll_mask(sock, events);
 	}

-	return busy_flag | sock->ops->poll(file, sock, wait);
+	return mask | sock_poll_busy_flag(sock);
 }

 static int sock_mmap(struct file *file, struct vm_area_struct *vma)

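[Editor's note] The net/socket.c hunk above is the consumer side of the conversion: ->get_poll_head() hands back the wait queue, ->poll_mask() reports readiness, and the legacy ->poll() path is kept as a fallback. The sketch below illustrates how a poller can drive that pair of file operations under those assumptions; it is a simplified illustration, not the actual fs/select.c code, and poll_one_file() is a hypothetical helper.

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

/* Simplified illustration only -- the real plumbing lives in fs/select.c. */
static __poll_t poll_one_file(struct file *file, poll_table *pt, __poll_t events)
{
	struct wait_queue_head *head;

	/* Legacy path: the file still registers itself via ->poll(). */
	if (file->f_op->poll)
		return file->f_op->poll(file, pt);
	if (!file->f_op->get_poll_head || !file->f_op->poll_mask)
		return DEFAULT_POLLMASK;

	/* New path: fetch the queue once, register, then ask for the mask. */
	head = file->f_op->get_poll_head(file, events);
	if (!head)
		return DEFAULT_POLLMASK;
	if (IS_ERR(head))
		return EPOLLERR;

	poll_wait(file, head, pt);
	return file->f_op->poll_mask(file, events);
}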
+ 5 - 9
net/tipc/socket.c

@@ -692,10 +692,9 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
 }

 /**
- * tipc_poll - read and possibly block on pollmask
+ * tipc_poll - read pollmask
  * @file: file structure associated with the socket
  * @sock: socket for which to calculate the poll bits
- * @wait: ???
  *
  * Returns pollmask value
  *
@@ -709,15 +708,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
  * imply that the operation will succeed, merely that it should be performed
  * and will not block.
  */
-static __poll_t tipc_poll(struct file *file, struct socket *sock,
-			      poll_table *wait)
+static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
 	__poll_t revents = 0;

-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
@@ -3028,7 +3024,7 @@ static const struct proto_ops msg_ops = {
 	.socketpair	= tipc_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= tipc_getname,
-	.poll		= tipc_poll,
+	.poll_mask	= tipc_poll_mask,
 	.ioctl		= tipc_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= tipc_shutdown,
@@ -3049,7 +3045,7 @@ static const struct proto_ops packet_ops = {
 	.socketpair	= tipc_socketpair,
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
-	.poll		= tipc_poll,
+	.poll_mask	= tipc_poll_mask,
 	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,
@@ -3070,7 +3066,7 @@ static const struct proto_ops stream_ops = {
 	.socketpair	= tipc_socketpair,
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
-	.poll		= tipc_poll,
+	.poll_mask	= tipc_poll_mask,
 	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,

+ 11 - 19
net/unix/af_unix.c

@@ -638,9 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
 static int unix_socketpair(struct socket *, struct socket *);
 static int unix_accept(struct socket *, struct socket *, int, bool);
 static int unix_getname(struct socket *, struct sockaddr *, int);
-static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
-static __poll_t unix_dgram_poll(struct file *, struct socket *,
-				    poll_table *);
+static __poll_t unix_poll_mask(struct socket *, __poll_t);
+static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t);
 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 static int unix_shutdown(struct socket *, int);
 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -681,7 +680,7 @@ static const struct proto_ops unix_stream_ops = {
 	.socketpair =	unix_socketpair,
 	.accept =	unix_accept,
 	.getname =	unix_getname,
-	.poll =		unix_poll,
+	.poll_mask =	unix_poll_mask,
 	.ioctl =	unix_ioctl,
 	.listen =	unix_listen,
 	.shutdown =	unix_shutdown,
@@ -704,7 +703,7 @@ static const struct proto_ops unix_dgram_ops = {
 	.socketpair =	unix_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	unix_getname,
-	.poll =		unix_dgram_poll,
+	.poll_mask =	unix_dgram_poll_mask,
 	.ioctl =	unix_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	unix_shutdown,
@@ -726,7 +725,7 @@ static const struct proto_ops unix_seqpacket_ops = {
 	.socketpair =	unix_socketpair,
 	.accept =	unix_accept,
 	.getname =	unix_getname,
-	.poll =		unix_dgram_poll,
+	.poll_mask =	unix_dgram_poll_mask,
 	.ioctl =	unix_ioctl,
 	.listen =	unix_listen,
 	.shutdown =	unix_shutdown,
@@ -2630,13 +2629,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return err;
 }

-static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;

 	/* exceptional events? */
 	if (sk->sk_err)
@@ -2665,15 +2661,11 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
 	return mask;
 }

-static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
-				    poll_table *wait)
+static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk, *other;
-	unsigned int writable;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	int writable;
+	__poll_t mask = 0;

 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -2699,7 +2691,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
 	}

 	/* No write status requested, avoid expensive OUT tests. */
-	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
+	if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
 		return mask;

 	writable = unix_writable(sk);

+ 6 - 13
net/vmw_vsock/af_vsock.c

@@ -850,18 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode)
 	return err;
 }

-static __poll_t vsock_poll(struct file *file, struct socket *sock,
-			       poll_table *wait)
+static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events)
 {
-	struct sock *sk;
-	__poll_t mask;
-	struct vsock_sock *vsk;
-
-	sk = sock->sk;
-	vsk = vsock_sk(sk);
-
-	poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	struct sock *sk = sock->sk;
+	struct vsock_sock *vsk = vsock_sk(sk);
+	__poll_t mask = 0;

 	if (sk->sk_err)
 		/* Signify that there has been an error on this socket. */
@@ -1091,7 +1084,7 @@ static const struct proto_ops vsock_dgram_ops = {
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.getname = vsock_getname,
-	.poll = vsock_poll,
+	.poll_mask = vsock_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = vsock_shutdown,
@@ -1849,7 +1842,7 @@ static const struct proto_ops vsock_stream_ops = {
 	.socketpair = sock_no_socketpair,
 	.accept = vsock_accept,
 	.getname = vsock_getname,
-	.poll = vsock_poll,
+	.poll_mask = vsock_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.listen = vsock_listen,
 	.shutdown = vsock_shutdown,

+ 1 - 1
net/x25/af_x25.c

@@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	x25_accept,
 	.getname =	x25_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	x25_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_x25_ioctl,

+ 1 - 1
virt/kvm/eventfd.c

@@ -397,7 +397,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	 * Check if there was an event already pending on the eventfd
 	 * before we registered, and trigger it as if we didn't miss it.
 	 */
-	events = f.file->f_op->poll(f.file, &irqfd->pt);
+	events = vfs_poll(f.file, &irqfd->pt);

 	if (events & EPOLLIN)
 		schedule_work(&irqfd->inject);
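[Editor's note] The virt/kvm/eventfd.c hunk shows the pattern expected of callers that used to dereference file->f_op->poll directly: with some files now providing only ->poll_mask, that pointer can be NULL, so callers go through the vfs_poll() helper instead. A hedged caller-side sketch follows, assuming the poll_table has already been initialised by the caller (as irqfd->pt is above); check_readable() is a hypothetical helper, not part of this series.

#include <linux/fs.h>
#include <linux/poll.h>

/* Hypothetical helper, for illustration only. */
static bool check_readable(struct file *file, poll_table *pt)
{
	/* vfs_poll() copes with both ->poll and ->poll_mask based files. */
	return vfs_poll(file, pt) & (EPOLLIN | EPOLLRDNORM);
}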