From: Davide Libenzi <davidel@xmailserver.org>

The attached patch implements one-shot support for epoll.  Because of the
way epoll works (hooking f_op->poll()), the ET behavior is not strictly ET:
while data is still available to read (in the EPOLLIN case), another chunk
may become available and trigger another event.

While those conditions can easily be handled in userspace, the absolute
triviality of the patch and the avoidance of user/kernel space switches and
f_op->poll() calls make it, IMHO, worth doing this inside epoll itself.



 fs/eventpoll.c            |   14 ++++++++++++++
 include/linux/eventpoll.h |    3 +++
 2 files changed, 17 insertions(+)

diff -puN fs/eventpoll.c~epoll-oneshot-support fs/eventpoll.c
--- 25/fs/eventpoll.c~epoll-oneshot-support	2003-12-27 15:51:03.000000000 -0800
+++ 25-akpm/fs/eventpoll.c	2003-12-27 15:51:03.000000000 -0800
@@ -93,6 +93,8 @@
 #define EPI_SLAB_DEBUG 0
 #endif /* #if DEBUG_EPI != 0 */
 
+/* Epoll private bits inside the event mask */
+#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
 
 /* Maximum number of poll wake up nests we are allowing */
 #define EP_MAX_POLLWAKE_NESTS 4
@@ -1306,6 +1308,15 @@ static int ep_poll_callback(wait_queue_t
 
 	write_lock_irqsave(&ep->lock, flags);
 
+	/*
+	 * If the event mask does not contain any poll(2) event, we consider the
+	 * descriptor to be disabled. This condition is likely the effect of the
+	 * EPOLLONESHOT bit that disables the descriptor when an event is received,
+	 * until the next EPOLL_CTL_MOD is issued.
+	 */
+	if (!(epi->event.events & ~EP_PRIVATE_BITS))
+		goto is_disabled;
+
 	/* If this file is already in the ready list we exit soon */
 	if (EP_IS_LINKED(&epi->rdllink))
 		goto is_linked;
@@ -1322,6 +1333,7 @@ is_linked:
 	if (waitqueue_active(&ep->poll_wait))
 		pwake++;
 
+is_disabled:
 	write_unlock_irqrestore(&ep->lock, flags);
 
 	/* We have to call this outside the lock */
@@ -1458,6 +1470,8 @@ static int ep_send_events(struct eventpo
 				eventcnt += eventbuf;
 				eventbuf = 0;
 			}
+			if (epi->event.events & EPOLLONESHOT)
+				epi->event.events &= EP_PRIVATE_BITS;
 		}
 	}
 
diff -puN include/linux/eventpoll.h~epoll-oneshot-support include/linux/eventpoll.h
--- 25/include/linux/eventpoll.h~epoll-oneshot-support	2003-12-27 15:51:03.000000000 -0800
+++ 25-akpm/include/linux/eventpoll.h	2003-12-27 15:51:03.000000000 -0800
@@ -22,6 +22,9 @@
 #define EPOLL_CTL_DEL 2
 #define EPOLL_CTL_MOD 3
 
+/* Set the One Shot behaviour for the target file descriptor */
+#define EPOLLONESHOT (1 << 30)
+
 /* Set the Edge Triggered behaviour for the target file descriptor */
 #define EPOLLET (1 << 31)
 

_