From: Christoph Hellwig <hch@infradead.org>

Originally by David Mosberger, testing by Roger Luethi.  From the ia64 tree.

Basically, this patch restructures do_select() so that it avoids going to
memory all the time.  This makes life a lot easier for gcc, so it can actually
do a decent amount of optimization.  The restructuring is clearly less
important for out-of-order CPUs, but even there it gives some benefit.

More specifically, the loop is now structured to operate one "unsigned long"
at a time, rather than one bit at a time.  Of course, you still need to
process all the bits, but most of the relevant state in the inner loop can be
kept in registers.

Roger Luethi measured the routine on a bunch of different machines (mostly
x86, IIRC: P5, P6, Crusoe, Athlons) and performance improved there, too (and
it should definitely improve performance on any RISC-like architecture).


Roger's benchmarking results (vs number of fd's):

                                       File                   TCP
Number of fd's:                   10   250  500          10   250   500

UP, Pentium MMX 233MHz original	 8.2 108.5 212.8	11.0 180.0 356.5
UP, Pentium MMX 233MHz w/patch	 7.4  87.6 171.1	10.4 163.6 323.4

MP, Pentium MMX 233MHz original	15.7 283.8 562.8	18.9 354.4 705.5
MP, Pentium MMX 233MHz w/patch	14.6 255.6 506.5	17.8 332.8 664.1

UP, Athlon 1394 MHz original	 1.3  13.4  26.1	 1.9  24.7  48.6
UP, Athlon 1394 MHz w/patch	 1.2  11.0  21.5	 1.6  22.3  43.8

MP, Athlon 1394 MHz original	 1.6  22.4  44.6	 1.9  30.9  60.5
MP, Athlon 1394 MHz w/patch	 1.5  21.2  41.7	 1.9  30.2  59.6



 25-akpm/fs/select.c |   75 ++++++++++++++++++++++++++++++++--------------------
 1 files changed, 47 insertions(+), 28 deletions(-)

diff -puN fs/select.c~select-speedup fs/select.c
--- 25/fs/select.c~select-speedup	Thu Apr 17 16:53:41 2003
+++ 25-akpm/fs/select.c	Thu Apr 17 16:53:41 2003
@@ -176,7 +176,7 @@ int do_select(int n, fd_set_bits *fds, l
 {
 	struct poll_wqueues table;
 	poll_table *wait;
-	int retval, i, off;
+	int retval, i;
 	long __timeout = *timeout;
 
  	spin_lock(&current->files->file_lock);
@@ -193,38 +193,57 @@ int do_select(int n, fd_set_bits *fds, l
 		wait = NULL;
 	retval = 0;
 	for (;;) {
+		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
+
 		set_current_state(TASK_INTERRUPTIBLE);
-		for (i = 0 ; i < n; i++) {
-			unsigned long bit = BIT(i);
-			unsigned long mask;
-			struct file *file;
 
-			off = i / __NFDBITS;
-			if (!(bit & BITS(fds, off)))
+		inp = fds->in; outp = fds->out; exp = fds->ex;
+		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
+
+		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
+			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
+			unsigned long res_in = 0, res_out = 0, res_ex = 0;
+			struct file_operations *f_op = NULL;
+			struct file *file = NULL;
+
+			in = *inp++; out = *outp++; ex = *exp++;
+			all_bits = in | out | ex;
+			if (all_bits == 0)
 				continue;
-			file = fget(i);
-			mask = POLLNVAL;
-			if (file) {
+
+			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
+				if (i >= n)
+					break;
+				if (!(bit & all_bits))
+					continue;
+				file = fget(i);
+				if (file)
+					f_op = file->f_op;
 				mask = DEFAULT_POLLMASK;
-				if (file->f_op && file->f_op->poll)
-					mask = file->f_op->poll(file, wait);
-				fput(file);
-			}
-			if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
-				SET(bit, __RES_IN(fds,off));
-				retval++;
-				wait = NULL;
-			}
-			if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
-				SET(bit, __RES_OUT(fds,off));
-				retval++;
-				wait = NULL;
-			}
-			if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
-				SET(bit, __RES_EX(fds,off));
-				retval++;
-				wait = NULL;
+				if (file) {
+					if (f_op && f_op->poll)
+						mask = (*f_op->poll)(file, retval ? NULL : wait);
+					fput(file);
+					if ((mask & POLLIN_SET) && (in & bit)) {
+						res_in |= bit;
+						retval++;
+					}
+					if ((mask & POLLOUT_SET) && (out & bit)) {
+						res_out |= bit;
+						retval++;
+					}
+					if ((mask & POLLEX_SET) && (ex & bit)) {
+						res_ex |= bit;
+						retval++;
+					}
+				}
 			}
+			if (res_in)
+				*rinp = res_in;
+			if (res_out)
+				*routp = res_out;
+			if (res_ex)
+				*rexp = res_ex;
 		}
 		wait = NULL;
 		if (retval || !__timeout || signal_pending(current))

_