/*
 * $Id: comm_select.c,v 1.53.2.1 2002/04/27 08:48:42 hno Exp $
 *
 * DEBUG: section 5     Socket Functions
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "squid.h"

#define HAVE_EPOLL 1

#ifdef HAVE_EPOLL
#define DEBUG_EPOLL 1
/*
 * NOTE(review): this file arrived with three bare "#include" directives
 * whose header names had been stripped.  The names below are reconstructed
 * from what the code uses (ioctl(), mmap(), struct evpoll / EP_ALLOC /
 * EP_POLL / EP_MAP_SIZE) -- confirm against the /dev/epoll patch this file
 * was built for.
 */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/eventpoll.h>
#endif

static int MAX_POLL_TIME = 1000;    /* see also comm_quick_poll_required() */

#ifndef        howmany
#define howmany(x, y)   (((x)+((y)-1))/(y))
#endif
#ifndef        NBBY
#define        NBBY    8
#endif
#define FD_MASK_BYTES sizeof(fd_mask)
#define FD_MASK_BITS (FD_MASK_BYTES*NBBY)

/* STATIC */
static int fdIsHttp(int fd);
static int commDeferRead(int fd);
static void checkTimeouts(void);

#ifdef HAVE_EPOLL
int global_kdpfd;                   /* descriptor opened on /dev/epoll */
char *global_map;                   /* mmap()ed area the poll results are read from */
int global_events[SQUID_MAXFD];     /* fds that have events to dispatch this pass */
unsigned int global_nevents;        /* number of valid entries in global_events[] */
unsigned int global_npending;       /* readable fds that have flags.read_pending set */
#endif

/*
 * Ask the per-fd defer_check callback whether reads on fd should be
 * postponed.  Returns 0 when no callback is registered, otherwise
 * whatever the callback returns.
 */
static int
commDeferRead(int fd)
{
    fde *F = &fd_table[fd];
    if (F->defer_check == NULL)
        return 0;
    return F->defer_check(fd, F->defer_data);
}

/*
 * Returns 1 if fd is one of the HttpSockets[] entries, 0 otherwise.
 */
static int
fdIsHttp(int fd)
{
    int j;
    for (j = 0; j < NHttpSockets; j++) {
        if (fd == HttpSockets[j])
            return 1;
    }
    return 0;
}

/*
 * Walk fd_table[0..Biggest_FD], work out which events each fd currently
 * wants (read unless deferred, write if a write handler is set), register
 * interested fds with the /dev/epoll descriptor, and rebuild
 * global_events[] / global_nevents / global_npending.
 *
 * Returns the number of fds that want at least one event.
 */
int
comm_epoll_walk_fdtable(void)
{
    int i = 0;
    unsigned int nfds = 0;
    int maxfd;
    struct pollfd pfd;

    maxfd = Biggest_FD + 1;
    global_npending = 0;
    global_nevents = 0;
    for (i = 0; i < maxfd; i++) {
        int events;
        events = 0;
        /* Reset notifications if a fd is not open */
        /*
         * if((!fd_table[i].flags.open) && fd_table[i].revents) {
         *     fd_table[i].revents = 0;
         *     fd_table[i].events = 0;
         *     continue;
         * }
         */
        /* Check each open socket for a handler. */
        if (fd_table[i].read_handler) {
            switch (commDeferRead(i)) {
            case 0:
                events |= POLLRDNORM;
                break;
            case 1:
                break;
            default:
                fatalf("bad return value from commDeferRead(FD %d)\n", i);
            }
        }
        if (fd_table[i].write_handler)
            events |= POLLWRNORM;
        if (events) {
            /* The kernel is always asked for the full set; 'events' is only
             * used locally to decide what we dispatch. */
            pfd.events = POLLHUP | POLLERR | POLLRDNORM | POLLWRNORM | POLLNVAL;
            pfd.revents = 0;
            pfd.fd = i;
            /* debug(5, DEBUG_EPOLL ? 0 : 8) ("=> fd=%d events=%d\n",i,events); */
            if (write(global_kdpfd, &pfd, sizeof(pfd)) != sizeof(pfd))
                fatalf("comm_poll: write to /dev/poll failure: %s\n", xstrerror());
            /* fd_table[i].events still holds the value from the previous
             * walk here, so zero means the fd was not being watched. */
            if (!fd_table[i].events) {
                /* --FIXME-- --HACK-- */
                /* We are missing the write notifications on incoming sockets,
                 * which happen, presumably before we can start monitoring the
                 * socket ... we need to find a way to get that missing event
                 * into the revents list from the beginning somehow.....
                 * The following is a hack to try and get around this... */
                poll(&pfd, 1, 0);
                debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_poll: got poll() fd=%d events=%d revents=%d\n",pfd.fd,pfd.events,pfd.revents);
                fd_table[i].revents = pfd.revents;
            }
            nfds++;
            if ((events & POLLRDNORM) && fd_table[i].flags.read_pending)
                global_npending++;
        }
        /* Keep a record of what events we are actually looking for at the moment */
        fd_table[i].events = events;
        /* Add old events to our list so we can check them later to see if
         * they can be handled yet */
        if (fd_table[i].revents & events) {
            global_events[global_nevents] = i;
            global_nevents++;
        }
    }
    /* debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_poll: Got here nfds=%d\n",nfds); */
    return nfds;
}

/*
 * One pass of the event loop: refresh the interest set, wait up to msec
 * milliseconds (capped at MAX_POLL_TIME, or 0 if reads are already
 * pending) for /dev/epoll to report events, then dispatch read, write and
 * invalid-fd handling for every fd that has something to do.
 *
 * Returns:
 *   COMM_SHUTDOWN  no fd wants any event (asserts shutting_down)
 *   COMM_ERROR     the poll ioctl failed (only reachable if fatalf returns)
 *   COMM_OK        at least one batch of events was dispatched
 *   COMM_TIMEOUT   the deadline passed without anything to dispatch
 */
int
comm_poll(int msec)
{
    struct evpoll evp;
    struct pollfd *ppfds;
    PF *hdl = NULL;
    int fd;
    int i;
    unsigned int nfds;
    int num;
    static time_t last_timeout = 0;
    double timeout = current_dtime + (msec / 1000.0);

    do {
#if !ALARM_UPDATES_TIME
        double start;
        getCurrentTime();
        start = current_dtime;
#endif
        /* Handle any fs callbacks that need doing */
        storeDirCallback();
        nfds = comm_epoll_walk_fdtable();
        if (nfds == 0) {
            assert(shutting_down);
            return COMM_SHUTDOWN;
        }
        if (global_npending)
            msec = 0;
        if (msec > MAX_POLL_TIME)
            msec = MAX_POLL_TIME;
        for (;;) {
            statCounter.syscalls.polls++;
            evp.ep_timeout = msec;
            evp.ep_resoff = 0;
            num = ioctl(global_kdpfd, EP_POLL, &evp);
            statCounter.select_loops++;
            /*
             * Only a non-ignorable failure is fatal.  Previously every
             * negative return was fatal, which made the ignoreErrno()
             * retry below unreachable, and the message was formatted with
             * the address of xstrerror instead of its result.
             */
            if (num < 0 && !ignoreErrno(errno))
                fatalf("comm_poll: ioctl() failed: %s\n", xstrerror());
            ppfds = (struct pollfd *) (global_map + evp.ep_resoff);
            if (num >= 0 || global_npending > 0 || global_nevents > 0)
                break;
            if (ignoreErrno(errno))
                continue;
            debug(5, 0) ("comm_poll: /dev/epoll ioctl failure: %s\n", xstrerror());
            return COMM_ERROR;
        }
        /* global_npending is unsigned int, so it is printed with %u */
        debug(5, num ? 5 : 8) ("comm_poll: %d+%u FDs ready\n", num, global_npending);
        statHistCount(&statCounter.select_fds_hist, num);
        /* Check timeout handlers ONCE each second. */
        if (squid_curtime > last_timeout) {
            last_timeout = squid_curtime;
            checkTimeouts();
        }
        if ((num == 0) && (global_npending == 0) && (global_nevents == 0))
            continue;
        /* debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_poll: new=%u old=%d revents\n",num,global_nevents); */
        /* Add in the new event notifications we got from /dev/epoll */
        for (i = 0; i < num; i++, ppfds++) {
            debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_poll: got ppfds-> fd=%d events=%d revents=%d\n",ppfds->fd,ppfds->events,ppfds->revents);
            /* Skip fds the table walk already queued from older revents */
            if (!(fd_table[ppfds->fd].revents & fd_table[ppfds->fd].events)) {
                /* Add new event notification */
                global_events[global_nevents] = ppfds->fd;
                global_nevents++;
            }
            fd_table[ppfds->fd].revents |= ppfds->revents;
        }
        /* handle all of the events */
        for (i = 0; (unsigned int) i < global_nevents; i++) {
            fde *F;
            fd = global_events[i];
            if (fd == -1)
                continue;
            F = &fd_table[fd];
            debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_poll: fd=%d revents=%d events=%d\n", fd, F->revents, F->events );
            if (F->events == 0)
                continue;
            if (F->flags.read_pending)
                F->revents |= POLLIN;
            if (F->revents == 0)
                continue;
            /* Handle read events */
            if (F->revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
                debug(5, 6) ("comm_poll: FD %d ready for reading\n", fd);
                if (fdIsHttp(fd) && commDeferRead(fd))
                    continue;
                if ((hdl = F->read_handler)) {
                    int nbytes = F->bytes_read;
                    /* handlers are one-shot: clear before calling */
                    F->read_handler = NULL;
                    hdl(fd, F->read_data);
                    nbytes = F->bytes_read - nbytes;
                    if (nbytes)
                        debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_poll: fd=%d bytes_read=%d\n",fd,nbytes);
                    /* data was consumed, so drop the remembered POLLIN */
                    if (nbytes)
                        F->revents &= ~POLLIN;
                    statCounter.select_fds++;
                }
            }
            /* Handle write events */
            if (F->revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
                debug(5, 5) ("comm_poll: FD %d ready for writing\n", fd);
                if ((hdl = F->write_handler)) {
                    int nbytes = F->bytes_written;
                    F->write_handler = NULL;
                    hdl(fd, F->write_data);
                    nbytes = F->bytes_written - nbytes;
                    if (nbytes)
                        debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_poll: fd=%d bytes_written=%d\n",fd,nbytes);
                    /* data was written, so drop the remembered POLLOUT */
                    if (nbytes)
                        F->revents &= ~POLLOUT;
                    statCounter.select_fds++;
                }
            }
            /* Handle bad fd???? */
            if (F->revents & POLLNVAL) {
                close_handler *ch;
                debug(5, 0) ("WARNING: FD %d has handlers, but it's invalid.\n", fd);
                debug(5, 0) ("FD %d is a %s\n", fd, fdTypeStr[F->type]);
                debug(5, 0) ("--> %s\n", F->desc);
                debug(5, 0) ("tmout:%p read:%p write:%p\n", F->timeout_handler, F->read_handler, F->write_handler);
                for (ch = F->close_handler; ch; ch = ch->next)
                    debug(5, 0) (" close handler: %p\n", ch->handler);
                if (F->close_handler) {
                    commCallCloseHandlers(fd);
                } else if (F->timeout_handler) {
                    debug(5, 0) ("comm_poll: Calling Timeout Handler\n");
                    F->timeout_handler(fd, F->timeout_data);
                }
                F->close_handler = NULL;
                F->timeout_handler = NULL;
                F->read_handler = NULL;
                F->write_handler = NULL;
                if (F->flags.open)
                    fd_close(fd);
            }
        }
#if !ALARM_UPDATES_TIME
        getCurrentTime();
        statCounter.select_time += (current_dtime - start);
#endif
        return COMM_OK;
    } while (timeout > current_dtime);
    debug(5, 8) ("comm_poll: time out: %ld.\n", (long int) squid_curtime);
    return COMM_TIMEOUT;
}

/*
 * Open /dev/epoll, size it for SQUID_MAXFD descriptors and map the area
 * that comm_poll() reads results from.  Any failure is fatal.
 */
void
comm_select_init(void)
{
    if ((global_kdpfd = open("/dev/epoll", O_RDWR)) == -1) {
        fatalf("comm_select_init: opening /dev/epoll failed!: %s\n",xstrerror());
    }
    if (ioctl(global_kdpfd, EP_ALLOC, SQUID_MAXFD) == -1) {
        fatalf("comm_select_init: ioctl() failed!: %s\n",xstrerror());
    }
    if ((global_map = (char *) mmap(NULL, EP_MAP_SIZE(SQUID_MAXFD), PROT_READ, MAP_PRIVATE, global_kdpfd, 0)) == (char *) -1) {
        fatal("comm_select_init: mmap() failed!\n");
    }
    global_nevents = 0;
}

/*
 * For every open fd whose timeout is set and has been reached, call its
 * timeout handler once (clearing it first), or comm_close() the fd if it
 * has no handler.
 */
static void
checkTimeouts(void)
{
    int fd;
    fde *F = NULL;
    PF *callback;
    for (fd = 0; fd <= Biggest_FD; fd++) {
        F = &fd_table[fd];
        if (!F->flags.open)
            continue;
        if (F->timeout == 0)
            continue;
        if (F->timeout > squid_curtime)
            continue;
        debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
        if (F->timeout_handler) {
            debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
            callback = F->timeout_handler;
            F->timeout_handler = NULL;
            callback(fd, F->timeout_data);
        } else {
            debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd);
            comm_close(fd);
        }
    }
}

/* Called by async-io or diskd to speed up the polling */
void
comm_quick_poll_required(void)
{
    MAX_POLL_TIME = 10;
}

/* No-op here: the interest set is rebuilt by comm_epoll_walk_fdtable(). */
void
commUpdateReadBits(int fd, PF * handler)
{
    return;
}

/* No-op here: the interest set is rebuilt by comm_epoll_walk_fdtable(). */
void
commUpdateWriteBits(int fd, PF * handler)
{
    return;
}