Retry requests that failed due to a persistent connection race instead of
replying with ERR_ZERO_SIZE_OBJECT "Bad Gateway".

The ERR_ZERO_SIZE_OBJECT errors were visible to the client when the
destination had only one address, because serverDestinations.shift() made
the list of destinations empty and startConnectionOrFail() failed.

The "wasIdle" state is remembered in the Connection object rather than in
FwdState itself because we need to maintain that information for a pinned
connection as well (and FwdState is not preserved between a request that
pins a connection and a request that uses the pinned connection).

This may need more work:

* I am not sure the FTP and Gopher cases are correct. The current code
  assumes that those protocols do not have pconn [races].

* I do not think we should set wasIdle for pinned connections by default,
  because when pinned connections are used for authentication, we do not
  (and cannot) re-pin a failed connection, right? Setting wasIdle will
  probably lead to assertions in forward.cc as we find no pinned connection
  to go with a PINNED peer. Bump-server-first code will re-pin and, hence,
  should set wasIdle for pinned connections.
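
For reviewers who want the shape of the fix before the diff, here is a
minimal standalone sketch of the wasIdle life cycle the patch implements.
Conn, pushIdle(), noteReplyBytes(), and retrySameDestination() are
simplified stand-ins invented for illustration, not Squid identifiers:

    #include <memory>

    // Simplified stand-in for Comm::Connection, reduced to the one flag
    // the patch adds.
    struct Conn {
        bool wasIdle; // true while a pconn race is still possible
        Conn() : wasIdle(false) {}
    };
    typedef std::shared_ptr<Conn> ConnPtr;

    // pconn.cc analogue: storing a connection in the idle pool marks it,
    // because the server may close it while it sits idle.
    void pushIdle(const ConnPtr &conn) { conn->wasIdle = true; }

    // http.cc analogue: receiving any reply bytes proves the server
    // accepted the reused connection, so the race window has closed.
    void noteReplyBytes(const ConnPtr &conn) { conn->wasIdle = false; }

    // forward.cc analogue: after a failure, retry the same destination
    // (on a fresh connection) only when a pconn race could explain it.
    bool retrySameDestination(const ConnPtr &conn) {
        return conn != nullptr && conn->wasIdle;
    }

Pinning in client_side.cc is a fourth marking point, with the caveats
listed above.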
=== modified file 'src/client_side.cc'
--- src/client_side.cc	2012-02-03 18:44:57 +0000
+++ src/client_side.cc	2012-02-14 18:01:14 +0000
@@ -4356,40 +4356,44 @@

     // when pinning an SSL bumped connection, the request may be NULL
     const char *pinnedHost = "[unknown]";
     if (request) {
         pinning.host = xstrdup(request->GetHost());
         pinning.port = request->port;
         pinnedHost = pinning.host;
     } else {
         pinning.port = pinServer->remote.GetPort();
     }
     pinning.pinned = true;
     if (aPeer)
         pinning.peer = cbdataReference(aPeer);
     pinning.auth = auth;
     char stmp[MAX_IPSTRLEN];
     snprintf(desc, FD_DESC_SZ, "%s pinned connection for %s (%d)",
              (auth || !aPeer) ? pinnedHost : aPeer->name,
              clientConnection->remote.ToURL(stmp,MAX_IPSTRLEN),
              clientConnection->fd);
     fd_note(pinning.serverConnection->fd, desc);

+    // The server may close while we wait for (or send) the next request.
+    // If pconn race is detected, we will decide whether to repin and retry.
+    pinning.serverConnection->wasIdle = true;
+
     typedef CommCbMemFunT<ConnStateData, CommCloseCbParams> Dialer;
     pinning.closeHandler = JobCallback(33, 5,
                                        Dialer, this, ConnStateData::clientPinnedConnectionClosed);
     comm_add_close_handler(pinning.serverConnection->fd, pinning.closeHandler);
 }

 const Comm::ConnectionPointer
 ConnStateData::validatePinnedConnection(HttpRequest *request, const struct peer *aPeer)
 {
     bool valid = true;
     if (!Comm::IsConnOpen(pinning.serverConnection))
         valid = false;

     if (pinning.auth && request && strcasecmp(pinning.host, request->GetHost()) != 0) {
         valid = false;
     }

     if (request && pinning.port != request->port) {
         valid = false;
     }

     if (pinning.peer && !cbdataReferenceValid(pinning.peer)) {
         valid = false;

=== modified file 'src/comm/Connection.cc'
--- src/comm/Connection.cc	2011-09-06 08:38:27 +0000
+++ src/comm/Connection.cc	2012-02-14 02:59:21 +0000
@@ -1,67 +1,69 @@
 #include "config.h"
 #include "cbdata.h"
 #include "comm.h"
 #include "comm/Connection.h"
 #include "fde.h"
 #include "SquidTime.h"

 bool
 Comm::IsConnOpen(const Comm::ConnectionPointer &conn)
 {
     return conn != NULL && conn->isOpen();
 }

 Comm::Connection::Connection() :
         local(),
         remote(),
         peerType(HIER_NONE),
         fd(-1),
         tos(0),
         flags(COMM_NONBLOCKING),
+        wasIdle(false),
         _peer(NULL)
 {
     *rfc931 = 0; // quick init the head. the rest does not matter.
 }

 static int64_t lost_conn = 0;
 Comm::Connection::~Connection()
 {
     if (fd >= 0) {
         debugs(5, 0, "BUG: Orphan Comm::Connection: " << *this);
         debugs(5, 0, "NOTE: " << ++lost_conn << " Orphans since last started.");
         close();
     }

     cbdataReferenceDone(_peer);
 }

 Comm::ConnectionPointer
 Comm::Connection::copyDetails() const
 {
     ConnectionPointer c = new Comm::Connection;

     c->local = local;
     c->remote = remote;
     c->peerType = peerType;
     c->tos = tos;
     c->flags = flags;
+    // wasIdle is not a preserved detail

     // ensure FD is not open in the new copy.
     c->fd = -1;

     // ensure we have a cbdata reference to _peer not a straight ptr copy.
     c->_peer = cbdataReference(getPeer());

     return c;
 }

 void
 Comm::Connection::close()
 {
     if (isOpen()) {
         comm_close(fd);
         fd = -1;
         if (getPeer())
             getPeer()->stats.conn_open--;
     }
 }

=== modified file 'src/comm/Connection.h'
--- src/comm/Connection.h	2011-07-12 05:52:07 +0000
+++ src/comm/Connection.h	2012-02-14 02:59:57 +0000
@@ -130,69 +130,74 @@

     Ip::Address local;

     /** Address for the Remote end of a TCP link. */
     Ip::Address remote;

     /** Hierarchy code for this connection link */
     hier_code peerType;

     /** Socket used by this connection. Negative if not open. */
     int fd;

     /** Quality of Service TOS values currently sent on this connection */
     tos_t tos;

     /** Netfilter MARK values currently sent on this connection */
     nfmark_t nfmark;

     /** COMM flags set on this connection */
     int flags;

+    /** Whether a pconn race may exist (i.e., server closed while we idled). */
+    bool wasIdle;
+
     char rfc931[USER_IDENT_SZ];

 #if USE_SQUID_EUI
     Eui::Eui48 remoteEui48;
     Eui::Eui64 remoteEui64;
 #endif

 private:
     // XXX: we need to call this member peer_ but the struct peer_ global type
     //      behind peer* clashes despite our private Comm:: namespace
     //      (it being global gets inherited here too).
     /** cache_peer data object (if any) */
     peer *_peer;
 };

 MEMPROXY_CLASS_INLINE(Connection);

 }; // namespace Comm

 // NP: Order and namespace here is very important.
 //     * The second define inlines the first.
 //     * Stream inheritance overloading is searched in the global scope first.

 inline std::ostream &
 operator << (std::ostream &os, const Comm::Connection &conn)
 {
     os << "local=" << conn.local << " remote=" << conn.remote;
     if (conn.fd >= 0)
         os << " FD " << conn.fd;
     if (conn.flags != COMM_UNSET)
         os << " flags=" << conn.flags;
+    if (conn.wasIdle)
+        os << " pconn";
 #if USE_IDENT
     if (*conn.rfc931)
         os << " IDENT::" << conn.rfc931;
 #endif
     return os;
 }

 inline std::ostream &
 operator << (std::ostream &os, const Comm::ConnectionPointer &conn)
 {
     if (conn != NULL)
         os << *conn;
     return os;
 }

 #endif
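
The "wasIdle is not a preserved detail" comment in copyDetails() above
records a deliberate choice: a details copy describes a connection that has
not been opened yet, so it cannot have been sitting idle. A hypothetical
mirror of that semantics (illustration only; the real copyDetails() returns
a ConnectionPointer and copies more fields):

    #include <cassert>

    struct Conn {
        bool wasIdle;
        Conn() : wasIdle(false) {}
        Conn copyDetails() const {
            Conn c; // a fresh, never-opened connection
            // address/flag copying elided; wasIdle deliberately not copied
            return c;
        }
    };

    int main() {
        Conn popped;
        popped.wasIdle = true; // came from the idle pool
        // a copy made to retry the destination is not mistaken for
        // another pconn race
        assert(!popped.copyDetails().wasIdle);
        return 0;
    }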
=== modified file 'src/forward.cc'
--- src/forward.cc	2012-02-05 21:55:51 +0000
+++ src/forward.cc	2012-02-14 17:49:25 +0000
@@ -553,41 +553,45 @@

     default:
         return false;
     }

     return true;
 }

 void
 FwdState::serverClosed(int fd)
 {
     debugs(17, 2, HERE << "FD " << fd << " " << entry->url());
     retryOrBail();
 }

 void
 FwdState::retryOrBail()
 {
     if (checkRetry()) {
         debugs(17, 3, HERE << "re-forwarding (" << n_tries << " tries, " << (squid_curtime - start_t) << " secs)");
-        serverDestinations.shift(); // last one failed. try another.
+        // we should retry the same destination if it failed due to pconn race
+        if (serverConn != NULL && !serverConn->wasIdle)
+            serverDestinations.shift(); // last one failed. try another.
+        else
+            debugs(17, 4, HERE << "retrying the same destination");
         startConnectionOrFail();
         return;
     }

     // TODO: should we call completed() here and move doneWithRetries there?
     doneWithRetries();

     if (self != NULL && !err && shutting_down) {
         ErrorState *anErr = errorCon(ERR_SHUTTING_DOWN, HTTP_SERVICE_UNAVAILABLE, request);
         errorAppendEntry(entry, anErr);
     }

     self = NULL; // refcounted
 }

 // If the Server quits before nibbling at the request body, the body sender
 // will not know (so that we can retry). Call this if we will not retry. We
 // will notify the sender so that it does not get stuck waiting for space.
 void
 FwdState::doneWithRetries()

@@ -913,41 +917,46 @@

         if (pinned_connection->pinnedAuth())
             request->flags.auth = 1;
         comm_add_close_handler(serverConn->fd, fwdServerClosedWrapper, this);
         dispatch();
         return;
         }

         /* Failure. Fall back on next path */
         debugs(17,2,HERE << " Pinned connection " << pinned_connection << " not valid.");
         serverDestinations.shift();
         startConnectionOrFail();
         return;
     }

     // Use pconn to avoid opening a new connection.
     const char *host;
     if (serverDestinations[0]->getPeer()) {
         host = serverDestinations[0]->getPeer()->host;
     } else {
         host = request->GetHost();
     }
-    Comm::ConnectionPointer temp = fwdPconnPool->pop(serverDestinations[0], host, checkRetriable());
+
+    // force new connection for non-retriable requests and after pconn races
+    const bool sawPconnRace = n_tries > 0 && serverConn != NULL && serverConn->wasIdle;
+    const bool pconnAllowed = checkRetriable() && !sawPconnRace;
+
+    Comm::ConnectionPointer temp = fwdPconnPool->pop(serverDestinations[0], host, pconnAllowed);

     // if we found an open persistent connection to use. use it.
     if (Comm::IsConnOpen(temp)) {
         serverConn = temp;
         debugs(17, 3, HERE << "reusing pconn " << serverConnection());
         n_tries++;

         if (!serverConnection()->getPeer())
             origin_tries++;

         comm_add_close_handler(serverConnection()->fd, fwdServerClosedWrapper, this);

         /* Update server side TOS and Netfilter mark on the connection. */
         if (Ip::Qos::TheConfig.isAclTosActive()) {
             temp->tos = GetTosToServer(request);
             Ip::Qos::setSockTos(temp, temp->tos);
         }
 #if SO_MARK
         if (Ip::Qos::TheConfig.isAclNfmarkActive()) {
             temp->nfmark = GetNfmarkToServer(request);
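
The connectStart() change above folds two independent reasons for refusing
an idle connection into one pop() argument. Stated in isolation
(hypothetical helper; only the names mirror the patch's locals):

    // A fresh TCP connection is forced when either condition holds.
    bool allowPconn(bool retriable, int triesSoFar, bool lastConnWasIdle)
    {
        // After a pconn race, popping another idle connection could
        // simply lose the same race again.
        const bool sawPconnRace = triesSoFar > 0 && lastConnWasIdle;
        // Non-retriable requests must never ride an idle connection.
        return retriable && !sawPconnRace;
    }

Forcing a fresh connection here is also what makes it safe for
retryOrBail() to keep the same destination after a race.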
=== modified file 'src/ftp.cc'
--- src/ftp.cc	2011-10-21 16:20:42 +0000
+++ src/ftp.cc	2012-02-14 03:19:45 +0000
@@ -477,40 +477,42 @@

     /* NP: failure recovery may be possible when its only a data.conn failure.
      *     if the ctrl.conn is still fine, we can send ABOR down it and retry.
      *     Just need to watch out for wider Squid states like shutting down or reconfigure.
      */
 }

 FtpStateData::FtpStateData(FwdState *theFwdState, const Comm::ConnectionPointer &conn) :
         AsyncJob("FtpStateData"), ServerStateData(theFwdState)
 {
     const char *url = entry->url();
     debugs(9, 3, HERE << "'" << url << "'" );
     statCounter.server.all.requests++;
     statCounter.server.ftp.requests++;
     theSize = -1;
     mdtm = -1;

     if (Config.Ftp.passive && !flags.pasv_failed)
         flags.pasv_supported = 1;

     flags.rest_supported = 1;

+    conn->wasIdle = false; // no pconns for FTP
+
     typedef CommCbMemFunT<FtpStateData, CommCloseCbParams> Dialer;
     AsyncCall::Pointer closer = JobCallback(9, 5, Dialer, this, FtpStateData::ctrlClosed);
     ctrl.opened(conn, closer);

     if (request->method == METHOD_PUT)
         flags.put = 1;
 }

 FtpStateData::~FtpStateData()
 {
     debugs(9, 3, HERE << entry->url() );

     if (reply_hdr) {
         memFree(reply_hdr, MEM_8K_BUF);
         reply_hdr = NULL;
     }

     if (data.opener != NULL) {
         data.opener->cancel("FtpStateData destructed");
         data.opener = NULL;

=== modified file 'src/gopher.cc'
--- src/gopher.cc	2011-11-22 12:00:59 +0000
+++ src/gopher.cc	2012-02-14 03:14:19 +0000
@@ -981,25 +981,27 @@

         /* Index URL without query word */
         /* We have to generate search page back to client. No need for connection */
         gopherMimeCreate(gopherState);

         if (gopherState->type_id == GOPHER_INDEX) {
             gopherState->conversion = gopher_ds::HTML_INDEX_PAGE;
         } else {
             if (gopherState->type_id == GOPHER_CSO) {
                 gopherState->conversion = gopher_ds::HTML_CSO_PAGE;
             } else {
                 gopherState->conversion = gopher_ds::HTML_INDEX_PAGE;
             }
         }

         gopherToHTML(gopherState, (char *) NULL, 0);
         fwd->complete();
         return;
     }

     gopherState->serverConn = fwd->serverConnection();
+    gopherState->serverConn->wasIdle = false; // no pconns for Gopher
+
     gopherSendRequest(fwd->serverConnection()->fd, gopherState);
     AsyncCall::Pointer timeoutCall = commCbCall(5, 4, "gopherTimeout",
                                     CommTimeoutCbPtrFun(gopherTimeout, gopherState));
     commSetConnTimeout(fwd->serverConnection(), Config.Timeout.read, timeoutCall);
 }

=== modified file 'src/http.cc'
--- src/http.cc	2011-10-21 16:20:42 +0000
+++ src/http.cc	2012-02-14 03:10:05 +0000
@@ -1070,40 +1070,44 @@
     }

     // handle I/O errors
     if (io.flag != COMM_OK || len < 0) {
         debugs(11, 2, HERE << io.conn << ": read failure: " << xstrerror() << ".");

         if (ignoreErrno(io.xerrno)) {
             flags.do_next_read = 1;
         } else {
             ErrorState *err;
             err = errorCon(ERR_READ_ERROR, HTTP_BAD_GATEWAY, fwd->request);
             err->xerrno = io.xerrno;
             fwd->fail(err);
             flags.do_next_read = 0;
             serverConnection->close();
         }

         return;
     }

+    // no pconn races are possible if we have received something from server
+    if (serverConnection != NULL && len > 0)
+        serverConnection->wasIdle = false;
+
     // update I/O stats
     if (len > 0) {
         readBuf->appended(len);
         reply_bytes_read += len;
 #if USE_DELAY_POOLS
         DelayId delayId = entry->mem_obj->mostBytesAllowed();
         delayId.bytesIn(len);
 #endif

         kb_incr(&statCounter.server.all.kbytes_in, len);
         kb_incr(&statCounter.server.http.kbytes_in, len);
         IOStats.Http.reads++;

         for (clen = len - 1, bin = 0; clen; bin++)
             clen >>= 1;

         IOStats.Http.read_hist[bin]++;

         // update peer response time stats (%<pt)
         const timeval &sent = request->hier.peer_http_request_sent;

=== modified file 'src/pconn.cc'
--- src/pconn.cc	2011-11-08 13:40:26 +0000
+++ src/pconn.cc	2012-02-14 03:01:29 +0000
@@ -155,40 +155,42 @@
         size_ -= n;
     }

     if (parent_ && size_ == 0) {
         debugs(48, 3, HERE << "deleting " << hashKeyStr(&hash));
         delete this;
     }
 }

 void
 IdleConnList::clearHandlers(const Comm::ConnectionPointer &conn)
 {
     debugs(48, 3, HERE << "removing close handler for " << conn);
     comm_read_cancel(conn->fd, IdleConnList::Read, this);
     commUnsetConnTimeout(conn);
 }

 void
 IdleConnList::push(const Comm::ConnectionPointer &conn)
 {
+    conn->wasIdle = true;
+
     if (size_ == capacity_) {
         debugs(48, 3, HERE << "growing idle Connection array");
         capacity_ <<= 1;
         const Comm::ConnectionPointer *oldList = theList_;
         theList_ = new Comm::ConnectionPointer[capacity_];
         for (int index = 0; index < size_; index++)
             theList_[index] = oldList[index];

         delete[] oldList;
     }

     if (parent_)
         parent_->noteConnectionAdded();

     theList_[size_++] = conn;

     AsyncCall::Pointer readCall = commCbCall(5,4, "IdleConnList::Read",
                                  CommIoCbPtrFun(IdleConnList::Read, this));
     comm_read(conn, fakeReadBuf_, sizeof(fakeReadBuf_), readCall);
     AsyncCall::Pointer timeoutCall = commCbCall(5,4, "IdleConnList::Timeout",
                                     CommTimeoutCbPtrFun(IdleConnList::Timeout, this));