/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon public domain software
 * originally written at the National Center for Supercomputing Applications,
 * University of Illinois, Urbana-Champaign.
 */

/* HTTP routines for Apache proxy */

#include "mod_proxy.h"
#include "http_log.h"
#include "http_main.h"
#include "http_core.h"
#include "util_date.h"

/*
 * Canonicalise http-like URLs.
 *  scheme is the scheme for the URL
 *  url    is the URL starting with the first '/'
 *  def_port is the default port for this scheme.
 */
int ap_proxy_http_canon(request_rec *r, char *url, const char *scheme, int def_port)
{
    char *host, *path, *search, sport[7];
    const char *err;
    int port;

    /* do syntatic check.
     * We break the URL into host, port, path, search
     */
    port = def_port;
    err = ap_proxy_canon_netloc(r->pool, &url, NULL, NULL, &host, &port);
    if (err)
        return HTTP_BAD_REQUEST;

    /* now parse path/search args, according to rfc1738 */
    /* N.B. if this isn't a true proxy request, then the URL _path_
     * has already been decoded.  True proxy requests have r->uri
     * == r->unparsed_uri, and no others have that property.
     */
    if (r->uri == r->unparsed_uri) {
        search = strchr(url, '?');
        if (search != NULL)
            *(search++) = '\0';
    }
    else
        search = r->args;

    /* process path */
    path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path,
                             r->proxyreq);
    if (path == NULL)
        return HTTP_BAD_REQUEST;

    if (port != def_port)
        ap_snprintf(sport, sizeof(sport), ":%d", port);
    else
        sport[0] = '\0';

    r->filename = ap_pstrcat(r->pool, "proxy:", scheme, "://", host, sport, "/",
                   path, (search) ? "?" : "", (search) ? search : "", NULL);
    return OK;
}

/* handle the conversion of URLs in the ProxyPassReverse function */
static const char *proxy_location_reverse_map(request_rec *r, const char *url)
{
    void *sconf;
    proxy_server_conf *conf;
    struct proxy_alias *ent;
    int i, l1, l2;
    char *u;

    sconf = r->server->module_config;
    conf = (proxy_server_conf *)ap_get_module_config(sconf, &proxy_module);
    l1 = strlen(url);
    ent = (struct proxy_alias *)conf->raliases->elts;
    for (i = 0; i < conf->raliases->nelts; i++) {
        l2 = strlen(ent[i].real);
        if (l1 >= l2 && strncmp(ent[i].real, url, l2) == 0) {
            u = ap_pstrcat(r->pool, ent[i].fake, &url[l2], NULL);
            return ap_construct_url(r->pool, u, r);
        }
    }
    return url;
}

/*
 * This handles http:// URLs, and other URLs using a remote proxy over http
 * If proxyhost is NULL, then contact the server directly, otherwise
 * go via the proxy.
 * Note that if a proxy is used, then URLs other than http: can be accessed,
 * also, if we have trouble which is clearly specific to the proxy, then
 * we return DECLINED so that we can try another proxy. (Or the direct
 * route.)
 */
int ap_proxy_http_handler(request_rec *r, cache_req *c, char *url,
                       const char *proxyhost, int proxyport)
{
    const char *strp;
    char *strp2;
    const char *err, *desthost;
    int i, j, sock, len, backasswards;
    table *req_hdrs, *resp_hdrs;
    array_header *reqhdrs_arr;
    table_entry *reqhdrs_elts;
    struct sockaddr_in server;
    struct in_addr destaddr;
    struct hostent server_hp;
    BUFF *f;
    char buffer[HUGE_STRING_LEN];
    char portstr[32];
    pool *p = r->pool;
    int destport = 0;
    char *destportstr = NULL;
    const char *urlptr = NULL;
    const char *datestr, *urlstr;

    void *sconf = r->server->module_config;
    proxy_server_conf *conf =
    (proxy_server_conf *) ap_get_module_config(sconf, &proxy_module);
    struct noproxy_entry *npent = (struct noproxy_entry *) conf->noproxies->elts;
    struct nocache_entry *ncent = (struct nocache_entry *) conf->nocaches->elts;
    int nocache = 0;

    if (conf->cache.root == NULL) nocache = 1;

    memset(&server, '\0', sizeof(server));
    server.sin_family = AF_INET;

    /* We break the URL into host, port, path-search */

    urlptr = strstr(url, "://");
    if (urlptr == NULL)
        return HTTP_BAD_REQUEST;
    urlptr += 3;
    destport = DEFAULT_HTTP_PORT;
    strp = strchr(urlptr, '/');
    if (strp == NULL) {
        desthost = ap_pstrdup(p, urlptr);
        urlptr = "/";
    }
    else {
        char *q = ap_palloc(p, strp - urlptr + 1);
        memcpy(q, urlptr, strp - urlptr);
        q[strp - urlptr] = '\0';
        urlptr = strp;
        desthost = q;
    }

    strp2 = strchr(desthost, ':');
    if (strp2 != NULL) {
        *(strp2++) = '\0';
        if (ap_isdigit(*strp2)) {
            destport = atoi(strp2);
            destportstr = strp2;
        }
    }

    /* check if ProxyBlock directive on this host */
    destaddr.s_addr = ap_inet_addr(desthost);
    for (i = 0; i < conf->noproxies->nelts; i++) {
        if (destaddr.s_addr == npent[i].addr.s_addr ||
            (npent[i].name != NULL &&
              (npent[i].name[0] == '*' || strstr(desthost, npent[i].name) != NULL)))
            return ap_proxyerror(r, HTTP_FORBIDDEN,
                                 "Connect to remote machine blocked");
    }

    if (proxyhost != NULL) {
        server.sin_port = htons((unsigned short)proxyport);
        err = ap_proxy_host2addr(proxyhost, &server_hp);
        if (err != NULL)
            return DECLINED;    /* try another */
    }
    else {
        server.sin_port = htons((unsigned short)destport);
        err = ap_proxy_host2addr(desthost, &server_hp);
        if (err != NULL)
            return ap_proxyerror(r, HTTP_INTERNAL_SERVER_ERROR, err);
    }


    /* we have worked out who exactly we are going to connect to, now
     * make that connection...
     */
    sock = ap_psocket(p, PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (sock == -1) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
                    "proxy: error creating socket");
        return HTTP_INTERNAL_SERVER_ERROR;
    }

#if !defined(TPF) && !defined(BEOS)
    if (conf->recv_buffer_size) {
        if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
                       (const char *) &conf->recv_buffer_size, sizeof(int))
            == -1) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
                         "setsockopt(SO_RCVBUF): Failed to set ProxyReceiveBufferSize, using default");
        }
    }
#endif

#ifdef SINIX_D_RESOLVER_BUG
    {
        struct in_addr *ip_addr = (struct in_addr *) *server_hp.h_addr_list;

        for (; ip_addr->s_addr != 0; ++ip_addr) {
            memcpy(&server.sin_addr, ip_addr, sizeof(struct in_addr));
            i = ap_proxy_doconnect(sock, &server, r);
            if (i == 0)
                break;
        }
    }
#else
    j = 0;
    while (server_hp.h_addr_list[j] != NULL) {
        memcpy(&server.sin_addr, server_hp.h_addr_list[j],
               sizeof(struct in_addr));
        i = ap_proxy_doconnect(sock, &server, r);
        if (i == 0)
            break;
        j++;
    }
#endif
    if (i == -1) {
        if (proxyhost != NULL)
            return DECLINED;    /* try again another way */
        else
            return ap_proxyerror(r, HTTP_BAD_GATEWAY, ap_pstrcat(r->pool,
                                "Could not connect to remote machine: ",
                                strerror(errno), NULL));
    }

    /* record request_time for HTTP/1.1 age calculation */
    c->req_time = time(NULL);

    /* build upstream-request headers by stripping r->headers_in from
     * connection specific headers.
     * We must not remove the Connection: header from r->headers_in,
     * we still have to react to Connection: close
     */
    req_hdrs = ap_copy_table(r->pool, r->headers_in);
    ap_proxy_clear_connection(r->pool, req_hdrs);

    /* At this point, we start sending the HTTP/1.1 request to the
     * remote server (proxy or otherwise).
     */
    f = ap_bcreate(p, B_RDWR | B_SOCKET);
    ap_bpushfd(f, sock, sock);

    ap_hard_timeout("proxy send", r);
    ap_bvputs(f, r->method, " ", proxyhost ? url : urlptr, " HTTP/1.0" CRLF,
           NULL);
    /* Send Host: now, adding it to req_hdrs wouldn't be much better */
    if (destportstr != NULL && destport != DEFAULT_HTTP_PORT)
        ap_bvputs(f, "Host: ", desthost, ":", destportstr, CRLF, NULL);
    else
        ap_bvputs(f, "Host: ", desthost, CRLF, NULL);

    if (conf->viaopt == via_block) {
        /* Block all outgoing Via: headers */
        ap_table_unset(req_hdrs, "Via");
    } else if (conf->viaopt != via_off) {
        /* Create a "Via:" request header entry and merge it */
        i = ap_get_server_port(r);
        if (ap_is_default_port(i,r)) {
            strcpy(portstr,"");
        } else {
            ap_snprintf(portstr, sizeof portstr, ":%d", i);
        }
        /* Generate outgoing Via: header with/without server comment: */
        ap_table_mergen(req_hdrs, "Via",
                    (conf->viaopt == via_full)
                        ? ap_psprintf(p, "%d.%d %s%s (%s)",
                                HTTP_VERSION_MAJOR(r->proto_num),
                                HTTP_VERSION_MINOR(r->proto_num),
                                ap_get_server_name(r), portstr,
                                SERVER_BASEVERSION)
                        : ap_psprintf(p, "%d.%d %s%s",
                                HTTP_VERSION_MAJOR(r->proto_num),
                                HTTP_VERSION_MINOR(r->proto_num),
                                ap_get_server_name(r), portstr)
                        );
    }

    /* Add X-Forwarded-For: so that the upstream has a chance to
       determine, where the original request came from. */
    ap_table_mergen(req_hdrs, "X-Forwarded-For", r->connection->remote_ip);
    
    /* we don't yet support keepalives - but we will soon, I promise! */
    ap_table_set(req_hdrs, "Connection", "close");

    reqhdrs_arr = ap_table_elts(req_hdrs);
    reqhdrs_elts = (table_entry *) reqhdrs_arr->elts;
    for (i = 0; i < reqhdrs_arr->nelts; i++) {
        if (reqhdrs_elts[i].key == NULL || reqhdrs_elts[i].val == NULL

        /* Clear out hop-by-hop request headers not to send:
         * RFC2616 13.5.1 says we should strip these headers:
         */
            || !strcasecmp(reqhdrs_elts[i].key, "Host") /* Already sent */
            || !strcasecmp(reqhdrs_elts[i].key, "Keep-Alive")
            || !strcasecmp(reqhdrs_elts[i].key, "TE")
            || !strcasecmp(reqhdrs_elts[i].key, "Trailer")
            || !strcasecmp(reqhdrs_elts[i].key, "Transfer-Encoding")
            || !strcasecmp(reqhdrs_elts[i].key, "Upgrade")

            /* XXX: @@@ FIXME: "Proxy-Authorization" should *only* be 
             * suppressed if THIS server requested the authentication,
             * not when a frontend proxy requested it!
             *
             * The solution to this problem is probably to strip out
             * the Proxy-Authorisation header in the authorisation
             * code itself, not here. This saves us having to signal
             * somehow whether this request was authenticated or not.
             */
            || !strcasecmp(reqhdrs_elts[i].key, "Proxy-Authorization"))
            continue;
        ap_bvputs(f, reqhdrs_elts[i].key, ": ", reqhdrs_elts[i].val, CRLF, NULL);
    }

    /* the obligatory empty line to mark the end of the headers */
    ap_bputs(CRLF, f);

    /* send the request data, if any. */
    if (ap_should_client_block(r)) {
        while ((i = ap_get_client_block(r, buffer, sizeof buffer)) > 0)
            ap_bwrite(f, buffer, i);
    }
    ap_bflush(f);
    ap_kill_timeout(r);


    /* Right - now it's time to listen for a response.
     */
    ap_hard_timeout("proxy receive", r);

    len = ap_bgets(buffer, sizeof buffer - 1, f);
    if (len == -1) {
        ap_bclose(f);
        ap_kill_timeout(r);
        ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
                     "ap_bgets() - proxy receive - Error reading from remote server %s (length %d)",
                     proxyhost ? proxyhost : desthost, len);
        return ap_proxyerror(r, HTTP_BAD_GATEWAY,
                             "Error reading from remote server");
    } else if (len == 0) {
        ap_bclose(f);
        ap_kill_timeout(r);
        return ap_proxyerror(r, HTTP_BAD_GATEWAY,
                             "Document contains no data");
    }

    /* Is it an HTTP/1 response?
     * Do some sanity checks on the response.
     * (This is buggy if we ever see an HTTP/1.10)
     */
    if (ap_checkmask(buffer, "HTTP/#.# ###*")) {
        int major, minor;
        if (2 != sscanf(buffer, "HTTP/%u.%u", &major, &minor)) {
            /* if no response, default to HTTP/1.1 - is this correct? */
            major = 1;
            minor = 1;
        }

        /* If not an HTTP/1 message or if the status line was > 8192 bytes */
        if (buffer[5] != '1' || buffer[len - 1] != '\n') {
            ap_bclose(f);
            ap_kill_timeout(r);
            return HTTP_BAD_GATEWAY;
        }
        backasswards = 0;
        buffer[--len] = '\0';

        buffer[12] = '\0';
        r->status = atoi(&buffer[9]);
        buffer[12] = ' ';
        r->status_line = ap_pstrdup(p, &buffer[9]);

        /* read the response headers. */
        /* N.B. for HTTP/1.0 clients, we have to fold line-wrapped headers */
        /* Also, take care with headers with multiple occurences. */

        resp_hdrs = ap_proxy_read_headers(r, buffer, HUGE_STRING_LEN, f);
        if (resp_hdrs == NULL) {
            ap_log_error(APLOG_MARK, APLOG_WARNING|APLOG_NOERRNO, r->server,
                 "proxy: Bad HTTP/%d.%d header returned by %s (%s)",
                 major, minor, r->uri, r->method);
            resp_hdrs = ap_make_table(p, 20);
            nocache = 1;    /* do not cache this broken file */
        }

        /* handle Via header in the response */
        if (conf->viaopt != via_off && conf->viaopt != via_block) {
            /* Create a "Via:" response header entry and merge it */
            i = ap_get_server_port(r);
            if (ap_is_default_port(i,r)) {
                strcpy(portstr,"");
            } else {
                ap_snprintf(portstr, sizeof portstr, ":%d", i);
            }
            ap_table_mergen((table *)resp_hdrs, "Via",
                            (conf->viaopt == via_full)
                            ? ap_psprintf(p, "%d.%d %s%s (%s)",
                                major, minor,
                                ap_get_server_name(r), portstr,
                                SERVER_BASEVERSION)
                            : ap_psprintf(p, "%d.%d %s%s",
                                major, minor,
                                ap_get_server_name(r), portstr)
                            );
        }

        /* strip hop-by-hop headers defined by Connection */
        ap_proxy_clear_connection(p, resp_hdrs);
        /* Now add out bound headers set by other modules */
        resp_hdrs = ap_overlay_tables(r->pool, r->err_headers_out, resp_hdrs);
}
    else {
        /* an http/0.9 response */
        backasswards = 1;
        r->status = 200;
        r->status_line = "200 OK";

        /* no headers */
        resp_hdrs = ap_make_table(p, 20);
    }

    ap_kill_timeout(r);

    /*
     * HTTP/1.1 requires us to accept 3 types of dates, but only generate
     * one type
     */
    if ((datestr = ap_table_get(resp_hdrs, "Date")) != NULL)
        ap_table_set(resp_hdrs, "Date", ap_proxy_date_canon(p, datestr));
    if ((datestr = ap_table_get(resp_hdrs, "Last-Modified")) != NULL)
        ap_table_set(resp_hdrs, "Last-Modified", ap_proxy_date_canon(p, datestr));
    if ((datestr = ap_table_get(resp_hdrs, "Expires")) != NULL)
        ap_table_set(resp_hdrs, "Expires", ap_proxy_date_canon(p, datestr));

    /* handle the ProxyPassReverse mappings */
    if ((urlstr = ap_table_get(resp_hdrs, "Location")) != NULL)
      ap_table_set(resp_hdrs, "Location", proxy_location_reverse_map(r, urlstr));
    if ((urlstr = ap_table_get(resp_hdrs, "URI")) != NULL)
      ap_table_set(resp_hdrs, "URI", proxy_location_reverse_map(r, urlstr));
    if ((urlstr = ap_table_get(resp_hdrs, "Content-Location")) != NULL)
      ap_table_set(resp_hdrs, "Content-Location", proxy_location_reverse_map(r , urlstr));

/* check if NoCache directive on this host */
    if (nocache == 0) {
        for (i = 0; i < conf->nocaches->nelts; i++) {
            if (destaddr.s_addr == ncent[i].addr.s_addr ||
                (ncent[i].name != NULL &&
                  (ncent[i].name[0] == '*' ||
                   strstr(desthost, ncent[i].name) != NULL))) {
               nocache = 1;
               break;
            }
        }

    /* update the cache file, possibly even fulfilling the request if
     * it turns out a conditional allowed us to serve the object from the
     * cache...
     */
    i = ap_proxy_cache_update(c, resp_hdrs, !backasswards, nocache);
    if (i != DECLINED) {
        ap_bclose(f);
        return i;
    }

        /* write status line and headers to the cache file */
        ap_proxy_write_headers(c, ap_pstrcat(p, "HTTP/1.1 ", r->status_line, NULL), resp_hdrs);
    }

    /* Setup the headers for our client from upstreams response-headers */
    ap_overlap_tables(r->headers_out, resp_hdrs, AP_OVERLAP_TABLES_SET);
    /* Add X-Cache header */
    ap_table_setn(r->headers_out, "X-Cache",
                  ap_pstrcat(r->pool, "MISS from ",
                             ap_get_server_name(r), NULL));
    /* The Content-Type of this response is the upstream one. */
    r->content_type = ap_table_get (r->headers_out, "Content-Type");
    ap_log_error(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, r->server, "Content-Type: %s", r->content_type);
    /* finally output the headers to the client */
    ap_send_http_header(r);

    /* Is it an HTTP/0.9 respose? If so, send the extra data we read
       from upstream as the start of the reponse to client */
    if (backasswards) {
        ap_hard_timeout("proxy send assbackward", r);

        ap_bwrite(r->connection->client, buffer, len);
        if (c != NULL && c->fp != NULL && ap_bwrite(c->fp, buffer, len) != len) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, c->req,
                "proxy: error writing extra data to %s", c->tempfile);
            c = ap_proxy_cache_error(c);
        }
        ap_kill_timeout(r);
    }


#ifdef CHARSET_EBCDIC
    /* What we read/write after the header should not be modified
     * (i.e., the cache copy is ASCII, not EBCDIC, even for text/html)
     */
    ap_bsetflag(f, B_ASCII2EBCDIC|B_EBCDIC2ASCII, 0);
    ap_bsetflag(r->connection->client, B_ASCII2EBCDIC|B_EBCDIC2ASCII, 0);
#endif

/* send body */
/* if header only, then cache will be NULL */
/* HTTP/1.0 tells us to read to EOF, rather than content-length bytes */
/* XXX CHANGEME: We want to eventually support keepalives, which means
 * we must read content-length bytes... */
    if (!r->header_only) {
/* we need to set this for ap_proxy_send_fb()... */
        c->cache_completion = conf->cache.cache_completion;

/* XXX CHECKME: c->len should be the expected content length, or -1 if the
 * content length is not known. We need to make 100% sure c->len is always
 * set correctly before we get here to correctly do keepalive.
 */
        ap_proxy_send_fb(f, r, c, c->len, 0);
    }

    ap_proxy_cache_tidy(c);

    ap_bclose(f);

    ap_proxy_garbage_coll(r);
    return OK;
}