/* simplified http client - I intend to migrate to WWW Library in due course!

   Note supports HTRQ/V1.0 and is backward compatible with earlier servers.
   Well known file extensions are recognised explicitly.

   Otherwise HTML and Postscript documents are recognised by the presence
   of key strings at/near the start of their data contents.

   For older servers which send a <PLAINTEXT>\r\n tag at the start of non-HTML
   files, the tag is treated as part of the header and is not passed to the display code.
*/

#include <X11/Xlib.h>
#include "www.h"

#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <string.h>

extern int debug;
extern int UseHTTP2;
extern int AbortFlag;
extern int Authorize;
extern char *gateway;
extern char *user;

/* document type codes; GetDocument() stores and compares these in NewDoc.type */
#define TEXTDOCUMENT    0
#define HTMLDOCUMENT    1

/* NOTE(review): BUFSIZE and THRESHOLD are not referenced in this part of the
   file; presumably sizing constants for the data-reading code - confirm */
#define BUFSIZE     8192
#define THRESHOLD   1024

/* URL scheme names, indexed by protocol number.

   ParseReference() matches the text before ':' against these names and
   stores the index of the match in NewDoc.protocol; UnivRefLoc() and
   ProtcolName() index the table with a protocol number to get the name back.
   The order therefore has to agree with the protocol constants (MYFILE,
   HTTP, FTP, NEWS, GOPHER, TELNET, ...) - presumably defined in www.h;
   confirm when adding entries. */

char *scheme[9] =
{
    "file",
    "http",
    "ftp",
    "news",
    "gopher",
    "telnet",
    "rlogin",
    "wais",
    "finger"
};


/* NewDoc describes the document being fetched (filled in by ParseReference()
   and GetDocument()); CurrentDoc supplies the defaults (host, port, protocol,
   path) against which relative references are resolved. */
Doc NewDoc, CurrentDoc;
char *url;  /* not assigned anywhere in this part of the file */

/* Return the name of this host.

   The name is looked up once with gethostname() and cached in a static
   buffer, so the returned pointer stays valid for the life of the program
   and must not be freed by the caller.

   If the lookup fails, the error is reported with perror() and an empty
   string is returned; the cache is left empty so the next call retries. */

char *MyHostName(void)
{
    static char myhost[HOSTSIZ];

    if (*myhost)
        return myhost;

    if (gethostname(myhost, HOSTSIZ) != 0)
    {
        perror("can't get gethostname");

        /* the buffer contents are unspecified after a failure: clear it so
           a later call doesn't mistake garbage for a cached name */
        myhost[0] = '\0';
        return myhost;
    }

    /* gethostname() need not zero terminate a truncated name */
    myhost[HOSTSIZ-1] = '\0';

    return myhost;
}

/* Check whether host names this machine.

   Returns 1 when host equals the local host name (ignoring case), or when
   host has no domain part and equals the first label of the local name,
   e.g. "dragget" for "dragget.hpl.hp.com".  Returns 0 otherwise, including
   for a NULL or empty host. */
int IsMyName(char *host)
{
    char *me;
    size_t len;

    if (!host || !*host)
        return 0;

    me = MyHostName();

    /* full match on name, e.g. dragget.hpl.hp.com */

    if (strcasecmp(host, me) == 0)
        return 1;

    /* a host with a domain part must have matched in full */

    if (strchr(host, '.'))
        return 0;

    /* match abbreviated host name: dragget with dragget.hpl.hp.com
       the abbreviation has to cover the whole first label, so that
       "drag" is not taken for "dragget" */

    len = strlen(host);

    if (strncasecmp(host, me, len) == 0 && me[len] == '.')
        return 1;

    return 0;
}

/* safe version of malloc()

   Returns a block of n bytes.  Never returns NULL: if the allocation
   fails the user is alerted with Beep(), a message is written to stderr
   and the program exits with status 1. */

char *safemalloc(int n)
{
    char *p;

    /* malloc(0) is allowed to return NULL, which would be misreported
       as memory exhaustion - ask for one byte instead */

    if (n == 0)
        n = 1;

    p = malloc(n);

    if (!p)
    {
        Beep();
        fprintf(stderr, "ERROR: malloc failed, memory exhausted!\n");
        exit(1);
    }

    return p;
}

/* duplicate string s and ensure it is zero terminated
   where n is the maximum number of chars copied from s

   The result is allocated with safemalloc() (n+1 bytes), so it is never
   NULL and the caller releases it with free().  A negative n is treated
   as zero and yields an empty string. */

char *strndup(char *s, int n)
{
    char *p;

    if (n < 0)
        n = 0;

    /* safemalloc() already handles allocation failure (beep, message, exit) */
    p = safemalloc(n+1);

    strncpy(p, s, n);
    p[n] = '\0';

    return p;
}

/* set current host, scheme etc to self

   Initialises CurrentDoc to describe a local file: host is this machine's
   name, protocol is MYFILE, port is OLD_PORT and path is a private copy of
   the path argument.  buffer, hdrlen, length and anchor are cleared.
   Exits the program if the host name cannot be obtained or memory runs out. */

void InitCurrent(char *path)
{
    char *p;

    p = safemalloc(HOSTSIZ+1);

    if (gethostname(p, HOSTSIZ) != 0)
    {
        perror("gethostname");
        exit(1);
    }

    /* gethostname() need not zero terminate a truncated name */
    p[HOSTSIZ] = '\0';

    CurrentDoc.buffer = NULL;
    CurrentDoc.hdrlen = 0;
    CurrentDoc.length = 0;
    CurrentDoc.host = p;
    CurrentDoc.port = OLD_PORT;
    CurrentDoc.protocol = MYFILE;

    /* copy via safemalloc() so that an allocation failure is not silently
       stored as a NULL path, which later code dereferences */
    CurrentDoc.path = safemalloc(strlen(path) + 1);
    strcpy(CurrentDoc.path, path);

    CurrentDoc.anchor = 0;
}


/* if host name contains "." check that it ends with ".hp.com"

   Returns 0 when name looks like an HP host:
     - it has no '.' at all (probably a local hp node),
     - it starts with "15." (HP host address such as 15.127.78.3), or
     - it ends with ".hp.com" (case ignored).
   Returns 1 for any other name. */

int not_hp_domain(char *name)
{
    char *p;

    p = strrchr(name, '.');

    if (!p)
        return 0; /* no domain fields - probably a local hp node */

    /* check for HP host address as 15.127.78.3 */

    if (strncmp(name, "15.", 3) == 0)
        return 0;

    /* otherwise check for host name ending with ".hp.com"

       p is the last '.', i.e. the one in ".com", so ".hp.com" would start
       three chars earlier; test the distance first rather than forming a
       pointer in front of the string */

    if (p - name < 3)
        return 1;

    return (strcasecmp(p - 3, ".hp.com") == 0 ? 0 : 1);
}

/* Map a protocol number to its scheme name from the scheme[] table;
   numbers outside 0 .. NPROTOCOLS-1 give "unknown". */
char *ProtcolName(int protocol)
{
    if (protocol < 0 || protocol >= NPROTOCOLS)
        return "unknown";

    return scheme[protocol];
}

/* Build the full URL text for doc from its protocol, host, port, path
   and anchor fields:

        news:path
        ftp://host/path
        file:path
        scheme://host:port/path     (any other protocol, when host is set)

   with "#anchor" appended when the anchor is non-empty.

   Any string already held in doc->url is freed and the field cleared; the
   new URL is returned as a freshly allocated string (NULL if strdup()
   fails) which callers store back into doc->url.  Output longer than the
   internal 512 byte buffer is truncated rather than overrunning it. */

char *UnivRefLoc(Doc *doc)
{
    int n;
    char buf[512];
    const char *path, *host;

    /* a missing field prints as an empty string instead of passing NULL to %s */
    path = (doc->path ? doc->path : "");
    host = (doc->host ? doc->host : "");

    buf[0] = '\0';

    if (doc->protocol == NEWS)
        n = snprintf(buf, sizeof buf, "news:%s", path);
    else if (doc->protocol == FTP)
        n = snprintf(buf, sizeof buf, "ftp://%s%s", host, path);
    else if (doc->protocol == MYFILE)
        n = snprintf(buf, sizeof buf, "file:%s", path);
    else if (doc->host)
        n = snprintf(buf, sizeof buf, "%s://%s:%d%s",
                     ProtcolName(doc->protocol), doc->host, doc->port, path);
    else
        n = 0;  /* nothing known about the location: URL starts out empty */

    /* snprintf returns the length it wanted to write: clamp to what fitted */

    if (n < 0)
    {
        n = 0;
        buf[0] = '\0';
    }
    else if (n >= (int)sizeof buf)
        n = (int)sizeof buf - 1;

    if (doc->anchor && *doc->anchor != '\0')
        snprintf(buf+n, sizeof buf - n, "#%s", doc->anchor);

    if (doc->url)
    {
        free(doc->url);
        doc->url = 0;
    }

    return strdup(buf);
}

/* Number of leading chars of an absolute path that make up the directory
   found by walking back over n '/' separators from the end of the path:

    n = 1       ./          (directory holding the file)
    n = 2       ../
    n = 3       ../../

    etc.

   The count includes the final '/'.  If the start of the path is reached
   before n separators have been seen, the walk stops there and 1 is
   returned.
*/

int ParentDirCnt(char *path, int n)
{
    size_t pos;

    pos = strlen(path);

    /* step backwards, using up one of the n levels at every '/' */

    while (n > 0 && pos > 0)
    {
        --pos;

        if (path[pos] == '/')
            --n;
    }

    return (int)(pos + 1);
}

/* parse document reference, setting up global params
   and returning pointer to expanded reference

   s is the reference text: a full URL ("scheme://host:port/path#anchor"),
   a scheme-less "//host/path", an absolute path or a path relative to
   CurrentDoc.path.  where is LOCAL to start from a local file reference;
   otherwise the defaults are inherited from CurrentDoc.

   NewDoc is released with FreeDoc() and its where, protocol, host, port,
   path, anchor and url fields are then filled in.  The return value is
   NewDoc.url (owned by NewDoc - don't free it), or NULL after a Warn()
   when the scheme is telnet or is not in the scheme[] table.

   The text of s is only read, never modified. */

char *ParseReference(char *s, int where)
{
    char *p, *r, *anchor;
    int c, i, m, n, k, has_domain;

    FreeDoc(&NewDoc);  /* plug memory leaks */

    /* first look for a scheme: the text which occurs
       before a ":" and which must occur before any "/" char */

    if (where == LOCAL)
    {
        NewDoc.where = LOCAL;
        NewDoc.protocol = MYFILE;
    }
    else
    {
        NewDoc.where = CurrentDoc.where;
        NewDoc.protocol = CurrentDoc.protocol;
    }

    p = s;

    while ((c = *p) != '\0')
    {
        if (c == '/')
        {
            c = *s;
            goto parsehost;
        }

        if (c == ':')
        {
            NewDoc.where = REMOTE;
            n = p - s;

            /* compare the n chars before the ':' with each scheme name;
               done by length so that s needn't be temporarily
               terminated (it may live in read-only storage) */

            for (i = 0; i < NPROTOCOLS; ++i)
            {
                if (strncasecmp(s, scheme[i], n) == 0 && scheme[i][n] == '\0')
                {
                    NewDoc.protocol = i;
                    break;
                }
            }

            if (i == TELNET)
            {
                Warn("telnet not yet supported: %s", s);
                return NULL;
            }

            if (i == NPROTOCOLS)
            {
                if (debug)
                    fprintf(stderr, "Unknown protocol in %s\n", s);

                Warn("Unknown protocol in %s", s);
                return NULL;
            }

            s = p+1;  /* s points to just after the ':' */

            /* check for local file reference "file:/tmp/fred.html"
                or misuse of file: in place of ftp: */

            if (NewDoc.protocol == MYFILE)
            {
                /* local file ref ? */

                if ( !(p[1] == '/' && p[2] == '/') )
                {
                    NewDoc.port = 0;
                    NewDoc.host = strdup(MyHostName());
                    NewDoc.where = LOCAL;   /* could cause problems */
                    goto parse_path;
                }

                NewDoc.protocol = FTP;
            }

            /* check for news: scheme */

            if (NewDoc.protocol == NEWS && strncmp(s, "//", 2) != 0)
            {
                n = strlen(s);
                NewDoc.path = safemalloc(n+1);  /* exits rather than returning NULL */
                strcpy(NewDoc.path, s);
                NewDoc.port = 0;
                NewDoc.host = strdup(NEWS_SERVER);
                NewDoc.anchor = NULL;
                NewDoc.url = UnivRefLoc(&NewDoc);

                return NewDoc.url;
            }

            c = *s;
            goto parsehost;
        }

        ++p;
    }

    /* neither ':' nor '/' found: here c == '\0' */

    NewDoc.protocol = CurrentDoc.protocol;

  parsehost:

    /* the chars following "//" and preceding a ":" or a "/" */

    if (c == '/' && s[1] == '/')
    {
        p = s + 2;
        has_domain = 0;

        for (i = 0; (c = *p) && c != '/' && c != ':'; ++i)
        {
            ++p;

            if (c == '.')
                has_domain = 1;
        }

        /* check if we need to add local domain */

        if (has_domain)
        {
            NewDoc.host = safemalloc(i+1);
            memcpy(NewDoc.host, s+2, i);
            NewDoc.host[i] = '\0';
        }
        else
        {
            r = strchr(MyHostName(), '.');

            if (r)
            {
                n = strlen(r);
                NewDoc.host = safemalloc(i+n+1);
                memcpy(NewDoc.host, s+2, i);
                strcpy(NewDoc.host+i, r);
            }
            else /* my host doesn't contain domain name */
            {
                fprintf(stderr, "ERROR - local hostname needs full internet domain\n");
                NewDoc.host = strndup(s+2, i);
            }
        }

        s = p;
    }
    else if (NewDoc.protocol == MYFILE || NewDoc.protocol == HTTP)
        NewDoc.host = strdup(CurrentDoc.host);

    /* at this stage c == *s and s points to ':' of port name or '/' of path */

    if (c == ':')  /* then parse port */
    {
        /* fall back on the default port if no number follows the ':' */

        if (sscanf(s+1, "%d", &NewDoc.port) != 1)
            NewDoc.port = (NewDoc.protocol == GOPHER ? GOPHERPORT : HTTP_PORT);

        /* and skip to next '/' char - or to the end of the
           reference when no path follows the port */

        while ((c = *s) != '\0' && c != '/')
            ++s;
    }
    else if (NewDoc.protocol == GOPHER)
        NewDoc.port = GOPHERPORT;
    else if (NewDoc.host && CurrentDoc.host &&
                 strcasecmp(NewDoc.host, CurrentDoc.host) == 0)
        NewDoc.port = CurrentDoc.port;   /* host may be unset for other schemes */
    else
        NewDoc.port = HTTP_PORT;

  parse_path:

    anchor = strchr(s, '#');

    if (anchor)
    {
        k = anchor - s;
        NewDoc.anchor = strdup(anchor+1);
    }
    else
    {
        k = strlen(s);
        NewDoc.anchor = 0;
    }

    /* the path is absolute if scheme and host were present

       if missing, the path may start with "./" or perhaps
       "../" or even "../../../" etc.

       If the current path is absolute then use it to
       convert the new path to its absolute form */

    /* NOTE(review): NewDoc.path has not been assigned since FreeDoc() above,
       so the comparison with CurrentDoc.path looks unintended (a host
       comparison may have been meant).  It is kept, but guarded so that a
       NULL path counts as "different" instead of being handed to strcmp() */

    if ( s[0] == '/' || NewDoc.protocol != CurrentDoc.protocol ||
             (NewDoc.protocol != HTTP &&
              NewDoc.protocol != MYFILE &&
              (NewDoc.path == NULL || CurrentDoc.path == NULL ||
               strcmp(NewDoc.path, CurrentDoc.path) != 0)) )
    {   /* then absolute path */
        NewDoc.path = safemalloc(1+k);
        memcpy(NewDoc.path, s, k);
        NewDoc.path[k] = '\0';
    }
    else if (s[0] == '.' && s[1] == '/')  /* current directory */
    {
        if (CurrentDoc.path[0] == '/')  /* absolute path */
        {
            n = ParentDirCnt(CurrentDoc.path, 1);
            NewDoc.path = safemalloc(n + k - 2 + 1);
            memcpy(NewDoc.path, CurrentDoc.path, n);
            memcpy(NewDoc.path+n, s+2, k - 2);
            NewDoc.path[n + k - 2] = '\0';
        }
        else
        {
            NewDoc.path = safemalloc(1+k);
            memcpy(NewDoc.path, s, k);
            NewDoc.path[k] = '\0';
        }
    }
    else if (s[0] == '.' && s[1] == '.' && s[2] == '/') /* parent directory */
    {
        /* m counts directory levels for ParentDirCnt(): one for the
           current directory plus one for each leading "../" */

        m = 1;
        p = s;

        do
        {
            ++m;
            p += 3;
        }
        while (p[0] == '.' && p[1] == '.' && p[2] == '/');

        if (CurrentDoc.path[0] == '/')  /* absolute path */
        {
            n = ParentDirCnt(CurrentDoc.path, m);
            NewDoc.path = safemalloc(n + k - (p - s) + 1);
            memcpy(NewDoc.path, CurrentDoc.path, n);
            memcpy(NewDoc.path+n, p, k - (p - s));
            NewDoc.path[n + k - (p - s)] = '\0';
        }
        else
        {
            NewDoc.path = safemalloc(1+k);
            memcpy(NewDoc.path, s, k);
            NewDoc.path[k] = '\0';
        }
    }
    else if (NewDoc.protocol == GOPHER)
        NewDoc.path = strdup("/");
    else /* must be in current directory */
    {
        if (CurrentDoc.path[0] == '/')  /* absolute path */
        {
            n = ParentDirCnt(CurrentDoc.path, 1);
            NewDoc.path = safemalloc(n + k + 1);
            memcpy(NewDoc.path, CurrentDoc.path, n);
            memcpy(NewDoc.path+n, s, k);
            NewDoc.path[n + k] = '\0';
        }
        else
        {
            NewDoc.path = safemalloc(1+k);
            memcpy(NewDoc.path, s, k);
            NewDoc.path[k] = '\0';
        }
    }

    NewDoc.url = UnivRefLoc(&NewDoc);

    return NewDoc.url;
}

/* hex digit char for a value 0..15 */
#define ToHex(c)    ((c) > 9 ? (c) + 'A' - 10 : (c) + '0')

/* punctuation which is copied into a search reference without escaping */
#define IsSearchSym(c)  ((c) == '$' || (c) == '_' || (c) == '@' || (c) == '!' || (c) == '%' \
     || (c) == '^' || (c) == '&' || (c) == '*' || (c) == '(' || (c) == ')' || (c) == '.')

/* SearchRef - uses current params plus keywords to build search reference

   The result is "http://host:port/path?" built from CurrentDoc, followed
   by the keywords: runs of spaces/commas between keywords become a single
   '+', letters, digits and the IsSearchSym() chars are copied as they are
   and every other char is written as %XX.

   The reference is built in a static 256 byte buffer which is overwritten
   by the next call; keywords which don't fit are dropped. */
char *SearchRef(char *keywords)
{
    int c, d;
    char *p, *end;
    static char buf[256];

    snprintf(buf, sizeof buf, "http://%s:%d%s?", CurrentDoc.host, CurrentDoc.port, CurrentDoc.path);

    c = strlen(buf);

    /* avoid double question mark for case when base document ends with "?" */

    if (c >= 2 && buf[c-2] == '?')
        --c;

    p = buf + c;
    end = buf + sizeof buf - 1;  /* last byte is kept for the terminator */

    /* skip leading white space */

    while (*keywords == ' ')
        ++keywords;

    /* chars are read as unsigned so that 8 bit input is safe
       to hand to isalpha()/isdigit() */

    while ((c = (unsigned char)*keywords++))
    {
        /* comma or space char separates keywords */

        if (c == ',' || c == ' ')
        {
            while ((c = (unsigned char)*keywords++) && (c == ' ' || c == ','))
                ;

            if (c == '\0')
                break;

            if (p >= end)
                break;

            *p++ = '+';
        }

        if (isalpha(c) || isdigit(c) || IsSearchSym(c))
        {
            if (p >= end)
                break;

            *p++ = c;
            continue;
        }

        /* an escape needs room for all three chars */

        if (end - p < 3)
            break;

        d = c & 0xF;
        c = (c >> 4) & 0xF;

        *p++ = '%';
        *p++ = ToHex(c);
        *p++ = ToHex(d);
    }

    *p = '\0';

    return buf;
}

/* Build the request sent to the server for doc.

   With UseHTTP2 clear, or for port 70 / the gopher protocol, the request
   is the old style "GET path\r\n".  Otherwise it is
   "GET path HTRQ/V1.0\r\n", then an "Authorization: user ..." line when
   who (or failing that the global user) is set, then the closing blank
   line(s).

   *len is set to the number of bytes in the request.  The request lives
   in a static 512 byte buffer which is overwritten by the next call; a
   request too long for the buffer is truncated to fit. */

char *HTRQrequestString(Doc *doc, int *len, char *who)
{
    int n;
    const char *name;
    static char buf[512];

    /* Gopher servers don't understand HTRQ/V1.0 */

    if (!UseHTTP2 || doc->port == 70 || doc->protocol == GOPHER)
        n = snprintf(buf, sizeof buf, "GET %s\r\n", doc->path);
    else
    {
        name = (who ? who : user);  /* explicit name wins over the default */

        if (name)
            n = snprintf(buf, sizeof buf,
                         "GET %s HTRQ/V1.0\r\nAuthorization: user %s\r\n\r\n",
                         doc->path, name);
        else
            n = snprintf(buf, sizeof buf,
                         "GET %s HTRQ/V1.0\r\n\r\n\r\n", doc->path);
    }

    /* snprintf returns the length it wanted to write: report what fitted */

    if (n < 0)
    {
        n = 0;
        buf[0] = '\0';
    }
    else if (n >= (int)sizeof buf)
        n = (int)sizeof buf - 1;

    *len = n;
    return buf;
}

/* Guess whether a buffer holds HTML.

   Scans the first 1K of the len bytes at p (fewer if len is smaller) and
   returns 1 as soon as one of these tags starts there, ignoring case:

        </title   <h1>   <li>   <xmp>   <pre>   <isindex>

   Returns 0 if none is found.  Only tags lying wholly within the first
   len bytes count, so nothing beyond the data is examined. */

int IsHTMLDoc(char *p, int len)
{
    static const struct { const char *text; int n; } tags[] =
    {
        { "</title",   7 },  /* as before, the closing '>' is not checked */
        { "<h1>",      4 },
        { "<li>",      4 },
        { "<xmp>",     5 },
        { "<pre>",     5 },
        { "<isindex>", 9 }
    };
    int i, t, limit, left;

    limit = (len < 1024 ? len : 1024);

    for (i = 0; i < limit; ++i)
    {
        if (p[i] != '<')
            continue;

        left = len - i;  /* bytes available from the '<' onwards */

        for (t = 0; t < (int)(sizeof tags / sizeof tags[0]); ++t)
        {
            if (tags[t].n <= left && strncasecmp(p + i, tags[t].text, tags[t].n) == 0)
                return 1;
        }
    }

    return 0;
}

int RecogniseMimeType(char *p)
{
    if (strncmp(p, "text/html", 9) == 0)
        return HTMLDOCUMENT;

    if (strncmp(p, "image/gif", 9) == 0)
        return XVDOCUMENT;
}

/* Measure the HTTP header at the start of a zero terminated reply.

   Returns the number of bytes up to and including the blank line which
   ends the header (an empty line ended by "\n" or "\r\n").  Returns 0 if
   buf doesn't start with "HTTP" or if no blank line is found.

   When a "Content-Type: " line is met on the way, *type is set from
   RecogniseMimeType(); otherwise *type is left alone. */

int HeaderLength(char *buf, int *type)
{
    char *p;

    if (strncmp(buf, "HTTP", 4) != 0)
        return 0;

    p = buf;

    /* step from line to line; stop when there are no more line ends
       instead of running on past the end of the data */

    while ((p = strchr(p, '\n')) != NULL)
    {
        ++p;  /* first char of the next line */

        if (strncasecmp(p, "content-type: ", 14) == 0)
            *type = RecogniseMimeType(p+14);

        if (*p == '\n')
            return (int)(1 + p - buf);

        if (p[0] == '\r' && p[1] == '\n')
            return (int)(2 + p - buf);
    }

    return 0;
}

/* Get specified href according to where parameter:

        a)  LOCAL defaults to direct file access
        b)  REMOTE defaults to CurrentDoc.where

    who is NULL or a username:password pair for authorization
    where specifies if href is to be interpreted as a local file name.

    The global NewDoc is set up for all fields except for height.

    Returns the document data, or NULL on failure: the reference could
    not be parsed, no socket or connection could be made, the request
    could not be sent, no data arrived, or the server answered 401 (in
    which case GetAuthorization() is called for the URL).

    The order of work is: parse the reference; local references go to
    GetFile(); the cache is consulted; documents on this machine are read
    directly; news and ftp are handed to their own fetchers; everything
    else is requested over a socket, retrying on HTTP_PORT when a
    connection to OLD_PORT fails.
*/

char *GetDocument(char *href, char *who, int where)
{
    int s, n, len, vg;
    char *p, *request, *buf;

    NewDoc.length = 0;

    /* parse href to unpack fields and create absolute URL */

    if (!ParseReference(href, where))   /* itself responsible for warnings */
        return NULL;

    if (NewDoc.where)
        return GetFile(href);

    /* manage name:password */

    if (who)
        StoreNamePW(who);
    else
        who = RetrieveNamePW();

    /* next check if document is in cache - if not
       it sets NewDoc.cache to suitable filename
       to store retrieved data in shared cache */

    if (NewDoc.where == REMOTE && (buf = GetCachedDoc()))
        return buf;

    ShowAbortButton(1);

    /* check if we could get the document directly - note kludge for own system

       this is tested before the news/ftp fetches so that a document on
       this machine isn't first fetched over the network and then thrown away */

    if (NewDoc.protocol == MYFILE || ((NewDoc.protocol == HTTP || NewDoc.protocol == FTP) && IsMyName(NewDoc.host)))
    {
        ShowAbortButton(0);
        return GetFile(NewDoc.path);
    }

    /* news and ftp have their own fetchers: hand back their result
       rather than dropping through to the HTTP request below */

    if (NewDoc.protocol == NEWS)
    {
        buf = GetNewsDocument(NewDoc.host, NewDoc.path);
        RegisterDoc(buf);
        ShowAbortButton(0);
        return buf;
    }

    if (NewDoc.protocol == FTP)
    {
        buf = GetFTPdocument(NewDoc.host, NewDoc.path, who);
        RegisterDoc(buf);
        ShowAbortButton(0);
        return buf;
    }

    /* create a socket */

    s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

    if (s == -1)
    {
        ShowAbortButton(0);
        Warn("Couldn't create a socket for %s!\n", NewDoc.host);
        return NULL;
    }

    Announce("Connecting to %s on port %d", NewDoc.host, NewDoc.port);

    vg = 0;

    /* NOTE(review): s is not closed on the failure paths below; presumably
       Connect()/GetData() look after the socket - confirm */

    if (!Connect(s, NewDoc.host, NewDoc.port, &vg))
    {
        if (NewDoc.port != OLD_PORT)
        {
            if (!Authorize)
                ShowAbortButton(0);

            return NULL;
        }

        /* the old port failed: try again on the standard one */

        Announce("Retrying %s on port %d", NewDoc.host, HTTP_PORT);

        s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

        if (s == -1)
        {
            ShowAbortButton(0);
            Warn("Couldn't create a socket for %s!\n", NewDoc.host);
            return NULL;
        }

        if (!Connect(s, NewDoc.host, HTTP_PORT, &vg))
        {
            if (!Authorize)
                ShowAbortButton(0);

            return NULL;
        }

        NewDoc.port = HTTP_PORT;
        NewDoc.url = UnivRefLoc(&NewDoc);  /* URL now carries the new port */
    }

    /* set up request string: "GET http://fred.hp.com/hypertext/pub/junk.html" */

    request = HTRQrequestString(&NewDoc, &len, who);

    /* the text is passed as an argument, never as the format:
       paths and URLs may contain '%' */

    Announce("%s", TextLine(request));

    /* and send it to server */

    if (XPSend(s, request, len, 0) != len)
    {
        ShowAbortButton(0);
        Warn("Couldn't make connection with %s on port %d!\n", NewDoc.host, NewDoc.port);
        return NULL;
    }

    NewDoc.buffer = GetData(s, &len);

    if (debug)
        printf("received a total of %d bytes\n", len);

    if (!NewDoc.buffer)
    {
        ShowAbortButton(0);
        Beep();
        SetStatusString(NULL);
        return NULL;
    }

    if (len == 0)
    {
        ShowAbortButton(0);
        Warn("No data available for %s", NewDoc.url);
        FreeDoc(&NewDoc);
        return NULL;
    }

    /* check for HTTP status code of 401 i.e. unauthorized access */

    if ((strncmp(NewDoc.buffer, "HTTP/", 5) == 0) && (p = strchr(NewDoc.buffer, ' ')))
    {
        n = 0;  /* stays 0 (and so counts as "not 200") if no number follows */
        sscanf(p+1, "%d", &n);

        if (n == 401)
        {
            Beep();
            GetAuthorization(REMOTE, NewDoc.url);
            return NULL;
        }
        else if (n != 200)
            Beep();

        NewDoc.hdrlen = HeaderLength(NewDoc.buffer, &NewDoc.type);
    }
    else if (strncasecmp(NewDoc.buffer, "<plaintext>", 11) == 0)
    {  /* strip <PLAINTEXT>\r\n tag at start of text files */
        n = 11;

        if (NewDoc.buffer[n] == '\r')
            ++n;

        if (NewDoc.buffer[n] == '\n')
            ++n;

        NewDoc.hdrlen = n;

        if (NewDoc.type == HTMLDOCUMENT)
            NewDoc.type = TEXTDOCUMENT;
    }

    ShowAbortButton(0);

  /* buffer and length will alter if file is decompressed */

    NewDoc.length = len;
    NewDocumentType();  /* determine document type */

    RegisterDoc(NewDoc.buffer);
    Announce("%s", NewDoc.url);
    return NewDoc.buffer;
}