/*	Find name of host by calling nameserver
**	---------------------------------------
**
** Language:	ANSI C
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <libc.h>

#include "../../Implementation/HTParse.h"

/*	Layout of a log line, as used by analyse()
*/
#define HOST_OFFSET 25	/* Index at which the host address starts */
#define END_OF_DATE 10	/* Length of the date at the start of the line */
#define SKIP 4		/* Length of the leading "GET " of the request */

/*	Diagnostics
**
**	WWW_TraceFlag, the basic diagnostic switch, is not defined in this file.
**	NOTE(review): presumably it comes from the included library headers --
**	confirm.
*/
int more_trace = 0;			/* Extra detail on top of basic tracing */

/*	Host list files; each stays NULL until opened
*/
FILE *host_input = NULL;
FILE *host_output = NULL;		/* Full host list, written at the end */
FILE *host_append = NULL;		/* Gets a line for each new host found */
char *host_append_filename = NULL;	/* File name behind host_append */

/*	IP address info
**
**	One entry per address, chained into a bucket of the host hash table.
**	Do not reorder the members without checking every user of the table:
**	`next' has to remain at offset 0 for any code that overlays an entry
**	on a bucket slot.
*/
typedef struct _host_details {
    struct _host_details *next;	/* Next entry in the same bucket, or NULL */
    unsigned long addr;		/* Address in the form inet_addr() returns */
    char *name;			/* If zero, name is unknown */
    int count;			/* 1 when recorded, +1 per later cache hit */
    int status;			/* Status given when the entry was recorded */
} host_details;

/*	Hash table of known hosts
**
**	An address belongs in bucket (addr % HASH_SIZE); each bucket is a
**	NULL-terminated chain of host_details entries linked through `next'.
*/
#define HASH_SIZE 1001
static host_details *hosts[HASH_SIZE];


/*		Initialize host names
*/
void initialize_hosts(void)
{
    int i;
    for (i=0; i<HASH_SIZE; i++) hosts[i] = NULL;
}


/*		Print error message for hostname lookup
**		---------------------------------------
**
** On entry,
**	e	resolver error code (an h_errno value) to describe
**
**	Writes a one-line description to stderr, followed by a longer
**	explanation when more_trace is set.
*/
void pherror(int e)
{
    switch (e) {

    case HOST_NOT_FOUND:
	fprintf(stderr, " HOST NOT FOUND: no such host is known\n");
	break;

    case TRY_AGAIN:
	fprintf(stderr, " TRY AGAIN.\n");
	if (more_trace) fprintf(stderr,
	"   This is usually a temporary  error and means  that\n"
	"   the local server did not receive a response from  an\n"
	"   authoritative server.   A retry at some later time may \n"
	"   succeed.\n");
	break;

    case NO_RECOVERY:
	fprintf(stderr, " NO_RECOVERY.\n");
	if (more_trace) fprintf(stderr,
	"   Some  unexpected server failure was encountered.\n"
	"   This is a non-recoverable error.\n");
	break;

    case NO_DATA:
	fprintf(stderr, "NO DATA.\n");
	if (more_trace) fprintf(stderr,
	"  The requested name is valid but does  not  have  an IP address;\n"
	"  this is not a temporary error. This means that the name is known \n"
	"  to the name server but there is  no  address  associated  with\n"
	"  this  name.  Another type of request to\n"
	"  the name server using this domain  name\n"
	"  will  result in an answer; for example,\n"
	"  a mail-forwarder may be registered  for\n"
	"  this domain.\n");
	break;

    default:
	/* Report the code we were handed: h_errno may have changed
	** since the failing lookup.
	*/
	fprintf(stderr, "Unknown error from gethostbyaddr: %d\n", e);
	break;
    }
}


/*		Record a host's cannonical name
**		-------------------------------
**
**	Returns address of allocated copy of name
*/

char * record_host_name(unsigned long addr, char * name, int status, int trace)
{
    host_details * p = (host_details *) (hosts + (addr % HASH_SIZE));
    for (;p->next;p=p->next) {
        if (p->next->addr == addr) return; /* got it */
    }
    /* Not found: p->next is NULL */
    p->next = (host_details *) malloc(sizeof(*p));
    if (!p->next) {
        fprintf(stderr, "Out of memory!\n\n");
	exit(2);
    }
    p->next->next = NULL;
    p->next->addr = addr;
    p->next->status = status;
    if (name) {
	p->next->name = malloc(strlen(name)+1);
	if (!p->next->name) {
	    fprintf(stderr, "Out of memory!\n\n");
	    exit (3);
	}
	strcpy(p->next->name, name);
    } else p->next->name = NULL;
    
    p->next->count = 1;
    if (trace) fprintf(stderr, "New host %u.%u.%u.%u  is  %s\n",
			((unsigned char *)(&addr))[0],
			((unsigned char *)(&addr))[1],
			((unsigned char *)(&addr))[2],
			((unsigned char *)(&addr))[3],
    			name ? name : "UNKNOWN");
			
    if (host_append) {
	fprintf(host_append, "%03d.%03d.%03d.%03d   %2d %4d  %s \n", 
	    ((unsigned char *)(&addr))[0],
	    ((unsigned char *)(&addr))[1],
	    ((unsigned char *)(&addr))[2],
	    ((unsigned char *)(&addr))[3],
	    status,
	    0,
	    name ? name : "?"
	    );
    }
    return p->next->name;
}

/*	Parse a dotted quad whose four parts are all decimal
**
**	The host files written by this program zero-pad each part
**	("128.141.201.074").  inet_addr() takes a leading zero to mean
**	octal, so it would misread or reject such addresses.
**
**	Returns 1 and stores the address, laid out as inet_addr() would
**	return it, or returns 0 if text is not of that form.
*/
static int parse_decimal_quad(const char * text, unsigned long * addr)
{
    unsigned int part[4];
    char extra;
    struct in_addr in;
    unsigned char * octet = (unsigned char *) &in.s_addr;
    int i;

    /* The trailing %c only matches if something follows the quad */
    if (sscanf(text, "%3u.%3u.%3u.%3u%c",
		&part[0], &part[1], &part[2], &part[3], &extra) != 4) {
	return 0;
    }
    for (i = 0; i < 4; i++) {
	if (part[i] > 255) return 0;
	octet[i] = (unsigned char) part[i];
    }
    *addr = in.s_addr;
    return 1;
}

/*		Load a host file
**		================
**
**	Reads a host list and records every host in it.  Each line holds
**	    address  status  count  name
**	separated by white space.  Lines starting with '#' are comments.
**	A missing name, or the "?" placeholder which stands for an unknown
**	name in the files this program writes, is recorded as unknown.
**	Lines which cannot be parsed are skipped.
**
** On entry,
**	fn	name of the file to read
**
**	The program exits with status 6 if the file cannot be opened.
*/
void load_hosts(const char * fn)
{
    FILE * fp = fopen(fn, "r");
    char line[512];
    char addr_string[80];
    char name[256];
    unsigned long addr;
    int count, status;
    int fields;
    int lines = 0;

    if (!fp) {
	fprintf(stderr, "Can't open hosts input file %s\n", fn);
	exit(6);
    }
    while (fgets(line, sizeof(line), fp)) {

	if (line[0] == '#') continue;	/* Ignore comment lines */

	/* Field widths keep long tokens inside the buffers.  The count
	** is parsed only to reach the name: record_host_name() does
	** not take one.
	*/
	fields = sscanf(line, "%79s%d%d%255s",
			addr_string, &status, &count, name);
	if (fields < 3) continue;	/* Blank or malformed line */

	if (!parse_decimal_quad(addr_string, &addr)) {
	    /* Not plain decimal: accept whatever else inet_addr() can read */
	    addr = inet_addr(addr_string);
	    if (addr == (unsigned long) INADDR_NONE) continue;
	}

	record_host_name(addr,
		(fields >= 4 && strcmp(name, "?") != 0) ? name : NULL,
		status, more_trace);
	lines++;
    }
    fclose(fp);
    if (TRACE) fprintf(stderr, "Total of %d hosts in %s\n", lines, fn);
}


/*		Find a host name
**		----------------
*/

char * host_name(unsigned long addr)
{
    struct hostent *phost;
    host_details * p = (host_details *) (hosts + (addr % HASH_SIZE));
    for (;p->next;p=p->next) {
        if (p->next->addr == addr) {
	    p->next->count++;
	    return p->next->name; /* got it */
	}
    }
    /* Not found: p->next is NULL */

    phost = gethostbyaddr((char*)&addr, sizeof(addr), AF_INET);
    if (phost) return record_host_name(addr, phost->h_name, 0, WWW_TraceFlag);
    
    record_host_name(addr, NULL, h_errno, WWW_TraceFlag); /* Don't try again */
    if (TRACE) {
        fprintf(stderr, "*** Can't look up %u.%u.%u.%u:  ",
			((unsigned char *)(&addr))[0],
			((unsigned char *)(&addr))[1],
			((unsigned char *)(&addr))[2],
			((unsigned char *)(&addr))[3]);
	pherror(h_errno);
    }
    return NULL;
}


/*		Analyse the file
**		================
**
**	Reads log lines from stdin and writes them to stdout with the host
**	address replaced by the host name, where one can be found.
**
**	Each line is taken to hold a date of END_OF_DATE characters, a
**	second field running up to column HOST_OFFSET-1, the host address
**	starting at column HOST_OFFSET, and after the next space a request
**	whose first SKIP characters are dropped.
**
**	Lines starting with '*' or with white space are ignored, as are
**	lines too short to reach the host field.
**
**	With SUMMARY defined, the date is printed only when it changes,
**	together with the number of requests counted for the previous date.
*/

void analyse(void)

{
    char line[512];
    unsigned long addr, last_addr;
    char date[END_OF_DATE+1];
    char host[256];
    int have_host = 0;		/* Set once host[] holds a name */
    int requests = 0;

    date[0]=0;			/* invalidate */
    host[0]=0;
    last_addr = 0;
    while (fgets(line, sizeof(line), stdin)) {
	char * host_field = &line[HOST_OFFSET];
	char * end_of_host;
	char * request;
	size_t request_length;
#ifdef SUMMARY
	char * tail;
#endif

	if (line[0] == '*') continue;	/* Ignore error lines */
	if (line[0] <= ' ') continue;	/* Ignore starting with white space */

	/* Nothing beyond the end of a short line may be looked at */
	if (strlen(line) <= HOST_OFFSET) continue;

	end_of_host = strchr(host_field, ' ');
	line[HOST_OFFSET-1] =0;	/* Chop line up */
	if (end_of_host) {
	    *end_of_host = 0;
	    request = end_of_host + 1;
	} else {
	    /* No request on the line: use the empty string at its end */
	    request = host_field + strlen(host_field);
	}
	line[END_OF_DATE] = 0;
	request_length = strlen(request);
	addr = inet_addr(host_field);

	/* get new host name */

	if (!have_host || addr!=last_addr) {
	    char * name = host_name(addr);
	    host[0] = 0;
	    strncat(host, name ? name : host_field, sizeof(host)-1);
	    last_addr = addr;
	    have_host = 1;
	}

	/* Print */

#ifdef SUMMARY
	/* A request shorter than SKIP prints as nothing at all */
	tail = request + (request_length >= SKIP ? SKIP : request_length);
	if (strcmp(date, line)!=0) {
	    if (*date) printf("    %d requests on %s\n\n", requests, date);
	    requests = 0;
	    strcpy(date, line);	/* Fits: line was cut at END_OF_DATE */
	    printf("%s %s %-25s %s",
		date, &line[END_OF_DATE+1], host, tail);
	} else {
	    printf("           %s %-25s %s",
		&line[END_OF_DATE+1], host, tail);
	}
#else
	if (request_length > SKIP) {
	    HTSimplify(request+SKIP);
	    printf("%s %s %-25s %s",
		line, &line[END_OF_DATE+1], host, request+SKIP);
	} else {
	    printf("%s %s %-25s %s",
		line, &line[END_OF_DATE+1], host, "** BAD_COMMAND\n");
	}
#endif
	requests++;
	if (TRACE && (requests %1000)==0) fprintf(stderr,
		"Processed %d requests ....\n", requests);
    }

#ifdef SUMMARY
    /* The total for the last date has not been printed yet */
    if (*date) printf("    %d requests on %s\n\n", requests, date);
#endif
}

/*	Main program
**	============
**
**	Options:
**	    -v		verbose: basic diagnostics on stderr
**	    -vv		very verbose: as -v, with longer explanations
**	    -hi file	load known hosts from file
**	    -ho file	write the full host list to file when finished
**	    -ha file	load known hosts from file, then append to it
**			every host newly recorded
**	    -a addr	look up the one address, print its name and exit
**
**	Arguments which are not recognised are ignored.  Unless -a is
**	given, the log on stdin is analysed and the result written to
**	stdout.
**
**	Exit status: 0 on success, 4 if an output file cannot be opened,
**	5 if an option is missing its value.
*/

int main(int argc, char * argv[])
{
    int a;

    initialize_hosts();

    for (a = 1; a < argc; a++) {
	const char * option = argv[a];
	int takes_value = !strcmp(option, "-hi") || !strcmp(option, "-ho")
			|| !strcmp(option, "-ha") || !strcmp(option, "-a");

	if (!strcmp(option, "-v")) {
	    WWW_TraceFlag = 1; /* Verbose */

	} else if (!strcmp(option, "-vv")) {
	    WWW_TraceFlag = 1; /* Verbose */
	    more_trace = 1;		/* very verbose */

	} else if (takes_value) {
	    char * value;

	    /* Check before stepping on: argv[argc] is a NULL pointer */
	    if (a + 1 >= argc) {
		fprintf(stderr, "Option %s needs an argument.\n", option);
		exit (5);
	    }
	    value = argv[++a];

	    if (!strcmp(option, "-hi")) {
		load_hosts(value);

	    } else if (!strcmp(option, "-ho")) {
		host_output = fopen(value, "w");
		if (!host_output) {
		    fprintf(stderr, "can't open %s for host output.\n", value);
		    exit (4);
		}

	    } else if (!strcmp(option, "-ha")) {
		host_append_filename = value;

	    } else {	/* -a addr just looks up one address */
		char * name = host_name(inet_addr(value));
		/* "?" stands for an unknown name, as in the host list */
		printf("%s\n", name ? name : "?");
		exit (0);
	    }
	}

    } /* end scan argument list */

    if (host_append_filename) {
	/* Load first, while host_append is still NULL, so that the
	** hosts read from the file are not appended to it again.
	*/
	load_hosts(host_append_filename);
	host_append = fopen(host_append_filename, "a");
	if (!host_append) {
	    fprintf(stderr, "can't open %s for host append.\n",
		    host_append_filename);
	    exit (4);
	}
    }

    analyse();

    if (host_append) fclose(host_append);


/*	Output host list
*/
    if (host_output) {
	int i;
	host_details *p;
	for(i=0; i<HASH_SIZE; i++) {
	    for(p=hosts[i]; p; p=p->next) {
		const unsigned char * octet = (const unsigned char *) &p->addr;
		fprintf(host_output, "%03d.%03d.%03d.%03d   %d  %d    %s\n",
			octet[0],
			octet[1],
			octet[2],
			octet[3],
			p->status,
			p->count,
			p->name ? p->name : "?"
			);
	    }
	}
	fclose(host_output);
    }

    return 0;
}

/*	Meaning of the resolver error codes which pherror() reports:
**
          HOST_NOT_FOUND  No such host is known.

          TRY_AGAIN       This is usually a temporary  error  and
                          means  that  the  local  server did not
                          receive  a  response  from  an
                          authoritative  server.   A  retry  at
                          some later time may succeed.

          NO_RECOVERY     Some  unexpected  server  failure   was
                          encountered.  This is a non-recoverable
                          error.

          NO_DATA         The requested name is  valid  but  does
                          not  have  an IP address; this is not a
                          temporary error. This  means  that  the
                          name  is  known  to the name server but
                          there is  no  address  associated  with
                          this  name.  Another type of request to
                          the name server using this domain  name
                          will  result in an answer; for example,
                          a mail-forwarder may be registered  for
                          this domain.

*/