
/*** glog.c -- analysis tool for Unix gopherd logs ***/

/***
 *** Usage : glog < logfile > reportfile
 ***/

/*** Description: glog munges through a Unix gopherd log and extracts
 *** important looking statistics. It catalogs all hosts that have connected
 *** to the gopherd during the logging period and sorts them according to the
 *** number of accesses. It does the same for each directory and file accessed,
 *** and ranks them according to popularity.
 ***/

/***
 *** by: Chuck Shotton - U of Texas Health Science Center - Houston,
 ***                     Office of Academic Computing
 ***                     cshotton@oac.hsc.uth.tmc.edu
 ***                     6/17/92
 ***/


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifdef THINK_C
#include <console.h>
#endif

#define GLOG_VERSION "Gopher Log Analyzer v.1.0\n"

typedef struct node_rec {
	char data[64];
	int hits;
	struct node_rec *left, *right;
} NODE_REC;

typedef NODE_REC *NODE_PTR;

/*******************************/

/* Roots of the two symbol tables filled by ProcessLine(): one keyed by */
/* host name, one keyed by document path.                              */
NODE_PTR hosts, docs;
/* Fields of the log line most recently handed to ProcessLine(), in the */
/* order they are read from the line.                                   */
char day[4], month[4], date[3], hours[9], year[5], pid[6],
	hostname[64], message1[12], message2[20], path[64];
/* "month date, year" strings built by main() after the first and the */
/* last log line; printed by ShowStats() as the report period.        */
char start_date[20], stop_date[20];

/*******************************/

/* Read one log line from stdin into buf (at most size-1 characters).
 * If the line is longer than the buffer, the part that does not fit is
 * read and discarded so that the next call starts on a fresh line.
 * Returns 1 when a line was read, 0 at end of input or on a read error.
 */
static int glog_read_line(char *buf, int size)
{
	int c;

	if (fgets(buf, size, stdin) == NULL)
		return 0;

	if (strchr(buf, '\n') == NULL) {
		/* over-long line (or unterminated last line): skip the rest */
		do {
			c = getchar();
		} while (c != EOF && c != '\n');
	}
	return 1;
}

/* Program entry point: analyze the gopherd log arriving on stdin and
 * write the report to stdout.
 *
 * The date fields left behind by ProcessLine() after the first line give
 * the start of the report period; the ones left after the last line give
 * the end.  Every line is processed exactly once: the loop is driven by
 * the result of the read itself rather than by feof(), which only turns
 * true after a read has already failed.
 */
int main(argc, argv)
int argc;
char **argv;
{
char line[256];

#ifdef THINK_C
	argc = ccommand(&argv);
#endif

	fputs(GLOG_VERSION, stdout);

	Initialize();

	/*process the first line here to get the date info*/
	if (glog_read_line(line, (int) sizeof line))
		ProcessLine(line);
	/* precisions keep the result inside start_date no matter what */
	/* the field buffers contain: 3 + 1 + 2 + 2 + 4 characters     */
	sprintf(start_date, "%.3s %.2s, %.4s", month, date, year);

	while (glog_read_line(line, (int) sizeof line))
		ProcessLine(line);

	sprintf(stop_date, "%.3s %.2s, %.4s", month, date, year);

	ShowStats();

	exit(0);
}

/*******************************/

/* Start with both symbol tables (hosts and documents) empty. */
Initialize()
{
	docs = NULL;
	hosts = NULL;
}

/*******************************/
/* Characters that separate fields on a log line (what sscanf's %s skips). */
static const char glog_blanks[] = " \t\n\r\f\v";

/* Copy the next whitespace-delimited field of src into dst.
 * At most cap-1 characters are stored (cap must be at least 1) and dst is
 * always NUL-terminated; characters of an over-long field that do not fit
 * are skipped, so the following field still starts in the right place.
 * Returns the position just past the field, or NULL when src is NULL or
 * holds no further field (dst is left untouched in that case).
 */
static const char *glog_next_field(const char *src, char *dst, size_t cap)
{
	size_t len, keep;

	if (src == NULL)
		return NULL;

	src += strspn(src, glog_blanks);
	len = strcspn(src, glog_blanks);
	if (len == 0)
		return NULL;

	keep = (len < cap) ? len : cap - 1;
	memcpy(dst, src, keep);
	dst[keep] = '\0';
	return src + len;
}

/* Parse one log line and count it in the host and document tables.
 *
 * Expected layout: seven whitespace-separated fields stored in day, month,
 * date, hours, year, pid and hostname, then a ':' separator, then up to
 * three words stored in message1, message2 and path.  Every field is
 * clipped to the size of its buffer.
 *
 * A line that lacks the seven leading fields, the ':' or the first message
 * word is ignored, so blank or damaged lines do not re-count the previous
 * line's host.  When message1 starts with "Root" the access is counted
 * under "Root Connections"; otherwise it is counted under path, which is
 * empty if the line carried no third message word.
 *
 * Always returns 0.
 */
int ProcessLine(line)
char *line;
{
const char *p;

	/* never let values from an earlier line decide this one */
	message1[0] = '\0';
	message2[0] = '\0';
	path[0] = '\0';

	p = glog_next_field(line, day, sizeof day);
	p = glog_next_field(p, month, sizeof month);
	p = glog_next_field(p, date, sizeof date);
	p = glog_next_field(p, hours, sizeof hours);
	p = glog_next_field(p, year, sizeof year);
	p = glog_next_field(p, pid, sizeof pid);
	p = glog_next_field(p, hostname, sizeof hostname);
	if (p == NULL)
		return 0;

	/* the host name must be followed by a ':' separator */
	p += strspn(p, glog_blanks);
	if (*p != ':')
		return 0;
	p++;

	p = glog_next_field(p, message1, sizeof message1);
	if (p == NULL)
		return 0;
	p = glog_next_field(p, message2, sizeof message2);
	(void) glog_next_field(p, path, sizeof path);

	Insert(&hosts, hostname);

	if (!strncmp(message1, "Root", 4))
		Insert(&docs, "Root Connections");
	else
		Insert(&docs, path);

	return 0;
}

/*******************************/
/* Insert "str" into the appropriate symbol table. Increment the */
/* number of hits if the "str" is already present.               */

Insert(tree, str)
NODE_PTR *tree;
char *str;
{
NODE_PTR temp;
int i;
	if (*tree == NULL) {
		temp = (NODE_PTR) malloc(sizeof(NODE_REC));
		if (temp) {
			temp->left = temp->right = NULL;
			temp->hits = 1;
			strcpy (temp->data, str);
			*tree = temp;
		}
		else
			printf("Memory error\n");
	}
	else {
		i=strcmp(str, (*tree)->data);
		if (i > 0)
			Insert(&((*tree)->right), str);
		else if (i<0) 
			Insert(&((*tree)->left), str);
		else
			(*tree)->hits += 1;
	}
}

/*******************************/
/* Totals for the table currently being dumped: DumpStats() zeroes them */
/* and DumpTree() adds to them for every node it visits.                */
int total_hits, total_nodes;
/* Head of the list built by InsertByNum(); entries are chained through */
/* their "left" pointers and reset to empty by DumpStats().             */
NODE_PTR by_num;

/*******************************/
/* Print every entry of the symbol table in key order, one per line,  */
/* adding each entry to the running totals and to the by-hits list.   */
/* A node is handed to InsertByNum() only after its left subtree has  */
/* been walked, because InsertByNum() overwrites the "left" pointer.  */

DumpTree(tree)
NODE_PTR tree;
{
	while (tree != NULL) {
		DumpTree(tree->left);

		total_nodes += 1;
		total_hits = total_hits + tree->hits;
		printf("%-50.50s %5d\n", tree->data, tree->hits);

		InsertByNum(tree);

		/* the right subtree is handled by the next loop pass */
		tree = tree->right;
	}
}

/*******************************/

/* Reset the totals and the by-hits list, then dump the given table. */
DumpStats(tree)
NODE_PTR tree;
{
	by_num = NULL;
	total_hits = total_nodes = 0;

	DumpTree(tree);
}

/*******************************/
/* Link a tree node into the by_num list, which is kept in descending
 * order of hits and chained through the nodes' "left" pointers.
 *
 * node - entry to add; its "left" pointer is overwritten, so the caller
 *        must already be finished with the node's left subtree.
 *
 * Tie handling is kept as it always was: a node whose hit count equals
 * that of the current head goes in front of the head, while a tie with
 * any later element places the node after that element.
 *
 * Always returns 0.
 */
int InsertByNum(node)
NODE_PTR node;
{
NODE_PTR prev, next;

	/* empty list, or node belongs in front of the current head */
	if (by_num == NULL || node->hits >= by_num->hits) {
		node->left = by_num;	/* NULL when the list was empty */
		by_num = node;
		return 0;
	}

	/* find the first element with strictly fewer hits than node */
	prev = by_num;
	next = prev->left;
	while (next != NULL && node->hits <= next->hits) {
		prev = next;
		next = prev->left;
	}

	/* splice node between prev and next (next is NULL at the tail) */
	prev->left = node;
	node->left = next;
	return 0;
}

/*******************************/
/* Print the by-hits list starting at "tree", following the "left" links.
 * Each line shows the key, its hit count and its share of total_hits as
 * a percentage (0.00 when total_hits is zero).
 *
 * Always returns 0.
 */
int DumpByNum(tree)
NODE_PTR tree;
{
double share;

	for (; tree != NULL; tree = tree->left) {
		/* scale the fraction by 100 so it matches the '%' label */
		if (total_hits != 0)
			share = 100.0 * (double) tree->hits / (double) total_hits;
		else
			share = 0.0;

		printf("%-50.50s %5d (%0.2f%%)\n", tree->data, tree->hits, share);
	}
	return 0;
}

/*******************************/
/* Write the complete report: the period covered, then the host table */
/* and the document table, each listed by name and again by hit count. */

ShowStats()
{
	printf("Report Period: %s to %s\n", start_date, stop_date);

	/* hosts: alphabetical listing, then ranked by connections */
	fputs("\nAll Hosts:\n-----------------------------\n", stdout);
	DumpStats(hosts);
	fputs("\nMost Active Hosts:\n-----------------------------\n", stdout);
	DumpByNum(by_num);
	fputs("------------------------\n", stdout);
	printf("Total Hosts:       %d\n", total_nodes);
	printf("Total Connections: %d\n", total_hits);

	/* documents: alphabetical listing, then ranked by accesses */
	fputs("\nAll Data Accesses:\n-----------------------------\n", stdout);
	DumpStats(docs);
	fputs("\nMost Popular Data:\n-----------------------------\n", stdout);
	DumpByNum(by_num);
	fputs("------------------------\n", stdout);
	printf("Total Data Accesses: %d\n", total_hits);
}

