/********************************************************************
 * $Author: lindner $
 * $Revision: 1.4 $
 * $Date: 1993/01/05 02:41:28 $
 * $Source: /home/mudhoney/GopherSrc/release1.11/gopherd/RCS/Waisindex.c,v $
 * $State: Rel $
 *
 * Paul Lindner, University of Minnesota CIS.
 *
 * Copyright 1991, 1992 by the Regents of the University of Minnesota
 * see the file "Copyright" in the distribution for conditions of use.
 *********************************************************************
 * MODULE: Waisindex.c
 * Routines to translate wais indexes on disk to gopher
 *********************************************************************
 * Revision History:
 * $Log: Waisindex.c,v $
 * Revision 1.4  1993/01/05  02:41:28  lindner
 * .cap files are now ignored by the indexer
 *
 * Revision 1.3  1993/01/01  00:12:41  lindner
 * Fixed parameters to GDnew()
 *
 * Revision 1.2  1992/12/21  20:36:44  lindner
 * Added #include for cutil.h (from dgg)
 *
 * Revision 1.1  1992/12/10  23:13:27  lindner
 * gopher 1.1 release
 *
 *
 *********************************************************************/

#if defined(WAISSEARCH)

/* WIDE AREA INFORMATION SERVER SOFTWARE
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.    
   Brewster@think.com

   Heavily hacked by Paul Lindner (lindner@boombox.micro.umn.edu)
   Do you even recognize this Brewster? :-)

*/

/* Global flag, initialised to 0.  Nothing in the visible part of this
   file reads it.  NOTE(review): presumably consulted by the WAIS code
   this module is linked against -- confirm before removing. */
int ShowDate = 0;

#define _search_c

#include "gopherd.h"


#if defined(_AIX)
#define ANSI_LIKE
#endif

#include "../ir/irext.h"
#include "../ir/irsearch.h"
#include "../ir/docid.h"
#include "../ir/irtfiles.h"
#include "../ir/cutil.h"    /** fix for -DBIO wais needs.. **/
#include <math.h>


/* Stream assigned by WaisIndexQuery(): stderr when DEBUG is set,
   otherwise a handle on /dev/null. */
FILE *logfile = NULL; /* the logfile */

/* Never assigned in the visible part of this file.
   NOTE(review): presumably read by the WAIS logging code -- confirm. */
char *log_file_name = NULL;

/* Index name used by WaisIndexQuery() when the caller passes NULL. */
static char *DefaultDB = "index";

/* Month names indexed 0 (January) .. 11 (December).  "Aug" used to be
   missing, which left only eleven entries and shifted every month from
   August onwards by one.  The existing spellings ("June", "July",
   "Sept") are kept unchanged because they may be emitted at runtime. */
static char *MonthStr[] = {
     "Jan", "Feb", "Mar", "Apr", "May", "June", "July", "Aug", "Sept",
     "Oct", "Nov", "Dec"
};

#if defined(void)
#undef void
#endif



/*
 * Process_Veronica
 *
 * Re-read the record a hit points at and use it to fill in gs.  The bytes
 * from besthit->start_character up to besthit->end_character of
 * besthit->filename are expected to look like
 *
 *      <type char><title> TAB <path> TAB <host> TAB <port>
 *
 * Returns 0 when all fields were found and stored in gs.  Returns -1 when
 * the file cannot be opened or positioned, when the byte range is empty,
 * or when the record does not contain the three TAB separators; in those
 * cases gs is left exactly as it was passed in.
 *
 * Records longer than the local buffer are truncated to fit it.
 */
int
Process_Veronica(besthit, gs)
  hit *besthit;
  GopherObj *gs;
{
     FILE *ZeFile;
     char veronicabuf[1024];
     char *titleend, *path, *pathend, *host, *hostend;
     long span;
     size_t got;

     /*** Open up the file and seek to the right position ***/

     ZeFile = ufopen(besthit->filename, "r");

     if (ZeFile == NULL)
          return(-1);

     span = (long)(besthit->end_character - besthit->start_character);

     /* whence 0 is "from the start of the file" */
     if (span <= 0 || fseek(ZeFile, besthit->start_character, 0) != 0) {
          fclose(ZeFile);
          return(-1);
     }

     /* Never read more than the buffer holds; keep one byte for the NUL */
     if (span > (long)sizeof(veronicabuf) - 1)
          span = (long)sizeof(veronicabuf) - 1;

     got = fread(veronicabuf, 1, (size_t)span, ZeFile);
     fclose(ZeFile);

     /* Terminate right after what was actually read */
     veronicabuf[got] = '\0';

     ZapCRLF(veronicabuf);

     if (veronicabuf[0] == '\0')
          return(-1);

     /*
      * Split the record in place.  Nothing is stored in gs until every
      * separator has been found, so a short or garbled record cannot
      * leave gs half updated.
      */
     titleend = strchr(veronicabuf + 1, '\t');
     if (titleend == NULL)
          return(-1);
     *titleend = '\0';

     path = titleend + 1;
     pathend = strchr(path, '\t');
     if (pathend == NULL)
          return(-1);
     *pathend = '\0';

     host = pathend + 1;
     hostend = strchr(host, '\t');
     if (hostend == NULL)
          return(-1);
     *hostend = '\0';

     GSsetType(gs, veronicabuf[0]);
     GSsetTitle(gs, veronicabuf + 1);
     GSsetPath(gs, path);
     GSsetHost(gs, host);
     GSsetPort(gs, atoi(hostend + 1));

     return(0);
}

/*
 * WaisIndexQuery
 *
 * Search the WAIS index new_db_name (found in index_directory) for
 * SearchWords and write one gopher item per usable hit to sockfd,
 * followed by a ".\r\n" line.
 *
 *   sockfd           descriptor the items are written to
 *   index_directory  directory changed into before the index is opened
 *   SearchWords      the query; an empty string is handed to
 *                    EveryWAISdocument() instead of being searched
 *   new_db_name      index name; DefaultDB is used when this is NULL
 *   INDEXHost        host stored in every item
 *   INDEXPort        port stored in every item
 *   INDEXPath        prefix looked for in each hit's filename; the
 *                    selector is whatever follows it.  The first
 *                    occurrence is also cut out of the headline.
 *
 * Hits whose weight is not positive, or whose filename contains ".cache"
 * or ".cap/", are skipped.  At most 256 hits are examined.
 *
 * NOTE(review): gs is not released before returning and the side file is
 * not closed here; whether Process_Side() closes it cannot be seen from
 * this file -- confirm.
 */
void
WaisIndexQuery(sockfd, index_directory, SearchWords, new_db_name, INDEXHost, INDEXPort, INDEXPath)
  int sockfd;
  char *index_directory;
  char *SearchWords;
  char *new_db_name;
  char *INDEXHost;
  int  INDEXPort;
  char *INDEXPath;
{
     database* db;
     char *cp;
     char *Selstrout;
     long i;
     query_parameter_type parameters;
     boolean search_result;
     static char ReturnLine[512];

     char * sidename;                    /* mtm 11-23-92 */
     FILE * SideFile = NULL;             /* mtm 11-23-92 */

     GopherObj    *gs;


     if (DEBUG)  {
          fprintf(stderr, "IndexPath: %s\n", INDEXPath);
          logfile = stderr;   /** Log wais error messages to console **/
     }     else {
          logfile = ufopen("/dev/null", "w+");
     }

     if (new_db_name == NULL) {
          new_db_name = DefaultDB;
     }

     if (uchdir(index_directory)) {
          Abortoutput(sockfd, "Couldn't change to index directory...");
          return;
     }

     if (SearchWords != NULL && strlen(SearchWords) == 0) {
          /* Pass the full argument list the function is defined with */
          EveryWAISdocument(sockfd, new_db_name, INDEXHost, INDEXPort,
                            INDEXPath);
          return;
     }

     db = openDatabase(new_db_name, false, true);

     if (db == NULL) {
          /* Precisions keep the message inside ReturnLine */
          sprintf(ReturnLine,
                  "Failed to open database %.200s in index dir %.200s",
                  new_db_name, index_directory);
          Abortoutput(sockfd, ReturnLine);
          writestring(sockfd, ".\r\n"); /** be polite **/
          return;
     }

     /* Allocated only now so the early exits above do not leak it */
     gs = GSnew();

#ifdef BIO            /* dgg */
{
     char *cp= read_delimiters( db);  /* use data-specific delim, available */

     if (cp != NULL) {
          strcpy( gDelimiters, cp);
          wordDelimiter= wordbreak_user;
     }
     else
          wordDelimiter= wordbreak_notalnum;
}
#endif

     parameters.max_hit_retrieved = 256;

     set_query_parameter(SET_MAX_RETRIEVED_MASK, &parameters);

     search_result = search_for_words(SearchWords, db, 0);

     if (search_result == true) {
          /* the search went ok */
          hit best_hit;

          finished_search_word(db);
          if (DEBUG)
               printf("After finished_search\n");

          uchdir(Data_Dir); /* necessary to find side files */

          for (i = 0; i < parameters.max_hit_retrieved; i++){
               if (0 != next_best_hit(&best_hit, db))
                    break;              /* out of hits */

               if (best_hit.weight > 0 &&
                   strstr(best_hit.filename, ".cache")==NULL &&
                   strstr(best_hit.filename, ".cap/")==NULL){

                    /*** Strip off the first part of the path in the filename*/
                    /*** Plus it gets rid of weird automount things... ***/
                    Selstrout =strstr(best_hit.filename, INDEXPath);
                    if (Selstrout == NULL)
                         Selstrout = "Error in Hostdata!";
                    else
                         Selstrout += strlen(INDEXPath);

                    waislog(0,99,"%s: Score %3d:%s",SearchWords,best_hit.weight,Selstrout);

                    /** Make the outgoing string **/

                    ZapCRLF(best_hit.headline);

                    /*** Remove the gopher data directory pathname if
                         it's there from the headline
                    ***/

                    if ((cp = strstr(best_hit.headline, INDEXPath)) != NULL) {
                         /* Source and destination overlap, which strcpy()
                            does not allow; slide the tail down by hand */
                         char *tail = cp + strlen(INDEXPath);

                         while ((*cp++ = *tail++) != '\0')
                              ;
                    }

                    GSsetType(gs, '0');
                    GSsetTitle(gs, best_hit.headline);
                    GSsetHost(gs, INDEXHost);
                    GSsetPort(gs, INDEXPort);

                         /* removed "/" from following line (before %s) .
                            Was getting double slash at least with w8b5bio;
                            mtm 11-23-92 */

                    /* Precision keeps an over-long path inside ReturnLine */
                    sprintf(ReturnLine, "R%d-%d-%.450s",
                            best_hit.start_character, best_hit.end_character,
                            Selstrout);

                    if (!MacIndex)
                        GSsetPath(gs, ReturnLine);
                    else
                        GSsetPath(gs, Selstrout);
                    GSsetWeight(gs, best_hit.weight);

                    /*
                     * Find and process sidefile.
                     * Allow worst case name length.
                     */

                    if((sidename = (char *) malloc((unsigned)
                        strlen(Selstrout) +
                        strlen("/.cap/") + 1)) != NULL) {
                      if((cp = mtm_basename(Selstrout)) != Selstrout) {
                        /*  turn "/foo/bar/baz" into "/foo/bar/.cap/baz" */
                        strncpy(sidename,Selstrout,(cp - Selstrout));
                        *(sidename + (cp - Selstrout)) = '\0';
                        strcat(sidename,".cap/");
                        strcat(sidename,cp);
                      }
                      else {
                      /* root of the gopher tree, this is easier... */
                        strcpy(sidename,"/.cap/");
                        strcat(sidename,Selstrout);
                      }
                      if ((SideFile = rfopen(sidename, "r")) != NULL) {
                        if (DEBUG == TRUE)
                          printf("Side file name: %s\n", sidename);
                        Process_Side(SideFile, gs);
                      }
                      free(sidename);
                    }

                    if (DEBUG) printf("Doc type is %s\n", best_hit.type);
                    if (strcmp(best_hit.type, "GOPHER")==0) {
                        if (DEBUG) printf("Got a veronica style thing %s\n",best_hit.headline);
                        Process_Veronica(&best_hit, gs);
                   }

                    GStoNet(gs,sockfd);

               }


               if (DEBUG) {
                    printf("%s\n", ReturnLine);
                    printf("End Byte   = %d\n", best_hit.end_character);
                    printf("Doc length = %d\n", best_hit.document_length);
                    printf("#lines     = %d\n", best_hit.number_of_lines);
               }
          }
     }
     else {
          /* something went awry in the search */
          LOGGopher(sockfd, "Something went wrong in the search!\r\n");
          writestring(sockfd, ".\r\n"); /*** be polite, don't screw up the client**/
          closeDatabase(db);            /* was left open on this path */
          return;
     }
     finished_best_hit(db);

     writestring(sockfd, ".\r\n");

     /* free everything */
     closeDatabase(db);
     return;
}

/*
 * EveryWAISdocument
 *
 * List the documents recorded in the catalog file "<db>.cat".  The
 * catalog is read line by line; a "Headline: " line supplies the title
 * and a "DocID: <start> <end> <filename>" line supplies the byte range
 * and the file.  As soon as both have been seen, one type '0' item with
 * the selector "R<start>-<end>-<path>" is written to sockfd, where <path>
 * is the filename from the first occurrence of INDEXPath onwards.
 * A ".\r\n" line ends the listing.
 *
 *   sockfd     descriptor the items are written to
 *   db         index name, without the ".cat" suffix
 *   INDEXHost  host stored in every item
 *   INDEXPort  port stored in every item
 *   INDEXPath  text that must occur in each DocID filename
 *
 * Reading stops at the first DocID line that is malformed or does not
 * contain INDEXPath.
 *
 * Returns 0 once the catalog has been read, -1 when it could not be
 * opened (an error reply is sent in that case).
 *
 * NOTE(review): gs is not released before returning.
 */
int
EveryWAISdocument(sockfd, db, INDEXHost, INDEXPort, INDEXPath)
  int sockfd;
  char *db;
  char *INDEXHost;
  int  INDEXPort;
  char *INDEXPath;
{
     FILE         *dbcatalog = NULL;
     char         db_name[MAXPATHLEN];
     char         inputline[512];
     /* Both are copies of part of one input line, so the same size fits */
     char         Headline[sizeof(inputline)];
     char         Filename[sizeof(inputline)];
     /* 'R', two numbers, two dashes and a filename: always fits */
     char         tmppath[sizeof(inputline) + 32];
     int          StartByte = 0, EndByte = 0;
     GopherObj    *gs;
     boolean      Headlineset = FALSE;
     boolean      DocIDset    = FALSE;

     /* Only build the catalog name when it fits in db_name */
     if (db != NULL && INDEXPath != NULL &&
         strlen(db) + strlen(".cat") < sizeof(db_name)) {
          strcpy(db_name, db);
          strcat(db_name, ".cat");
          dbcatalog = rfopen(db_name, "r");
     }

     if (dbcatalog == NULL) {
          /* Same reply shape as a failed openDatabase() in WaisIndexQuery() */
          Abortoutput(sockfd, "Couldn't open the index catalog...");
          writestring(sockfd, ".\r\n");
          return(-1);
     }

     gs = GSnew();

     while (fgets(inputline, sizeof(inputline), dbcatalog) != NULL) {

          /* Keep the line ending out of the title and the selector */
          ZapCRLF(inputline);

          if (strncmp(inputline, "Headline: ", 10)==0) {
               strcpy(Headline, inputline + 10);
               Headlineset = TRUE;
          }
          else if (strncmp(inputline, "DocID: ", 7)==0) {
               char *endfield;
               char *namefield;

               /* The numbers start after the "DocID: " tag */
               StartByte = atoi(inputline + 7);

               endfield = strchr(inputline + 7, ' ');
               if (endfield == NULL) break;
               endfield++;
               EndByte = atoi(endfield);

               /* The filename follows the blank after the end offset */
               namefield = strchr(endfield, ' ');
               if (namefield == NULL) break;

               namefield = strstr(namefield + 1, INDEXPath);
               if (namefield == NULL) break;

               strcpy(Filename, namefield);

               DocIDset = TRUE;
          }

          if (DocIDset == TRUE && Headlineset == TRUE) {
               sprintf(tmppath, "R%d-%d-%s", StartByte, EndByte, Filename);

               GSsetType(gs, '0');
               GSsetTitle(gs, Headline);
               GSsetHost(gs, INDEXHost);
               GSsetPort(gs, INDEXPort);
               GSsetPath(gs, tmppath);

               /* Send the item right away, as WaisIndexQuery() does for
                  its hits; the entries used to be collected and then
                  dropped without ever reaching the client */
               GStoNet(gs, sockfd);

               DocIDset = FALSE;
               Headlineset = FALSE;
          }
     }

     fclose(dbcatalog);

     writestring(sockfd, ".\r\n");
     return(0);
}

#endif /** WAISSEARCH **/
