
/*
 * SEE COPYRIGHT NOTE IN THE FILE main.c
 * Version 1.9.2
 */

/****************************  INCLUDES  *************************************/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <stddef.h>		/* Needed on Irix */
#include <errno.h>

#include "nolce.h"

/************************  EXTERN VARIABLES  *********************************/

extern char *curr_dir, *lock_file;
extern T_OPT opt;
extern NODE *root;

/********************** Function gt_malloc ************************************
* Calls malloc and returns a pointer to a block of at least "size" bytes.
* If malloc fails, perror_exit is invoked with an out-of-memory message.
*
* A request for 0 bytes is rounded up to 1 byte: malloc (0) is allowed to
* return NULL, which would otherwise be mistaken for an out-of-memory
* condition (gt_realloc already special-cases size 0).
******************************************************************************/

void *
gt_malloc (size_t size)
{
  void *ptr;

  ptr = malloc (size != 0 ? size : 1);
  /* perror_exit is defined outside this file and is used here, as in the
     rest of the file, without a trailing semicolon. */
  if (ptr == NULL)
    perror_exit ("\nnolce:\nThe program can't get necessary memory")

  return ptr;
}

/********************** Function gt_realloc ***********************************
* Resizes the block "aap" to "size" bytes through realloc and returns the
* (possibly moved) block. A NULL result is treated as out of memory, and
* reported through perror_exit, only when a non-zero size was requested.
******************************************************************************/

void *
gt_realloc (void *aap, size_t size)
{
  void *resized = realloc (aap, size);

  /* A NULL answer to a zero-byte request is not an allocation failure. */
  if ((size != 0) && (resized == NULL))
    perror_exit ("\nnolce:\nThe program can't get necessary memory")

  return resized;
}

/********************** Function help *****************************************
* Prints the help message and exits successfully.			      *
******************************************************************************/

void
help ()
{
  printf ("nolce " VERSION ", (C) 1997-98 Giuseppe Trovato. See LICENCE file for terms of use."
	  "\n\nUsage: nolce [n_hours] [OPTIONS]..."
	  "\nReads Netscape Navigator (ver. 2 and above) cache files created in"
	  "\nthe last n_hours hours and copies them in a new directory adjusting"
	  "\nfile names and links to permit an off-line navigation of them."
	  "\nIf n_hours isn't supplied, all cached files are processed."
	  "\nOptions:"
	  "\n\n  -c cache_dir\t    directory where cache is, default $HOME/.netscape/cache"
	  "\n  -d dest_dir\t    directory where files are copied, default $HOME/" DEST_DIR
	  "\n  -g sub_string\t    process only files whose URL contains sub_string"
	  "\n  -G sub_string\t    process only files whose URL doesn't contain sub_string"
	  "\n  -i summary_file   file name of summary, it will be created in dest_dir,"
	  "\n\t\t    default is " SUMMARY_FILE
	  "\n  -w\t\t    show pages in the same index window"
	  "\n  -W\t\t    show pages in the list frame"
	  "\n  -s\t\t    execute silently"
	  "\n  -m\t\t    don't eliminate missing images"
	  "\n  -t\t\t    put downloading date of documents in summary file"
	  "\n  -f\t\t    don't process links not satisfying initial conditions"
	  "\n  -p\t\t    cache is generated by Netscape for Windows"
	  "\n  -l\t\t    ignore lock files (use with attention: see docs)"
	  "\n  -k\t\t    make symbolic links for non html files"
	  "\n  --help\t    shows this help\n");
  exit (EXIT_SUCCESS);
}

/********************** Function print_error **********************************
* Reports a command line error on standard output and exits with
* EXIT_FAILURE.
*
*   option      the offending option letter; 0 stands for the n_hours
*               argument when what_error is already_supplied
*   prg_name    program name used as message prefix and in the hint
*   what_error  one of illegal, need_arg, already_supplied; any other value
*               prints only the "Try ... --help" hint
******************************************************************************/

void
print_error (char option, char *prg_name, int what_error)
{
  if (what_error == already_supplied)
    {
      if (option)
        printf ("%s: already supplied option -- %c", prg_name, option);
      else
        printf ("%s: already supplied option -- n_hours", prg_name);
    }
  else if (what_error == need_arg)
    printf ("%s: option requires an argument -- %c", prg_name, option);
  else if (what_error == illegal)
    printf ("%s: illegal option -- %c", prg_name, option);

  printf ("\nTry `%s --help' for more information.\n", prg_name);

  exit (EXIT_FAILURE);
}

/********************** Function cut_path *************************************
* Splits a complete path ("path") into its directory part and its file name.
*
*   path  the full path; it is not modified
*   base  caller-supplied buffer receiving the directory part including the
*         trailing '/'; strlen (path) + 1 bytes are always sufficient
*
* Returns a pointer into "path" at the first character after the last '/'.
* If "path" contains no '/', "base" is set to the empty string and "path"
* itself is returned (previously this case dereferenced a NULL pointer).
******************************************************************************/

char *
cut_path (char *path, char *base)
{
  char *slash = strrchr (path, '/');
  size_t dir_len;

  if (slash == NULL)
    {
      base[0] = '\0';
      return path;
    }

  dir_len = (size_t) (slash - path) + 1;	/* keep the trailing '/' */
  memmove (base, path, dir_len);	/* memmove: tolerate base == path */
  base[dir_len] = '\0';

  return slash + 1;
}

/********************** Function check_path ***********************************
* From the base url ("base") of a document and a link ("entry"), builds the
* absolute url in "cleaned" and returns "cleaned".
*
*   - entry containing "://" is already absolute and is copied unchanged;
*   - entry starting with '/' is appended to base cut at its third '/'
*     (for "scheme://host/dir/" that leaves "scheme://host");
*   - otherwise every leading "../" of entry removes one trailing directory
*     from base, and what is left of entry is appended.
*
* "cleaned" must be large enough for strlen (base) + strlen (entry) + 1.
*
* Fixes over the previous version: a base with fewer than three '/' no
* longer causes a NULL-based pointer to be dereferenced, and only "../" at
* the START of the remaining entry is consumed (a "../" in the middle used
* to strip three unrelated leading characters).
******************************************************************************/

char *
check_path (char *cleaned, char *base, char *entry)
{
  char *l_entry;		/* Local copy of the entry parameter. */
  char *rest;			/* Unconsumed part of l_entry.        */
  char *slash;

  if (strstr (entry, "://"))
    return strcpy (cleaned, entry);

  /* entry is copied before cleaned is written to. */
  l_entry = (char *) gt_malloc (strlen (entry) + 1);
  strcpy (l_entry, entry);
  rest = l_entry;
  strcpy (cleaned, base);

  if (rest[0] == '/')
    {
      char *scan = cleaned;
      int seen;

      /* Truncate cleaned at its third '/', if it has that many. */
      for (seen = 1; seen <= 3; seen++)
	{
	  slash = strchr (scan, '/');
	  if (slash == NULL)
	    break;
	  if (seen == 3)
	    *slash = '\0';
	  scan = slash + 1;
	}
    }
  else
    while (strncmp (rest, "../", 3) == 0)
      {
	/*
	 * For every leading "../" in the ref., cuts a directory in the
	 * base path: first drop the trailing '/', then cut just after the
	 * previous one.
	 */
	slash = strrchr (cleaned, '/');
	if (slash)
	  *slash = '\0';
	slash = strrchr (cleaned, '/');
	if (slash)
	  *(slash + 1) = '\0';
	rest += 3;
      }

  strcat (cleaned, rest);
  free (l_entry);

  return cleaned;
}

/********************** Function check_html_url *******************************
* Tests whether the text after the last '.' of the url ("str") is one of
* .html, .htm, .HTML or .HTM.
*
* Returns 0 when it is. Otherwise returns 1 and, unless "only_test" is set
* or "str" is empty, appends "/" (if "str" does not already end with one)
* followed by DEFAULT_HTML to "str" in place; the caller must provide room
* for the appended text.
******************************************************************************/

bool
check_html_url (char *str, bool only_test)
{
  static const char *const html_ext[] = { ".html", ".htm", ".HTML", ".HTM" };
  const char *dot = strrchr (str, '.');
  size_t i;

  if (dot != NULL)
    for (i = 0; i < sizeof html_ext / sizeof html_ext[0]; i++)
      if (strcmp (dot, html_ext[i]) == 0)
        return 0;

  /* Not an html name: complete it unless the caller only wanted the test. */
  if (only_test || str[0] == '\0')
    return 1;

  if (str[strlen (str) - 1] != '/')
    strcat (str, "/");
  strcat (str, DEFAULT_HTML);

  return 1;
}

/********************** Function process_arg **********************************
* Stores a freshly allocated copy of a command line parameter ("str") in
* "*store". When "need_abs_path" is set and "str" does not start with '/',
* the stored string is curr_dir + "/" + str instead of a plain copy.
******************************************************************************/

void
process_arg (char *str, char **store, bool need_abs_path)
{
  size_t len = strlen (str);

  if (!need_abs_path || (str[0] == '/'))
    {
      /* Nothing to prepend: plain duplicate, terminator included. */
      *store = (char *) gt_malloc (len + 1);
      memcpy (*store, str, len + 1);
      return;
    }

  /* curr_dir, one '/', str, and the terminating NUL. */
  *store = (char *) gt_malloc (strlen (curr_dir) + 1 + len + 1);
  sprintf (*store, "%s/%s", curr_dir, str);
}

/********************** Function standardize **********************************
* Rewrites an url ("str") in place and returns "str":
*   - the part before "://" is lowercased and "://" becomes "/";
*   - empty path components (final '/' or double '/') and "." components
*     are dropped;
*   - in every component after the first one, characters that are neither
*     alphanumeric nor one of "._-+" become '_', and the component is cut
*     once it grows past 128 characters.
* The result is never longer than the input.
*
* If "str" contains no "://" it is returned untouched, and if nothing but
* '/' follows "://" only the lowercased prefix is kept; both cases used to
* dereference a NULL pointer. Characters are converted to unsigned char
* before being handed to the <ctype.h> functions.
*
* Note: uses strtok, so it disturbs any strtok scan in progress.
******************************************************************************/

char *
standardize (char *str)
{
  char *snew, *c, *sp, *token;

  sp = strstr (str, "://");
  if (sp == NULL)
    return str;

  snew = (char *) gt_malloc (strlen (str) + 1);

  /* Lowercased copy of everything before "://". */
  memcpy (snew, str, (size_t) (sp - str));
  snew[sp - str] = '\0';
  for (c = snew; *c != '\0'; c++)
    *c = (char) tolower ((unsigned char) *c);

  sp += 3;

  /* First component is appended as it is. */
  token = strtok (sp, "/");
  if (token != NULL)
    {
      strcat (snew, "/");
      strcat (snew, token);

      while ((token = strtok (NULL, "/")) != NULL)
	{
	  for (c = token; *c != '\0'; c++)
	    {
	      if (!isalnum ((unsigned char) *c) && !strchr ("._-+", *c))
		*c = '_';
	      if (c - token > 128)
		{
		  /* strtok already remembers where the next component
		     starts, so the tail of this one is simply skipped. */
		  *c = '\0';
		  break;
		}
	    }
	  if (strcmp (token, "."))
	    strcat (strcat (snew, "/"), token);
	}
    }

  strcpy (str, snew);
  free (snew);
  return str;
}

/********************** Function gt_strshift **********************************
* Shifts the characters of a string ("name") to the left by a given number
* ("pos") of positions, in place, dropping the first "pos" characters.
* Returns "name".
*
* pos <= 0 leaves the string unchanged; pos >= strlen (name) leaves the
* empty string.
*
* The former one-line loop both modified and read its pointer in a single
* expression without sequencing (undefined behaviour) and started from
* name - 1; memmove performs the same overlapping copy safely.
******************************************************************************/

char *
gt_strshift (char *name, int pos)
{
  size_t len;

  if (pos <= 0)
    return name;

  len = strlen (name);
  if ((size_t) pos >= len)
    name[0] = '\0';
  else
    memmove (name, name + pos, len - (size_t) pos + 1);	/* +1: the NUL */

  return name;
}

/********************** Function relative_position ****************************
* Transforms an absolute url ("url") to a relative one ("ref"), basing on a
* given document's base ("base"). Returns "ref".
*
* The two strings are compared up to their first difference. Every path
* component of "base" lying after the last '/' both strings share
* contributes one "../", then the part of "url" after that '/' is appended.
* "ref" must be large enough for the result.
*
* Changes over the previous version:
*   - identical "url" and "base" no longer use an uninitialised pointer;
*   - a '/' in "base" facing a different character in "url" is no longer
*     counted as a shared '/' (e.g. base "a/b/", url "a/bc/x" now yields
*     "../bc/x");
*   - components are counted in place, so no temporary copy is allocated
*     (the old copy was never freed) and strtok is not used;
*   - the strings are indexed separately instead of subtracting two
*     unrelated pointers.
******************************************************************************/

char *
relative_position (char *url, char *base, char *ref)
{
  size_t i = 0;
  size_t common = 0;	/* Index just past the last '/' shared by both. */
  size_t dirs = 0;
  size_t k;
  const char *p;

  while (base[i] != '\0' && base[i] == url[i])
    {
      if (base[i] == '/')
	common = i + 1;
      i++;
    }

  /* Count the non-empty '/'-separated components left in base. */
  p = base + common;
  while (*p != '\0')
    {
      while (*p == '/')
	p++;
      if (*p == '\0')
	break;
      dirs++;
      while (*p != '\0' && *p != '/')
	p++;
    }

  ref[0] = '\0';
  for (k = 0; k < dirs; k++)
    strcat (ref, "../");

  strcat (ref, url + common);
  return ref;
}

/********************** Function start_summary_files **************************
* Creates or reopens the index-related files of the summary.
*
*   what == CONST  writes <dest_dir>/<summary_file> and, under
*                  <dest_dir>/nolce_files, the banner_ and wd_ files, from
*                  the index, banner and wd_index skeleton tables; returns
*                  NULL.
*   what == FULL   opens nolce_files/full_<summary_file>. If it exists and
*                  starts with the nolce signature line it is positioned 15
*                  bytes before its end, so that what is written next
*                  overwrites the tail (presumably the closing tags);
*                  otherwise it is recreated from full_index. Returns the
*                  open stream.
*   what == DOM    same for nolce_files/dom_<summary_file> (skeleton
*                  dom_index); when the file is reused, "*nd" receives the
*                  number of lines starting with "<a target=", else 0.
*                  Returns the open stream.
*   other values   do nothing and return NULL.
*
* Failure to create a file is handled by check_file_stream.
*
* Fixes: the result of fgets is checked before being passed to strcmp (an
* empty or unreadable existing file made fgets return NULL), and the path
* buffer / stream are always initialised, so an unexpected "what" no longer
* frees or returns an indeterminate pointer.
******************************************************************************/

FILE *
start_summary_files (int what, long *nd)
{
#include "skeletons.c"

  int k;
  FILE *f = NULL;
  char line[80];
  char *temp;

  /* Longest fixed part is "/nolce_files/banner_" (20 chars) plus NUL. */
  temp = (char *) gt_malloc (strlen (opt.dest_dir) + strlen (opt.summary_file) + 40);

  if (what == CONST)
    {
      /* Top-level frameset. */
      sprintf (temp, "%s/%s", opt.dest_dir, opt.summary_file);
      f = fopen (temp, "w");
      check_file_stream (f, temp);

      for (k = 0; k < 4; k++)
	fputs (index[k], f);
      fprintf (f, "src=\"nolce_files/banner_%s\"", opt.summary_file);
      fputs (index[4], f);
      fprintf (f, "src=\"nolce_files/wd_%s\"", opt.summary_file);
      for (k = 5; k < 9; k++)
	fputs (index[k], f);
      fclose (f);

      /* Banner. */
      sprintf (temp, "%s/nolce_files/banner_%s", opt.dest_dir, opt.summary_file);
      f = fopen (temp, "w");
      check_file_stream (f, temp);

      for (k = 0; k < 5; k++)
	fputs (banner[k], f);
      fprintf (f, "href=\"wd_%s\"", opt.summary_file);
      for (k = 5; k < 8; k++)
	fputs (banner[k], f);
      fprintf (f, "href=\"full_%s\"", opt.summary_file);
      for (k = 8; k < 23; k++)
	fputs (banner[k], f);
      fclose (f);

      /* Frameset holding the dom_ and full_ lists. */
      sprintf (temp, "%s/nolce_files/wd_%s", opt.dest_dir, opt.summary_file);
      f = fopen (temp, "w");
      check_file_stream (f, temp);

      for (k = 0; k < 3; k++)
	fputs (wd_index[k], f);
      fprintf (f, "src=\"dom_%s\"", opt.summary_file);
      fputs (wd_index[3], f);
      fprintf (f, "src=\"full_%s\"", opt.summary_file);
      for (k = 4; k < 8; k++)
	fputs (wd_index[k], f);
      fclose (f);
    }

  if (what == FULL)
    {
      sprintf (temp, "%s/nolce_files/full_%s", opt.dest_dir, opt.summary_file);
      f = fopen (temp, "r+");

      if (f && fgets (line, sizeof line, f) != NULL
	  && !strcmp (line, "<!-- File generated by nolce. Do not edit! -->\n"))
	fseek (f, -15L, SEEK_END);
      else
	{
	  /* Missing, empty or foreign file: start it afresh. */
	  if (f)
	    fclose (f);
	  f = fopen (temp, "w");
	  check_file_stream (f, temp);
	  for (k = 0; k < 6; k++)
	    fputs (full_index[k], f);
	}
    }
  if (what == DOM)
    {
      *nd = 0;

      sprintf (temp, "%s/nolce_files/dom_%s", opt.dest_dir, opt.summary_file);
      f = fopen (temp, "r+");

      if (f && fgets (line, sizeof line, f) != NULL
	  && !strcmp (line, "<!-- File generated by nolce. Do not edit! -->\n"))
	{
	  /* Count the entries already present; this reads up to EOF. */
	  while (fgets (line, sizeof line, f) != NULL)
	    if (!strncmp (line, "<a target=", 10))
	      (*nd)++;
	  fseek (f, -15L, SEEK_CUR);
	}
      else
	{
	  /* Missing, empty or foreign file: start it afresh. */
	  if (f)
	    fclose (f);
	  f = fopen (temp, "w");
	  check_file_stream (f, temp);
	  for (k = 0; k < 4; k++)
	    fputs (dom_index[k], f);
	}
    }

  free (temp);

  /* For CONST every stream has already been closed. */
  if (what != CONST)
    return f;
  else
    return NULL;
}

/********************** Function put_in_summary_files *************************
* Creates an index entry for an html document ("data").
*
* On the first document the dom_ and full_ index files are opened through
* start_summary_files and the retrieval date is written. Whenever the
* "scheme://host" part of data->or_url differs from that of the previous
* document, a new numbered domain entry is written to the dom_ file (and,
* from the second label on, a "New domain" separator to the full_ file).
* The document entry itself goes to the full_ file, followed by its download
* date when REPORT_TIME is set in opt.var.
*
* With "close_all" set, "data" is not used: the closing tags are written and
* both files are closed. If no document was ever processed, perror_exit is
* invoked instead.
*
* Fixes over the previous version:
*   - close_all with no processed document no longer writes to the never
*     opened (NULL) streams before reporting the error;
*   - the scan for the third '/' stops at the end of the url;
*   - the host name is looked up without assuming the domain contains '/';
*   - the text icon tag read src="internal-gopher-text "align=left (blank
*     inside the quotes, none before align); it is now written like the
*     menu icon tag of the domain list.
******************************************************************************/

void
put_in_summary_files (NODE * data, bool close_all)
{
  static char *old_domain;
  static size_t times = 0;
  static FILE *f_full, *f_dom;
  static long label;
  static char target[16];
  char red_url[74];
  const char *host;
  char *slash;
  size_t dom_len;		/* Length of the "scheme://host" prefix. */
  int slashes;

  if (close_all)
    {
      if (times == 0)
	{
	  /* Nothing was ever opened: f_dom and f_full are still NULL. */
	  perror_exit ("\nnolce:\nCouldn't process any html file, summary won't work, exiting...")
	  return;
	}

      fputs ("\n</pre>\n<hr size=1 noshade>\n</body>\n</html>", f_dom);
      fputs ("\n</body>\n</html>", f_full);
      fclose (f_dom);
      fclose (f_full);
      return;
    }

  /* The domain ends just before the third '/', or at the end of the url. */
  dom_len = 0;
  slashes = 0;
  while (data->or_url[dom_len] != '\0')
    {
      if (data->or_url[dom_len] == '/' && ++slashes == 3)
	break;
      dom_len++;
    }

  if (times++ == 0)
    {
      time_t curr_time;

      old_domain = (char *) gt_malloc (1);
      old_domain[0] = '\0';

      f_dom = start_summary_files (DOM, &label);
      f_full = start_summary_files (FULL, NULL);

      curr_time = time (NULL);
      fprintf (f_dom, "\n<font size=2>Retrieved on %s</font><pre>",
	       ctime (&curr_time));

      if (opt.view_window == 0)
	strcpy (target, "target=\"_view\"");
      else if (opt.view_window == 1)
	strcpy (target, "target=\"_top\"");
      else if (opt.view_window == -1)
	*target = '\0';
    }

  if ((strlen (old_domain) != dom_len) || strncmp (old_domain, data->or_url, dom_len))
    {
      old_domain = (char *) gt_realloc (old_domain, dom_len + 1);
      memcpy (old_domain, data->or_url, dom_len);
      old_domain[dom_len] = '\0';

      /* Show the host only: skip "scheme:" and the two slashes after it. */
      slash = strchr (old_domain, '/');
      host = (slash != NULL && slash[1] != '\0') ? slash + 2 : old_domain;

      fprintf (f_dom, "\n<a target=list HREF=\"full_%s#%ld\">"
	       "<img src=\"internal-gopher-menu\" hspace=2 "
	       "align=bottom border=0>%s</a>",
	       opt.summary_file, ++label, host);

      if (label > 1)
	fprintf (f_full, "<p>\n<font size=1>New domain</font>"
		 "\n<hr align=left width=\"150\" size=1 noshade>\n<a name=\"%ld\">", label);
    }

  /*
   * Displayed url: strncpy zero-pads short urls, so they show in full; the
   * three dots are only reached by urls of 70 characters or more.
   */
  strncpy (red_url, data->or_url, 70);
  red_url[70] = red_url[71] = red_url[72] = '.';
  red_url[73] = '\0';
  fprintf (f_full, "<p><table border=0><tr><td align=left><a %s href=\"../%s\"><img src=\"internal-gopher-text\" "
	   "align=left border=0></a></td><td nowrap><b><font color=#ff0000>Title: </font></b>"
	   "%s\n<br><b>Url: </b>\n<a target=\"_top\" href=\"%s\">%s</a></td></tr></table>",
	   target, data->r_url, data->title, data->or_url, red_url);
  if ((opt.var & REPORT_TIME))
    fprintf (f_full, "\n<font color=\"#000055\" size=\"-1\">Downloaded on %s</font>", ctime (&(data->mod_time)));
}

/********************** Function url_is_valid *********************************
* Checks if the url ("str") is valid: it must be non-NULL, contain "://",
* and the character right after the first "://" must be a letter or a
* digit. Returns 1 if so, 0 otherwise.
*
* The character is converted to unsigned char before the <ctype.h> test;
* passing a negative plain char (any byte >= 0x80 where char is signed) to
* isalpha/isdigit is undefined behaviour.
******************************************************************************/

bool
url_is_valid (char *str)
{
  const char *sep;

  if (str == NULL)
    return 0;

  sep = strstr (str, "://");
  if (sep == NULL)
    return 0;

  /* sep[3] is at worst the terminating NUL, which is not alphanumeric. */
  return isalnum ((unsigned char) sep[3]) ? 1 : 0;
}

/********************** Function check_file_stream ****************************
* Verifies the result of opening an index file. Does nothing when "f" is a
* valid stream. When "f" is NULL, prints on stderr that "path" could not be
* created (mentioning missing permission if errno is EACCES) and then
* invokes perror_exit.
******************************************************************************/

void 
check_file_stream (FILE *f, char *path)
{
  if (f != NULL)
    return;

  /* errno is read as an argument, before fprintf gets a chance to run. */
  fprintf (stderr,
           (errno == EACCES)
           ? "\n\nnolce:\nCould not create index file `%s', haven't permission."
           : "\n\nnolce:\nCould not create index file `%s'",
           path);

  perror_exit ("Exiting...");
}
