#include "getwww.h"

/* ---- Run-time switches; main() sets them while parsing the command line. ---- */
int RECURSIVE_LEVEL = NO;	/* value of the -r option ("reget level"); main() rejects values below -1 */
int FORCE_RECURSIVE_LEVEL = NO;	/* value of the -R option ("don't care not-modified level"); also >= -1 */
int USE_EXIST_FILE = NO;	/* set to YES by -i ("use exist file") */
int DEBUG_MODE = NO;		/* set to YES by -v; when set, main() opens file_debug on FILE_DEBUG */
int QUIET_MODE = NO;		/* set to YES by -q */
int VERY_QUIET_MODE=NO;		/* set to YES by -Q; suppresses the "using proxy" notice in main() */
int DEPTH_FIRST = NO;		/* not referenced in this part of the file -- presumably read elsewhere */
int is_want = NO;		/* not referenced in this part of the file -- presumably read elsewhere */
int AUTHORIZATION = NO;		/* set to 1 by -l; LOGIN then holds the base64 form of "user:passwd" */
int FORCE_GET = NO;		/* set to YES by -n ("dont cache data") */
int USING_PROXY = NO;		/* set to YES by the http_proxy environment variable or -p; cleared by -P */

/* ---- Work-file names; the -f option appends ".<name>" to every one of them. ---- */
char FILE_TEMP[PARSE_STRING_MAX] = ".getwww.tmp";
char FILE_SCAN[PARSE_STRING_MAX] = ".getwww.scan";	/* written by main(), read back by local_file_clean() */
char FILE_NEW[PARSE_STRING_MAX] = ".getwww.new";
char FILE_DEBUG[PARSE_STRING_MAX] = ".getwww.deb";	/* opened as file_debug when DEBUG_MODE is set */
char FILE_ERROR[PARSE_STRING_MAX] = ".getwww.err";

int PORT = -1;			/* not referenced in this part of the file */
/* Forms of the target site's base URL, all filled in by main():
   URL / URL_PORT use the host name, URL_NUMBER / URL_NUMBER_PORT use the
   dotted-decimal address; the *_PORT variants carry an explicit ":port".  */
char URL[PARSE_STRING_MAX];
char URL_PORT[PARSE_STRING_MAX];
char URL_NUMBER[PARSE_STRING_MAX];
char URL_NUMBER_PORT[PARSE_STRING_MAX];
char PROXY[PARSE_STRING_MAX];	/* proxy URL taken from http_proxy or -p */
char LOGIN[PARSE_STRING_MAX];	/* -l argument, replaced in place by its base64 encoding */

unsigned long int HOST;		/* inet_addr() result when the URL names a numeric host */
FILE *file_debug;		/* debug log stream; only opened when DEBUG_MODE is set */


/****************************************************************************************************/
/*
 * Report a fatal error and terminate the program with exit status 1.
 *
 * err_msg selects what is printed on stderr:
 *   ""       -> the full usage/help text only;
 *   "#text"  -> "getwww error --text" followed by the full usage/help text;
 *   "text"   -> "getwww error --text" only.
 *
 * The debug log is closed first when debug mode is on.  This function is
 * reachable while the command line is still being parsed (for example
 * "-v -h"), i.e. before the debug log has been opened, so the stream is only
 * closed when it actually exists.
 */
void
usage (char *err_msg)
{
  static const char *const help_lines[] = {
    "usage : getwww [-c][-i][-n][-q][-Q][-v][-h][-P][-f name][-l user[:passwd]]\n",
    "               [-r num][-R num][-d dir][-D dir][-O cgi-file] \n",
    "               [-s string][-S string][-t string][-T string]\n",
    "               [-p http://c.d.e.f:portnum] http://a.b.c.d[:80][/aa.html]\n",
    "option :\n",
    "  -c  : local file clean    -v  : debug mode\n",
    "  -i  : use exist file      -h  : show help message\n",
    "  -n  : dont cache data     -P  : not using proxy   \n",
    "  -q  : quiet mode          -Q  : very quiet mode\n",
    "  -f name                       : temp file name\n",
    "  -l user[:passwd]              : user name and password for Authorization\n",
    "  -O cgi-file-name              : force get cgi file [file_name]\n",
    "  -p http://a.b.c.d:portnum     : using proxy\n",
    "  -r ?                          : reget level (num >= -1)\n",
    "  -R ?                          : don't care not-modified level (num >= -1)\n",
    "  -d dir                        : only get file in [dir] directory \n",
    "  -D dir                        : don't get file in [dir] directory \n",
    "  -s string                     : only get file has [string] in file name \n",
    "  -S string                     : don't get file has [string] in file name \n",
    "  -t string                     : only get end with [string] in file name \n",
    "  -T string                     : don't get end with [string] in file name \n"
  };
  unsigned int line;

  /* fclose(NULL) is undefined behaviour, so check the stream itself too. */
  if (DEBUG_MODE && file_debug != NULL)
    {
      fclose (file_debug);
      file_debug = NULL;
    }

  if (err_msg[0] == '#' || err_msg[0] == NULL_CHAR)
    {
      if (err_msg[0] == '#')
	fprintf (stderr, "getwww error --%s\n", &err_msg[1]);
      for (line = 0; line < sizeof help_lines / sizeof help_lines[0]; line++)
	fputs (help_lines[line], stderr);
    }
  else
    fprintf (stderr, "getwww error --%s\n", err_msg);

  exit (1);
}

/* Global worker objects (their classes are declared outside this file section).
   get_www   : main() hands it the -O operands via Set_force_get_cgi() and
               starts the retrieval with Main().
   file_check: main() hands it the -d/-D/-s/-S/-t/-T filter patterns via
               Set_is_want().  */
Get_www get_www;
Check file_check;

/****************************************************************************************************/
int
main (int argc, char *argv[], char *envp[])
{
  int i, j, k;
  int nbytes;
  int url_exist = NO;
  int local_clean = NO;
  int NOT_USING_PROXY = NO;
  char www_file[PARSE_STRING_MAX];
  char temp_buf[PARSE_STRING_MAX];
  FILE *file_scan;
  unsigned char encoded[256];
  struct hostent *host;
  struct in_addr *addr;
  static char encode_data[64] =
  {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
  };
  static char GETWWW_VERSION[]="get whole www. c++ version 1.4  kisskiss@soback.kornet.nm.kr";
  fprintf (stderr, "%s\n",GETWWW_VERSION);

  if (argc < 2)
    usage ("");

  signal (SIGINT, terminater);
  signal (SIGPIPE, SIG_IGN);
  set_new_handler(new_error);

  for (i = 0; envp[i]; i++)
    {
      if (!strncasecmp (envp[i], "http_proxy", strlen ("http_proxy")))
	{
	  for (j = 0; envp[i][j] != '=' && envp[i][j]; j++)
	    ;
	  if (envp[i][j] != '=')
	    break;
	  strcpy (PROXY, &envp[i][j + 1]);
	  USING_PROXY = YES;
	}
    }
  for (i = 1; i < argc; i++)
    {
      if (!strncasecmp (argv[i], "http://", 7))
	{			/* http://aa.bb/a.html  */
	  url_exist = YES;
	  for (j = 7; argv[i][j]; j++)	/* http://aa.bb   */
	    if (argv[i][j] == '/')	/* http://aa.bb/  */
	      break;
	  if (argv[i][j] == NULL_CHAR)
	    {
	      strncpy (URL, argv[i], j);
	      URL[j] = '/';
	    }
	  else
	    strncpy (URL, argv[i], ++j);

	  strcpy (www_file, &argv[i][j]);
	}
      if (argv[i][0] == '-')
	{
	  switch (argv[i][1])
	    {
	    case 'c':
	      local_clean = YES;
	      break;
	    case 'i':
	      USE_EXIST_FILE = YES;
	      break;
	    case 'h':
	      usage ("");
	      break;
	    case 'n':
	      FORCE_GET = YES;
	      break;
	    case 'P':		/* not using proxy */
	      NOT_USING_PROXY = YES;
	      break;
	    case 'q':
	      QUIET_MODE = YES;
	      break;
	    case 'Q':
	      VERY_QUIET_MODE=YES;
	      break;
	    case 'v':
	      DEBUG_MODE = YES;
	      break;
	    case 'd':		/* -d only get file in [dir] directory */
	      i++;
	      if((i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7))
		{
		  for(;(i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7);i++)
		    {
		      strcpy(temp_buf,"*");
		      strcat (temp_buf, argv[i]);
		      if (temp_buf[strlen (temp_buf) - 1] == '*')
			temp_buf[strlen (temp_buf) - 1] = NULL_CHAR;
		      else if (temp_buf[strlen (temp_buf) - 1] != '/')
			strcat (temp_buf, "/");
		      file_check.Set_is_want(temp_buf);
		    }
		}
	      else
		usage("#invalid only get file in directory [-d] option");
	      i--;
	      break;
	    case 'D':		/* -D don't get file in dir directory */
	      i++;
	      if((i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7))
		{
		  for(;(i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7);i++)
		    {
		      strcpy(temp_buf,"&");
		      strcat (temp_buf, argv[i]);
		      if (temp_buf[strlen (temp_buf) - 1] == '*')
			temp_buf[strlen (temp_buf) - 1] = NULL_CHAR;
		      else if (temp_buf[strlen (temp_buf) - 1] != '/')
			strcat (temp_buf, "/");
		      file_check.Set_is_want(temp_buf);
		    }
		}
	      else
		usage("#invalid don't get file in directory [-D] option");
	      i--;
	      break;
	    case 's':		/* -s only get file has string in file name */
	      i++;
	      if((i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7))
		{
		  for(;(i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7);i++)
		    {
		      strcpy(temp_buf,"?");
		      strcat (temp_buf, argv[i]);
		      file_check.Set_is_want(temp_buf);
		    }
		}
	      else
		usage("#invalid only get file has string in file name [-s] option");
	      i--;
	      break;
	    case 'S':		/* -S don't get file has string in file name */
	      i++;
	      if((i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7))
		{
		  for(;(i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7);i++)
		    {
		      strcpy(temp_buf,"#");
		      strcat (temp_buf, argv[i]);
		      file_check.Set_is_want(temp_buf);
		    }
		}
	      else
		usage("#invalid don't get file has string in file name [-S] option");
	      i--;
	      break;
	    case 't':		/* -t only get end with string in file name */
	      i++;
	      if((i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7))
		{
		  for(;(i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7);i++)
		    {
		      strcpy(temp_buf,"|");
		      strcat (temp_buf, argv[i]);
		      file_check.Set_is_want(temp_buf);
		    }
		}
	      else
		usage("#invalid only get end with string in file name [-t] option");
	      i--;
	      break;
	    case 'T':		/* -T don't get end with string in file name */
	      i++;
	      if((i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7))
		{
		  for(;(i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7);i++)
		    {
		      strcpy(temp_buf,"%");
		      strcat (temp_buf, argv[i]);
		      file_check.Set_is_want(temp_buf);
		    }
		}
	      else
		usage("#invalid don't get end with string in file name [-T] option");
	      i--;
	      break;
	    case 'f':
	      if (++i < argc)
		{
		  strcat (FILE_TEMP, ".");
		  strcat (FILE_TEMP, argv[i]);
		  strcat (FILE_SCAN, ".");
		  strcat (FILE_SCAN, argv[i]);
		  strcat (FILE_NEW, ".");
		  strcat (FILE_NEW, argv[i]);
		  strcat (FILE_DEBUG, ".");
		  strcat (FILE_DEBUG, argv[i]);
		  strcat (FILE_ERROR, ".");
		  strcat (FILE_ERROR, argv[i]);
		}
	      else
		usage("#invalid temp file name [-f] option");
	      break;
	    case 'l':
	      if (++i < argc)
		{
		  AUTHORIZATION = 1;
		  strcpy (LOGIN, argv[i]);
		  for (j = 0; LOGIN[j] && LOGIN[j] != ':'; j++)
		    ;
		  if (!LOGIN[j])
		    {
		      fprintf (stderr, "user password is need, type it :");
		      gets (temp_buf);
		      if (temp_buf[0])
			{
			  strcat (LOGIN, ":");
			  strcat (LOGIN, temp_buf);
			}
		    }
		  nbytes = strlen (LOGIN);
		  for (j = k = 0; j < nbytes;)
		    {
		      encoded[k++] = encode_data[LOGIN[j] >> 2];
		      encoded[k++] = encode_data[((LOGIN[j] << 4) & 0x30) | ((LOGIN[j + 1] >> 4) & 0x0f)];
		      encoded[k++] = encode_data[((LOGIN[j + 1] << 2) & 0x3c) | ((LOGIN[j + 2] >> 6) & 0x03)];
		      encoded[k++] = encode_data[LOGIN[j + 2] & 0x3f];
		      j += 3;
		    }
		  if (j == nbytes + 1)
		    encoded[k - 1] = '=';
		  else if (j == nbytes + 2)
		    encoded[k - 1] = encoded[k - 2] = '=';
		  encoded[k] = NULL_CHAR;
		  strcpy (LOGIN,(char*) encoded);
		}
	      else
		usage("#invalid user name and password [-l] option");
	      break;
	    case 'O':		/* only get cgi */
	      i++;
	      if((i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7))
		{
		  for(;(i<argc) && (argv[i][0]!='-') && strncmp(argv[i],"http://",7);i++)
		    get_www.Set_force_get_cgi(argv[i]);
		}
	      else
		usage("#invalid only get cgi [-O] option");
	      i--;
	      break;
	    case 'p':		/* using proxy */
	      i++;
	      if ((i < argc) && !strncmp(argv[i],"http://",7))
		{
		  USING_PROXY = YES;
		  strcpy (PROXY, argv[i]);
		}
	      else
		usage("#invalid using proxy [-p] option");
	      break;
	    case 'r':	
	      if (++i < argc)
		{
		  for(j=0;argv[i][j];j++)
		    if(!isdigit(argv[i][j]))
		      if(argv[i][j] != '-')
			usage("#invalid recursive level");
		  RECURSIVE_LEVEL = atoi (argv[i]);
		}
	      else
		usage("#invalid recursive level [-r] option");
	      break;
	    case 'R':	
	      if (++i < argc)
		{
		  for(j=0;argv[i][j];j++)
		    if(!isdigit(argv[i][j]))
		      if(argv[i][j] != '-')
			usage("#invalid don't care not-modified level [-R] option");
		  FORCE_RECURSIVE_LEVEL = atoi (argv[i]);
		}
	      else
		usage("#invalid don't care not-modified level [-R] option");
	      break;
	    default :
	      strcpy(temp_buf,"#invalid option ");
	      strcat(temp_buf,argv[i]);
	      usage(temp_buf);
	      break;
	    }
	}
    }

  if (local_clean)
    {
      if ((local_file_clean ()) == 0)
	return (0);
      else
	return (1);
    }

  if (!url_exist)
    usage ("#URL not exist");	/* error  ó, url   ,  */
  if ((RECURSIVE_LEVEL < -1) || (FORCE_RECURSIVE_LEVEL < -1))
    usage ("#recursive level error");	/* RECURSIVE_LEVEL  -1    */

  if (('0' < URL[7] && URL[7] <= '9') &&
      (('0' < URL[8] && URL[8] <= '9') || URL[8] == '.') &&
      (('0' < URL[9] && URL[9] <= '9') || URL[9] == '.'))
    {
      for (i = 7; URL[i] != '/' && URL[i] != ':'; i++)
	;
      if (URL[i] == ':')
	{
	  strcpy (URL_NUMBER_PORT, URL);
	  strncpy (URL_NUMBER, URL, i);
	  URL_NUMBER[i] = '/';
	  URL_NUMBER[i + 1] = NULL_CHAR;
	}
      else
	strcpy (URL_NUMBER, URL);

      for (i = 7, j = 0; URL[i] != '/' && URL[i] != ':'; i++, j++)
	temp_buf[j] = URL[i];
      temp_buf[j] = NULL_CHAR;
      for (j = 0; j < 3; j++)
	{
	  HOST = inet_addr (temp_buf);
	  host = gethostbyaddr ((char *) &HOST, strlen ((char *) &HOST), AF_INET);
	  if (host != NULL)
	    break;
	  sleep (10 * (j + 1));
	}
      if (host == NULL)
	usage ("#can't connect to remote host--invalid hostname");
      strcpy (URL, "http://");
      strcat (URL, host->h_name);
      strcat (URL, "/");
      if (*URL_NUMBER_PORT)
	{
	  strcpy (URL_PORT, "http://");
	  strcat (URL_PORT, host->h_name);
	  for (j = 7; URL_NUMBER_PORT[j] != ':'; j++)
	    ;
	  strcat (URL_PORT, &URL_NUMBER_PORT[j]);
	}
    }
  else
    {
      for (i = 7, j = 0; URL[i] != '/'; i++, j++)
	{
	  temp_buf[j] = URL[i];
	  if (URL[i] == ':')
	    break;
	}
      temp_buf[j] = NULL_CHAR;
      for (j = 0; j < 3; j++)
	{
	  host = gethostbyname (temp_buf);
	  if (host != NULL)
	    {
	      addr = (struct in_addr *) *host->h_addr_list;
	      strcpy (temp_buf, inet_ntoa ((struct in_addr) *addr));
	      break;
	    }
	  sleep (10 * (j + 1));
	}
      if (host == NULL)
	usage ("#can't connect to remote host--invalid hostname");

      strcpy (URL_PORT, URL);
      strcpy (URL_NUMBER, "http://");
      strcpy (URL_NUMBER_PORT, "http://");
      strcat (URL_NUMBER, temp_buf);
      strcat (URL_NUMBER_PORT, temp_buf);
      strcat (URL_NUMBER, "/");
      if (URL[i] == ':')
	{
	  strcat (URL_NUMBER_PORT, &URL[i]);
	  URL_PORT[i] = NULL_CHAR;
	  strcat (URL_PORT, &URL[i]);
	  URL[i] = '/';
	  URL[i + 1] = NULL_CHAR;
	}
      else
	{
	  URL_PORT[i] = NULL_CHAR;
	  strcat (URL_PORT, ":80/");
	  strcat (URL_NUMBER_PORT, ":80/");
	}
    }
  if (NOT_USING_PROXY)
    USING_PROXY = 0;
  if (USING_PROXY)
    {
      if (PROXY[strlen (PROXY) - 1] != '/')
	strcat (PROXY, "/");
      if(!VERY_QUIET_MODE)
      fprintf(stderr,"using proxy %s\n",PROXY);
    }

  if (RECURSIVE_LEVEL >= 0)
    {
      if (FORCE_RECURSIVE_LEVEL > RECURSIVE_LEVEL)
	RECURSIVE_LEVEL = FORCE_RECURSIVE_LEVEL;
    }
  else
    {
      if (FORCE_RECURSIVE_LEVEL == 0)
	FORCE_RECURSIVE_LEVEL = RECURSIVE_LEVEL;
      else
	RECURSIVE_LEVEL = FORCE_RECURSIVE_LEVEL;

    }
  if (DEBUG_MODE)
    file_debug = fopen (FILE_DEBUG, "a+b");

  if (USING_PROXY)
    check_hostname (PROXY);
  else
    check_hostname (URL_NUMBER_PORT);

  unlink (FILE_TEMP);
  unlink (FILE_SCAN);
  unlink (FILE_NEW);
  unlink (FILE_DEBUG);
  unlink (FILE_ERROR);

  if((file_scan=fopen(FILE_SCAN,"w+b"))==NULL_CHAR)
    usage("make scan file error");

  fprintf(file_scan,"################################################################################\n");
  fprintf(file_scan,"##   %s\n",GETWWW_VERSION);
  fprintf(file_scan,"##   this html files are generated from \n");
  fprintf(file_scan,"##   %s%s \n",URL,www_file);
  fprintf(file_scan,"################################################################################\n");
  fclose(file_scan);

  get_www.Main (www_file);

  unlink (FILE_TEMP);
  if (DEBUG_MODE)
    fclose (file_debug);
  fprintf (stderr, "--------------------------------------------------------------\n");
  fprintf (stderr, "getwww : end without error.                                   \n");
  fprintf (stderr, "--------------------------------------------------------------\n");
  return (0);
}

/****************************************************************************************************/
/*
 * SIGINT handler: close the debug log, remove the temporary work file, tell
 * the user and exit with status 1.
 *
 * The handler is installed before the command line is parsed, so an
 * interrupt can arrive while debug mode is already requested but the debug
 * log has not been opened yet; the stream is therefore only closed when it
 * exists (fclose(NULL) is undefined behaviour).
 *
 * signum is the delivered signal number; it is not used.
 */
void
terminater (int signum)
{
  (void) signum;

  if (DEBUG_MODE && file_debug != NULL)
    fclose (file_debug);
  unlink (FILE_TEMP);
  fprintf (stderr, "\nuser aborted\n");
  exit (1);
}

/****************************************************************************************************/
/*
 * Handler installed with set_new_handler(): runs when operator new cannot
 * obtain memory.  Prints a diagnostic and exits with status 1.
 *
 * The message goes to stderr like every other diagnostic in this file.
 */
void new_error(void)
{
  fprintf (stderr, "too many html tag exist\n");
  exit(1);
}


/****************************************************************************************************/
int
local_file_clean (void)
{
  int i, j, k, l;
  int nbytes;
  static char TMP_DIR[] = ".GETWWW.TMP.DIRECTORY/";
  char *remove_dir[PARSE_STRING_MAX];
  char command[PARSE_STRING_MAX];
  FILE  *file_scan;
  int src, dest;
  char buffer[PARSE_STRING_MAX];
  char file_name[PARSE_STRING_MAX];
  struct stat test;
  struct utimbuf times;

  if(stat(TMP_DIR,&test) != -1)
    {
      fprintf(stderr,"Clean old tmp files, Please wait..\n");
      system("rm -rf .GETWWW.TMP.DIRECTORY/ ");
    }
  if ((file_scan = fopen (FILE_SCAN, "rb")) != NULL_CHAR)
    {
      mkdir (TMP_DIR, 0777);

      for (i = 0, k = 0; !feof (file_scan); i++)
	{
	  file_name[i] = fgetc (file_scan);

	  if( file_name[i] == '\n' && !strncmp(file_name,"##",2))
	    {
	      i=-1;
	      continue;
	    }
	  else if (file_name[i] == '\n' || feof (file_scan))
	    {
	      file_name[i] = NULL_CHAR;
	      for (j = 0; j < i; j++)
		{		/* 丮  */
		  if (file_name[j] == '/')
		    {
		      file_name[j] = NULL_CHAR;
		      strcpy (command, TMP_DIR);
		      strcat (command, file_name);
		      if ((stat (command, &test) == -1) && (errno == ENOENT))
			mkdir (command, 0777);
		      file_name[j] = '/';
		    }
		}
	      for (j = 0; file_name[j] != '/' && file_name[j]; j++)
		;
	      if (file_name[j] == '/')
		{
		  file_name[j] = NULL_CHAR;
		  for (l = 0; l < k; l++)
		    if (!strcmp (file_name, remove_dir[l]))
		      break;
		  if (l == k)
		    {
		      remove_dir[k]=new char[strlen(file_name)+1];
		      strcpy (remove_dir[k++], file_name);
		    }
		  file_name[j] = '/';
		}
	      if ((src = open (file_name, O_RDONLY)) == -1)
		{
		  i = -1;
		  continue;
		}
	      strcpy (command, TMP_DIR);
	      strcat (command, file_name);
	      if ((dest = creat (command, 0666)) == -1)
		{
		  fprintf (stderr, "destination file open error : %s\n", file_name);
		  return (-1);
		}
	      fprintf (stderr, "Moving file %s\n", file_name);
	      for (nbytes = 1; nbytes > 0;)
		{
		  nbytes = read (src, buffer, PARSE_STRING_MAX);
		  write (dest, buffer, nbytes);
		}
	      close (src);
	      close (dest);
	      stat (file_name, &test);
	      times.actime=test.st_atime;
	      times.modtime=test.st_mtime;
	      utime(command, &times);
	      unlink (file_name);
	      i = -1;
	    }
	}
      fclose (file_scan);

      for (j = 0; j < k; j++)
	{
	  strcpy (command, "rm -rf ");
	  strcat (command, remove_dir[j]);
	  fprintf (stderr, "remove directory %s\n", remove_dir[j]);
	  system (command);
	}
      strcpy (command, "cp -r -p ");
      strcat (command, TMP_DIR);
      strcat (command, "* .");
      fprintf (stderr, "copy new file from %s\n", TMP_DIR);
      system (command);

      strcpy (command, "rm -r ");
      strcat (command, TMP_DIR);
      fprintf (stderr, "now run (%s) command for tmp file clean\n", command);
      system (command);
      fprintf (stderr, "done\n");
    }
  else
    usage("#scan file not exist use -f option");
  return (0);
}
