#include "getwww.h"

/*
 * HTTP status codes paired with their reason phrases.
 *
 * Lookups in this file walk the table from the top and stop either on a
 * matching code or on the final entry, whose code is -1; that sentinel
 * doubles as the "unknown" answer for codes that are not listed.
 */
s_code status_code[] =
{
  /* 1xx */
  { 100, "Continue" },
  { 101, "Switching Protocols" },

  /* 2xx */
  { 200, "OK" },
  { 201, "Created" },
  { 202, "Accepted" },
  { 203, "Non-Authoritative Information" },
  { 204, "No Content" },
  { 205, "Reset Content" },
  { 206, "Partial Content" },

  /* 3xx */
  { 300, "Multiple Choices" },
  { 301, "Moved Permanently" },
  { 302, "Moved Temporarily" },
  { 303, "See Other" },
  { 304, "Not Modified" },
  { 305, "Use Proxy" },

  /* 4xx */
  { 400, "Bad Request" },
  { 401, "Unauthorized" },
  { 402, "Payment Required" },
  { 403, "Forbidden" },
  { 404, "Not Found" },
  { 405, "Method Not Allowed" },
  { 406, "Not Acceptable" },
  { 407, "Proxy Authentication Required" },
  { 408, "Request Time-out" },
  { 409, "Conflict" },
  { 410, "Gone" },
  { 411, "Length Required" },
  { 412, "Precondition Failed" },
  { 413, "Request Entity Too Large" },
  { 414, "Request-URI Too Large" },
  { 415, "Unsupported Media Type" },

  /* 5xx */
  { 500, "Internal Server Error" },
  { 501, "Not Implemented" },
  { 502, "Bad Gateway" },
  { 503, "Service Unavailable" },
  { 504, "Gateway Time-out" },
  { 505, "HTTP Version not supported" },

  /* end-of-table sentinel */
  { -1, "unknown" }
};

/****************************************************************************************************/

/*
 * Drive the whole retrieval: validate the starting document with HEAD
 * requests, then walk the documents it references using www_get_list /
 * level_meter as an explicit stack.
 *
 * www_file  starting path.  It is modified in place: a "/" may be appended
 *           when the name has no extension and the plain name does not
 *           answer 200.  NOTE(review): the caller must therefore supply a
 *           writable buffer with at least one spare byte -- confirm.
 *
 * Returns 0 once the stack has been emptied.  Failures of the starting
 * document are reported through usage(), which presumably does not return.
 */
int Get_www::Main (char *www_file)
{
  int top=0;
  int this_html_scode;

  /* Test for the empty string FIRST: indexing strlen()-1 on "" would read
     one byte in front of the buffer. */
  if (*www_file && www_file[strlen (www_file) - 1] != '/')
    {
      /* Look for a '.' in the last path component. */
      int check_html_name;
      for (check_html_name = strlen (www_file) - 1; check_html_name > 0 && www_file[check_html_name] != '/'; check_html_name--)
        if (www_file[check_html_name] == '.')
          break;
      if (www_file[check_html_name] != '.')
        {
          /* No extension: probe the name as given, then as a directory. */
          this_html_scode = Get_file (www_file, HTML_HEAD);
          if (this_html_scode != 200)
            {
              strcat (www_file, "/");
              this_html_scode = Get_file (www_file, HTML_HEAD);
              if (this_html_scode != 200)
                {
                  /* Stops on the matching entry or on the -1 "unknown" sentinel. */
                  for (code_p = status_code; code_p->code_number != -1; code_p++)
                    if (code_p->code_number == this_html_scode)
                      break;
                  strcpy (err_msg, code_p->information);
                  strcat (err_msg, "\nfirst file not found err, wrong filename or network down, check filename and url");
                  usage (err_msg);
                }
            }
        }
    }

  /* Final check of the (possibly adjusted) starting path. */
  this_html_scode = Get_file (www_file, HTML_HEAD);
  if (this_html_scode != 200)
    {
      for (code_p = status_code; code_p->code_number != -1; code_p++)
        if (code_p->code_number == this_html_scode)
          break;
      strcpy (err_msg, code_p->information);
      strcat (err_msg, "\nfirst file not found err, wrong filename or network down, check filename and url");
      usage (err_msg);
    }

  /* Seed the stack with the starting document at level 0. */
  www_get_list[top]=new char[strlen(www_file)+1];
  strcpy (www_get_list[top], www_file);

  print_blank[0]=NULL_CHAR;

  level_meter[top] = 0;

  int level;

  /* Each pass handles the entry on top of the stack; the loop's own top--
     pops it unless the reference loop below has pushed replacements. */
  for (top = 0; top >= 0; top--)
    {
      level = level_meter[top];
      if(!QUIET_MODE && !VERY_QUIET_MODE)
        {
          /* Progress line, redrawn in place with '\r'.  When the previous
             line was longer it is blanked out first.
             NOTE(review): print_blank's capacity is not visible here. */
          int blank_count;
          blank_count=strlen(print_blank);
          strcpy(print_blank,"scanning: ");
          strcat(print_blank,www_get_list[top]);
          if(blank_count >strlen(print_blank))
            {
              for(int k=0;k<blank_count;k++)
                fprintf(stderr,"%c",' ');
              fprintf(stderr,"\r");
            }
          fprintf(stderr,"%s\r",print_blank);
        }
      ref_number=0;
      Parse (www_get_list[top], level);

      if (ref_number== 0)
        {
          /* Nothing was collected for this document: release its name before
             it is popped, exactly as the other two pop paths below do. */
          delete[] www_get_list[top];
          continue;
        }

      /* Walk the collected references from last to first.  top is moved one
         past the current entry; every ref_buf string is released by the
         loop's step expression, including on `continue`. */
      for (ref_number--, top++, this_level_first_tag = 1, new_tag_exist = 0 ; ref_number>= 0; delete[] ref_buf[ref_number--])
        {
          /* Skip references that are already on the stack. */
          for (int new_tag_list = exist_in_list = 0; new_tag_list < top && !exist_in_list; new_tag_list++)
            if (!strcmp (ref_buf[ref_number], www_get_list[new_tag_list]))
              {
                exist_in_list = 1;
                break;
              }
          if (exist_in_list)
            continue;

          if (file_check.Want (ref_buf[ref_number]) || file_check.New (ref_buf[ref_number]))
            continue;
          if (level > FORCE_RECURSIVE_LEVEL)
            if (level > RECURSIVE_LEVEL && file_check.Exist (ref_buf[ref_number]) && !USE_EXIST_FILE)
              continue;
          new_tag_exist = 1;
          /* NOTE(review): the capacity of www_get_list is not visible here;
             confirm that index PARSE_FILE_MAX itself is a valid slot. */
          if (top > PARSE_FILE_MAX)
            {
              if (DEBUG_MODE)
                {
                  fprintf (file_debug, "==================================================\nerr log from www_main\n");
                  for (int i = 0; i < top; i++)
                    fprintf (file_debug, "stack %d : %s\n", i, www_get_list[i]);
                }
              usage ("really too many html exist -- try -o [only get dir] option");
            }
          if (this_level_first_tag)
            {
              /* The first accepted reference takes over the slot of the
                 document that was just parsed, one level deeper. */
              this_level_first_tag = 0;
              delete[] www_get_list[--top];
              level++;
            }
          www_get_list[top]=new char[strlen(ref_buf[ref_number])+1];
          strcpy (www_get_list[top], ref_buf[ref_number]);
          level_meter[top++] = level;
        }
      /* Every reference was filtered out: drop the parsed document itself. */
      if (!new_tag_exist)
          delete[] www_get_list[--top];
    }

  /* The function is declared int; falling off the end would be undefined. */
  return (0);
}

/****************************************************************************************************/
/*
 * Decide, from the recursion level and the local file state, whether
 * www_file is downloaded, re-used from disk, or skipped, and hand it to
 * Scan() when there is something to scan.
 *
 * Returns whatever Scan() returns, or 0 when the document is skipped or
 * could not be fetched.
 */
int Get_www::Parse(char *www_file, int level)
{
  int fetch_result;

  /* Shallow levels: always download, without a modification check. */
  if (level <= FORCE_RECURSIVE_LEVEL)
    {
      fetch_result = Get_file (www_file, HTML_BODY_DONT_CARE_IF_MODIFIED);
      if (fetch_result == -1)
        return (0);
      return (Scan (www_file, fetch_result, 0));
    }

  /* Beyond the recursion limit: only download what is not yet on disk. */
  if (level > RECURSIVE_LEVEL)
    {
      if (!file_check.Exist (www_file))
        {
          fetch_result = Get_file (www_file, HTML_BODY);
          if (fetch_result == -1)
            return (0);
          return (Scan (www_file, fetch_result, 0));
        }
      if (USE_EXIST_FILE)
        return (Scan (www_file, 0, USE_EXIST_FILE));
      return (0);
    }

  /* Between the two limits.  A query URL ('?') that already exists locally
     is fetched again only when its script name starts with one of the
     FORCE_GET_CGI entries. */
  if (file_check.Exist (www_file) && force_get_cgi_count && strchr (www_file, '?') != 0)
    {
      /* The script name is whatever follows the last '/', or the whole
         string when there is no '/'. */
      char *last_slash = strrchr (www_file, '/');
      char *script_name = www_file;
      int forced = 0;

      if (last_slash != 0)
        script_name = last_slash + 1;

      for (int n = 0; n < force_get_cgi_count && !forced; n++)
        if (!strncmp (script_name, FORCE_GET_CGI[n], strlen (FORCE_GET_CGI[n])))
          forced = 1;

      if (!forced)
        {
          if (USE_EXIST_FILE)
            return (Scan (www_file, 0, USE_EXIST_FILE));
          return (0);
        }
    }

  fetch_result = Get_file (www_file, HTML_BODY);
  if (fetch_result == -1)
    return (0);
  if (fetch_result == 304)
    {
      /* Not modified: scan the local copy only if that was asked for. */
      if (USE_EXIST_FILE)
        return (Scan (www_file, 0, USE_EXIST_FILE));
      return (0);
    }
  return (Scan (www_file, fetch_result, 0));	/* 200 OK */
}


/****************************************************************************************************/
int Get_www::Get_file (char *www_file, int COMMAND)
{
  int i, j, k;
  int sock;
  int retry_count;
  long int code;
  long int html_size=0;
  int file_type = 0;
  long int bytes_read=0;
  long int bytes_read_total=0;
  struct stat test;
  char buffer[PARSE_FILE_MAX];
  FILE *file_temp, *file_error, *file_new;
  struct s_code *code_p;
  char RFC822[PARSE_STRING_MAX];
  char command[PARSE_STRING_MAX];

  time_t curtime;
  struct tm *loctime;

  static char ProtocolVersion[] = " HTTP/1.0\r\n";
  static char From[] = "";
  static char Accept[] = "Accept: */* ;q=0.1\r\n";
  static char Accept_encoding[] = "Accept-Encoding: x-compress; x-zip\r\n";
  static char Accept_language[] = "Accept-Language: en\r\n";
  static char User_Agent[] = "User-Agent: GETWWW-ROBOT/2.0\r\n"; /* dolphin@flower.comeng.chungnam.ac.kr */
  static char Referer[] = "";
  static char Authorization[] = "Authorization:  basic ";
  static char Charge_to[] = "";
  static char If_Modified_Since[] = "If-Modified-Since: ";
  static char Pragma[] = "Pragma: no-cache\r\n";
  static char end_mark[] = "\r\n";

  strcpy (RFC822, ProtocolVersion);
  strcat (RFC822, From);
  strcat (RFC822, Accept);
  strcat (RFC822, Accept_encoding);
  strcat (RFC822, Accept_language);
  strcat (RFC822, User_Agent);
  strcat (RFC822, Referer);

  if (AUTHORIZATION)
    {
      strcat (RFC822, Authorization);
      strcat (RFC822, LOGIN);
      strcat (RFC822, "\r\n");
    }
  strcat (RFC822, Charge_to);
  if ((COMMAND != HTML_HEAD) && !FORCE_GET && (COMMAND != HTML_BODY_DONT_CARE_IF_MODIFIED))
    {
      strcpy (buffer, www_file);
      www_scan_path (buffer, WWW_LOCAL);
      if (stat (buffer, &test) != -1)
	{
	  if (test.st_size != 0)
	    {
	      loctime = gmtime (&test.st_mtime);
	      strftime (buffer, PARSE_FILE_MAX, "%A, %d-%b-%y %H:%M:%S GMT\r\n", loctime);
	      strcat (RFC822, If_Modified_Since);
	      strcat (RFC822, buffer);
	    }
	  else
	    unlink (buffer);
	}
    }
  if (FORCE_GET)
    strcat (RFC822, Pragma);
  strcat (RFC822, end_mark);

  if (COMMAND == HTML_HEAD)
    {
      for (k = 0; k < 3; k++)
	{
	  if ((sock = connect_host ()) == -1)
	    continue;

	  if (USING_PROXY)
	    {
	      strcpy (command, "HEAD ");
	      strcat (command, URL_PORT);
	    }
	  else
	    strcpy (command, "HEAD /");

	  strcat (command, www_file);
	  strcat (command, RFC822);
	  write_command (sock, command, strlen (command));
	  for (i = 0, bytes_read = 0; i < 3 && bytes_read <= 0; i++)
	    bytes_read = read (sock, buffer, PARSE_FILE_MAX);
	  close (sock);
	  if (bytes_read == 0)
	    {
	      sleep (10);
	      continue;
	    }
	  for (i = 0; buffer[i] != ' '; i++)
	    ;
	  for (j = ++i; buffer[i] >= '0' && buffer[i] <= '9'; i++)
	    ;
	  buffer[i] = NULL_CHAR;
	  code = atol (&buffer[j]);
	  for (code_p = status_code; code_p->code_number != -1; code_p++)
	    if (code_p->code_number == code)
	      return (code);
	  return (-1);
	}
      return (-1);
    }
  else
    {
      for (k = 0; k < 3; k++)
	{
	  if ((sock = connect_host ()) == -1)
	    continue;
	  if (!QUIET_MODE && !VERY_QUIET_MODE)
	    {
	      if(*print_blank)
		{
		  for(i=0;print_blank[i];i++)
		    fprintf(stderr,"%c",' ');
		  fprintf(stderr,"\r");
		}
	      print_blank[0]=NULL_CHAR;
	    }
	  if(!VERY_QUIET_MODE)
	    fprintf (stderr, "getting : %s :",*www_file ? www_file : "index.html");

	  if (USING_PROXY)
	    {
	      strcpy (command, "GET ");
	      strcat (command, URL_PORT);
	    }
	  else
	    strcpy (command, "GET /");
	  strcat (command, www_file);
	  strcat (command, RFC822);
	  if (DEBUG_MODE)
	    {
	      file_new = fopen (FILE_NEW, "a+b");
	      fprintf (file_new, "----------------------------------------------------------------------\n");
	      fprintf (file_new, "%s\n", www_file);
	      fprintf (file_new, "---------------------\n");
	      fprintf(file_new,command);
	      fclose (file_new);
	    }
	  write_command (sock, command, strlen (command));
	  for (i = 0, bytes_read = 0; i < 3 && bytes_read <= 0; i++)
	    bytes_read = read (sock, buffer, PARSE_FILE_MAX);
	  if (bytes_read == 0)
	    {
	      if (!VERY_QUIET_MODE)
		fprintf (stderr, "no response from remote host retry\n");
	      close (sock);
	      sleep (10);
	      continue;
	    }
	  
	  if (DEBUG_MODE)
	    {
	      file_new = fopen (FILE_NEW, "a+b");
	      for (j = 0; strcmp(&buffer[j],"\n\n") && strcmp (&buffer[j], "\r\n\r\n") && j < bytes_read; j++)
		fputc (buffer[j], file_new);
	      fclose (file_new);
	    }
	  for (i = 0; buffer[i] != ' '; i++)	/* 200 OK ˻ */
	    ;
	  for (j = ++i; buffer[i] >= '0' && buffer[i] <= '9'; i++)
	    ;
	  buffer[i] = NULL_CHAR;

	  code = atol (&buffer[j]);
	  if (code == 304)
	    {
	      if(!VERY_QUIET_MODE)
		fprintf (stderr, "file not modified\n");
	      close (sock);
	      return (code);
	    }
	  else if (code != 200)
	    {
	      for (code_p = status_code; code_p->code_number != -1; code_p++)
		if (code_p->code_number == code)
		  break;
	      if (!VERY_QUIET_MODE)
		fprintf (stderr, " %d : %s\n", code, code_p->information);
	      file_error = fopen (FILE_ERROR, "a+b");
	      fprintf (file_error, "%s : %d : %s\n", www_file, code, code_p->information);
	      fclose (file_error);
	      close (sock);
	      return (-1);
	    }
	  for (html_size = 0, file_type = 0; (i < bytes_read) && 
	       strncmp (&buffer[i], "\r\n\r\n", 4) && strncmp(&buffer[i],"\n\n",2); i++)
	    {
	      if (!strncasecmp (&buffer[i], "content-length:", 15))
		{
		  char temp_buffer_i;
		  i += 15;
		  for (; buffer[i] == ' '; i++)
		    ;
		  for (j = i; buffer[i] >= '0' && buffer[i] <= '9'; i++)
		    ;
		  temp_buffer_i=buffer[i];
		  buffer[i] = NULL_CHAR;
		  html_size = atol (&buffer[j]);
		  buffer[i]=temp_buffer_i;
		  i--;
		}
	      else if (!strncasecmp (&buffer[i], "content-type:", 13))
		{
		  i += 13;
		  for (; buffer[i] == ' '; i++)
		    ;
		  if (!strncasecmp (&buffer[i], "text/html", 9))
		    file_type = 1;
		}
	    }
	  if ((file_temp = fopen (FILE_TEMP, "w+b")) == NULL_CHAR)
	    {
	      close (sock);
	      usage ("temp file make error");
	    }
	  if(!strncmp(&buffer[i],"\r\n\r\n",4))
	    i +=4;
	  else if(!strncmp(&buffer[i],"\n\n",2))
	    i +=2;
	  for (; i < bytes_read; i++)
	    {
	      bytes_read_total++;
	      fputc (buffer[i], file_temp);
	    }
	  for (bytes_read = 1,retry_count=0; bytes_read > 0 && retry_count <10;)
	    {
	      bytes_read = read (sock, buffer, PARSE_FILE_MAX);
	      bytes_read_total +=bytes_read;
	      for (i = 0; i < bytes_read; i++)
		fputc (buffer[i], file_temp);
	      if(html_size !=0 && (bytes_read_total != html_size) && !bytes_read)
		{		
		  bytes_read=1;
		  retry_count++;
		  sleep(10);
		}
	    }
	  fclose (file_temp);
	  close (sock);

	  if ((stat (FILE_TEMP, &test) == -1) && (errno == ENOENT))
	    {
	      unlink (FILE_TEMP);
	      if (!VERY_QUIET_MODE)
		fprintf (stderr, "file get uncompletely, retry \n");
	      continue;
	    }
	  if (test.st_size == 0)
	    {
	      unlink (FILE_TEMP);
	      if (!VERY_QUIET_MODE)
		fprintf (stderr, "file size 0 retry\n");
	      continue;
	    }
	  if (html_size != 0 && (test.st_size != html_size))
	    {
	      unlink (FILE_TEMP);
	      if (!VERY_QUIET_MODE)
		fprintf (stderr, "file size error retry\n");
	      continue;
	    }

	  if (!VERY_QUIET_MODE)
	    fprintf (stderr, "done : ");
	  return (file_type);
	}
      file_error = fopen (FILE_ERROR, "a+b");
      fprintf (file_error, "%s : %s\n", www_file, "file getting error");
      fclose (file_error);
      unlink (FILE_TEMP);
      return (-1);
    }
}

/****************************************************************************************************/
/*
 * Store a heap-allocated copy of www_file in the next free ref_buf slot and
 * advance ref_number.  The copy is made with new[], so it must be released
 * with delete[].
 */
void Get_www::Set_ref_buf(char *www_file)
{
  char *copy = new char[strlen (www_file) + 1];

  strcpy (copy, www_file);
  ref_buf[ref_number] = copy;
  ref_number++;
}


/****************************************************************************************************/
/*
 * Store a heap-allocated copy of www_file in the next free FORCE_GET_CGI
 * slot and advance force_get_cgi_count.
 */
void Get_www::Set_force_get_cgi(char *www_file)
{
  char *copy = new char[strlen (www_file) + 1];

  strcpy (copy, www_file);
  FORCE_GET_CGI[force_get_cgi_count] = copy;
  force_get_cgi_count++;
}


