#include "getwww.h"

#define WWW_CREAT 2
#define CUT_BACKWARD  0
#define CUT_FORWARD   1

/****************************************************************************************************/
/*
 * Store a fetched document under its local name and scan it for links.
 *
 * www_file_orig   URL/path of the document; a copy is converted to a local
 *                 file name with www_scan_path (..., WWW_CREAT).
 * file_type       not used by this function.
 * USE_EXIST_FILE  zero:     read FILE_TEMP, create the missing directories of
 *                           the local name and write the (possibly rewritten)
 *                           content to it;
 *                 non-zero: only re-scan the already existing local file,
 *                           nothing is written.
 *
 * Every "<...>" tag of a non-binary document is handed to Scan_url (), which
 * may rewrite the tag text in place.  Always returns 0; failures are reported
 * on stderr and appended to FILE_ERROR.
 *
 * NOTE(review): the tag buffer is flushed at 255 characters, so this assumes
 * PARSE_STRING_MAX is at least 256 plus whatever Scan_url () may add when it
 * rewrites a tag -- confirm against getwww.h.
 */
int Get_www::Scan (char *www_file_orig, int file_type, int USE_EXIST_FILE)
{
  int i, j, in_treat;
  int binary_count = 0;
  int c;			/* int, because fgetc () returns int */
  char index[PARSE_STRING_MAX];
  char www_file[PARSE_STRING_MAX];
  FILE *file_src, *file_dest = NULL, *file_scan, *file_error;
  struct stat test;

  strcpy (www_file, www_file_orig);
  www_scan_path (www_file, WWW_CREAT);

  /* Record the local name in the scan log (best effort). */
  file_scan = fopen (FILE_SCAN, "a+b");
  if (file_scan != NULL)
    {
      fprintf (file_scan, "%s\n", www_file);
      fclose (file_scan);
    }

  if (DEBUG_MODE)
    {
      fprintf (file_debug, "--------------------------------------------------------------------------------\n");
      fprintf (file_debug, "%s\n", www_file);
    }
  if (!USE_EXIST_FILE)
    {
      if ((file_src = fopen (FILE_TEMP, "rb")) == NULL)
        {
          if (!VERY_QUIET_MODE)
            fprintf (stderr, " get file not exist\n");
          if ((file_error = fopen (FILE_ERROR, "a+b")) != NULL)
            {
              fprintf (file_error, "%s : %s\n", www_file, "get file not exist in harddisk");
              fclose (file_error);
            }
          return (0);
        }

      /* Create every missing directory component of the local name. */
      for (i = 0; www_file[i]; i++)
        {
          if (www_file[i] == '/')
            {
              www_file[i] = NULL_CHAR;	/* temporarily cut the path here */
              if ((stat (www_file, &test) == -1) && (errno == ENOENT))
                mkdir (www_file, 0777);
              www_file[i] = '/';
            }
        }
      if ((file_dest = fopen (www_file, "w+b")) == NULL)
        {
          if (!VERY_QUIET_MODE)
            fprintf (stderr, " cound not make dest file\n");
          if ((file_error = fopen (FILE_ERROR, "a+b")) != NULL)
            {
              fprintf (file_error, "%s : %s\n", www_file, "could not make dest file");
              fclose (file_error);
            }
          fclose (file_src);	/* do not leak the temp-file handle */
          return (0);
        }
      if (file_check.Binary (www_file))	/*  || (file_type != 1))  */
        {
          /* Binary by name: copy the temp file verbatim, no link scanning. */
          if (!VERY_QUIET_MODE)
            fprintf (stderr, "writing binary file : ");
          for (c = fgetc (file_src); !feof (file_src); c = fgetc (file_src))
            fputc (c, file_dest);
          fclose (file_src);
          fclose (file_dest);
          unlink (FILE_TEMP);
          if (!VERY_QUIET_MODE)
            fprintf (stderr, "done\n");

          return (0);
        }
    }
  else
    {
      if (file_check.Binary (www_file))
        return (0);
      if ((file_src = fopen (www_file, "rb")) == NULL)
        {
          strcpy (index, "could not open source file ");
          strcat (index, www_file);
          usage (index);
          /* If usage () returns there is nothing to scan; without this
             return the loop below would read from a null stream. */
          return (0);
        }
    }
  if (!USE_EXIST_FILE && !VERY_QUIET_MODE)
    fprintf (stderr, "writing html file : ");

  /* in_treat is non-zero while inside a "<...>" tag; index[0..i) holds the
     tag text collected so far. */
  for (in_treat = 0, i = 0;;)
    {
      c = fgetc (file_src);
      if (isascii (c) && iscntrl (c) && !isspace (c))
        binary_count++;
      if (binary_count > 5)
        {
          /* Too many control characters: treat the rest as binary data and
             copy it through unchanged. */
          if (!USE_EXIST_FILE)
            {
              for (j = 0; j < i; j++)
                fputc (index[j], file_dest);
              fputc (c, file_dest);
              for (c = fgetc (file_src); !feof (file_src); c = fgetc (file_src))
                fputc (c, file_dest);
              if (!VERY_QUIET_MODE)
                fprintf (stderr, " not it is binary file : ");
            }
          break;
        }
      if (feof (file_src))
        {
          /* Flush an unterminated tag, if any, and stop. */
          if (!USE_EXIST_FILE)
            for (j = 0; j < i; j++)
              fputc (index[j], file_dest);
          break;
        }
      if (in_treat)
        {
          index[i++] = static_cast<char> (c);
          if (i > 254)
            {
              /* Tag too long for the buffer: emit it untouched. */
              if (!USE_EXIST_FILE)
                for (j = 0; j < i; j++)
                  fputc (index[j], file_dest);
              in_treat = i = 0;
            }
          else if (c == '>')
            {
              /* Complete tag: let Scan_url () inspect/rewrite it, then emit. */
              index[i] = NULL_CHAR;
              Scan_url (www_file, index);
              if (!USE_EXIST_FILE)
                fputs (index, file_dest);
              in_treat = i = 0;
            }
        }
      else if (c == '<')
        {
          in_treat = 1;
          i = 0;
          index[i++] = static_cast<char> (c);
        }
      else if (!USE_EXIST_FILE)
        fputc (c, file_dest);
    }
  fclose (file_src);
  unlink (FILE_TEMP);
  if (!USE_EXIST_FILE)
    fclose (file_dest);
  if (!USE_EXIST_FILE && !VERY_QUIET_MODE)
    fprintf (stderr, "done\n");
  return (0);
}


/****************************************************************************************************/
/*
 * Inspect one HTML tag and handle the first link attribute found in it.
 *
 * www_file    local file name of the document that contains the tag; its
 *             directory part is used as the base for relative links.
 * index_orig  the tag text ("<...>", NUL terminated).  It is rewritten in
 *             place so that the link value becomes a relative local path.
 *
 * Recognised attributes are src, url, href and background, plus a
 * codebase/code pair whose two values are joined into one path.  Values
 * starting with one of the skipped schemes (ftp, mailto, gopher, news, nntp,
 * telnet, wais, file, prospero) are ignored.  For an accepted value the
 * corresponding remote file name is built and handed to
 * get_www.Set_ref_buf ().  A tag that has an href attribute and contains
 * "base" before it is turned into "<!...>" instead.
 *
 * Only the first accepted attribute of the tag is processed.  Always
 * returns 0.
 *
 * NOTE(review): all buffers are PARSE_STRING_MAX bytes and are filled with
 * unchecked strcpy/strcat; the rewritten tag can be longer than the original.
 * The caller's buffer size should be confirmed.
 */
int Get_www::Scan_url (char *www_file, char *index_orig)
{
  int i, j, k, l, m, n, p, q;
  int first, end;
  int src_exist = 0;
  int java_exist = 0;
  int is_directory;
  int path_depth;
  int this_file_depth;
  int src_len, dest_len;
  char new_get_file_name[PARSE_STRING_MAX];
  char *ptr;
  char java_buf[PARSE_STRING_MAX];
  char index[PARSE_STRING_MAX];
  char src[PARSE_STRING_MAX];
  char dest[PARSE_STRING_MAX];
  char cur_path[PARSE_STRING_MAX];
  FILE *file_error;

  if (strlen (index_orig) <= 7)	/* <hr>, <HTML>, <head>, <TITLE>, </th>, </tr> </applet>  */
    return (0);
  if (index_orig[1] == '!')	/* <!xxx> */
    return (0);

  if (DEBUG_MODE)
    fprintf (file_debug, "    tag : %s\n", index_orig);

  /* cur_path = directory part of www_file including the trailing '/',
     or "" when www_file has no directory part. */
  for (i = strlen (www_file) - 1; i > 0 && www_file[i] != '/'; i--)	/* aa/aa.html aa.html */
    ;
  if (www_file[i] == '/')
    {
      www_file[i] = NULL_CHAR;
      strcpy (cur_path, www_file);
      strcat (cur_path, "/");
      www_file[i] = '/';
    }
  else
    cur_path[0] = NULL_CHAR;

  /* Number of directory levels in cur_path. */
  for(path_depth=0,i=0;cur_path[i];i++)
    if(cur_path[i]=='/')
      path_depth++;

  strcpy (index, index_orig);

  for (i = 0; index[i] && index[i] != '>'; i++)
    {
      src_exist = 0;
      java_exist = 0;
      is_directory = 0;
      if (index[i] == '=')
        {
          /* j = last character of the attribute name before '='. */
          if ((j = Cut_blank (index, i - 1, CUT_BACKWARD)) == -1)
            continue;

          if (j > 2 && !strncasecmp (&index[j - 2], "src", 3))
            src_exist = 1;
          else if (j > 2 && !strncasecmp (&index[j - 2], "url", 3))
            src_exist = 1;
          else if (j > 3 && !strncasecmp (&index[j - 3], "href", 4))
            {
              /* "base" somewhere before href: turn the tag into "<!...>"
                 and stop. */
              for (l = 1; l < j - 6 && j > 8; l++)
                if (!strncasecmp (&index[l], "base", 4))
                  {
                    index_orig[1] = '!';
                    index_orig[2] = NULL_CHAR;
                    strcpy (&index_orig[2], &index[1]);
                    return (0);
                  }
              src_exist = 1;
            }
          else if (j > 10 && !strncasecmp (&index[j - 9], "background", 10))
            src_exist = 1;
          else if (j > 8 && !strncasecmp (&index[j - 7], "codebase", 8))
            {
              /* codebase=... must be followed by a code=... attribute;
                 k is left on the '=' of that following attribute. */
              for (k = i + 1; index[k] != '>' && index[k] != '='; k++)
                ;
              if (index[k] == '>')
                continue;
              if ((l = Cut_blank (index, k - 1, CUT_BACKWARD)) == -1)
                continue;
              if (!strncasecmp (&index[l - 3], "code", 4))
                java_exist = 1;
            }

          if (src_exist)
            {
              first = i;
              if ((j = Cut_blank (index, i + 1, CUT_FORWARD)) == -1)	/* <src = " http:/ /aa/bb/cc/index\n.html "> */
                continue;	/* no value before '>' */

              if (index[j] == '"')
                {		/* quoted value: the rewrite starts after the quote */
                  first = j;
                  if ((j = Cut_blank (index, j + 1, CUT_FORWARD)) == -1)
                    continue;
                }
            }
          else if (java_exist)
            {
              /* Build in java_buf a copy of the tag in which the code value
                 is appended to the codebase value (separated by '/'), then
                 continue with that copy as the tag text.
                 NOTE(review): the scans below do not stop at NUL, and the
                 first one tests index[j] (loop-invariant) rather than
                 java_buf[p] for '>' -- left unchanged, needs confirmation. */
              strcpy (java_buf, index);
              first = i;
              if ((j = Cut_blank (java_buf, i + 1, CUT_FORWARD)) == -1)
                continue;
              if (index[j] == '"')
                {
                  first = j;
                  if ((j = Cut_blank (java_buf, j + 1, CUT_FORWARD)) == -1)
                    continue;
                }
              for (p = j; java_buf[p] != ' ' && java_buf[p] != '"' && java_buf[p] != '\t' && index[j] != '>'; p++)
                ;
              q = p;
              if (java_buf[p - 1] != '/')
                java_buf[p++] = '/';
              if ((m = Cut_blank (java_buf, k + 1, CUT_FORWARD)) == -1)
                continue;
              if (java_buf[m] == '"')
                if ((m = Cut_blank (java_buf, m + 1, CUT_FORWARD)) == -1)
                  continue;
              for (; index[m] != ' ' && index[m] != '"'; m++)
                {
                  if (index[m] == '\n')
                    continue;
                  java_buf[p++] = index[m];
                }
              for (; index[q]; q++)
                java_buf[p++] = index[q];
              java_buf[p] = index[0] = NULL_CHAR;
              strcpy (index, java_buf);
            }
          else
            continue;

          /* Copy the attribute value into src, dropping embedded newlines;
             the value ends at a quote, blank, tab, '>' or '#'. */
          for (k = 0, src[k] = NULL_CHAR; index[j] != '"' && index[j] != ' ' && index[j] != '>' && index[j] != '\t' && index[j] != '#'; j++)
            {
              if (index[j] == '\n')
                continue;
              src[k++] = index[j];
              src[k] = NULL_CHAR;
            }
          end = j;

          /* Schemes that are never followed. */
          if (!strncasecmp (src, "ftp://", 6))	/* src="ftp://a.b.c.d" */
            continue;
          if (!strncasecmp (src, "mailto:", 7))		/* src="mailto:mail@a.b.c.d" */
            continue;
          if (!strncasecmp (src, "gopher://", 9))	/* href="gopher://gopher.vt.edu" */
            continue;
          if (!strncasecmp (src, "news://", 7))		/* href="news://comp.www.w3" */
            continue;
          /* Compare all five characters including the ':'; a length of 4
             would also reject relative links such as "news.html". */
          if (!strncasecmp (src, "news:", 5))		/* href="news:comp.www.w3" */
            continue;
          if (!strncasecmp (src, "nntp://", 7))
            continue;
          if (!strncasecmp (src, "telnet://", 9))
            continue;
          if (!strncasecmp (src, "wais://", 7))
            continue;
          if (!strncasecmp (src, "file://", 7))
            continue;
          if (!strncasecmp (src, "prospero://", 11))
            continue;

          /* "http:" not followed by "//": drop the scheme and treat the
             remainder as a plain path. */
          if(!strncasecmp(src,"http:",5) && strncasecmp(&src[5],"//",2))
             { /* Markus Schoepflin (Hiwi -06.96) <schoepf@telematik.informatik.uni-karlsruhe.de> */
               char src_temp[PARSE_STRING_MAX];
               strcpy(src_temp,&src[5]);
               strcpy(src,src_temp);
             }

          if (src[0] == NULL_CHAR && index[end] == '#')		/*  = #1 =" #1" =http://a.b.c.d/aa.html#1 bb.html#2 */
            return (0);

          /* Length-guarded suffix tests: an empty or short value must not
             index before the start of src. */
          src_len = strlen (src);
          if ((src_len > 0 && src[src_len - 1] == '/')
              || (src_len >= 11 && !strcmp (&src[src_len - 11], "/index.html")))
            is_directory = 1;
          k = www_scan_path (src, WWW_LOCAL);

          if (!strncasecmp (src, "javascript", strlen ("javascript")))
            {
              /* Cut off an "/index.html" that www_scan_path may have added. */
              for (ptr = src; *ptr; ptr++)
                if (!strncmp (ptr, "/index.html", strlen ("/index.html")))
                  {
                    *ptr = NULL_CHAR;
                    break;
                  }
            }
          if(!strncmp(src,"../",3))
            {
              /* More "../" steps than cur_path has levels: log it and drop
                 the surplus steps. */
              for(j=0,this_file_depth=0;src[j];j++)
                if(!strncmp(&src[j],"../",3))
                  this_file_depth++;
              if(this_file_depth >path_depth)
                {
                  if ((file_error = fopen (FILE_ERROR, "a+b")) != NULL)
                    {
                      fprintf (file_error, "   : %s %s%s\n",src ,"invalid path name in ", www_file);
                      fclose (file_error);
                    }
                  j=(this_file_depth-path_depth)*3;
                  strcpy(dest,&src[j]);
                  strcpy(src,dest);
                  dest[0]=NULL_CHAR;
                }
            }
          if (k == 1)
            return (0);

          else if (k == 2)
            {
              /* www_scan_path returned 2 (http:// URL on a known prefix, or
                 a path starting with '/').  l = length of the leading
                 directory prefix shared by cur_path and src. */
              for (j = 0, l = 0; cur_path[j]; j++)
                {
                  if (cur_path[j] == '/')
                    {
                      if (!strncasecmp (cur_path, src, j + 1))
                        {	/* aa/bb/cc/aa.html    aa/bb/dd/aa.html aa/cc/aa.html bb/aa.html */
                          l = j + 1;
                          continue;
                        }
                      break;
                    }
                }
              dest[0] = NULL_CHAR;
              if (l == 0)
                m = p = 0;
              else
                m = p = l;
              /* One "../" for every remaining level of cur_path, written
                 both into the tag and into dest. */
              for (n = 0; cur_path[m]; m++)
                {
                  if (cur_path[m] == '/')
                    {
                      strcpy (&index_orig[first + 1], "../");
                      strcpy (&dest[n], "../");
                      first += 3;
                      n += 3;
                    }
                }
              strcpy (&index_orig[first + 1], &src[p]);
              strcat (&index_orig[first + strlen (&src[p])], &index[end]);
              strcat (&dest[n], &src[p]);
              if (java_exist)
                {
                  /* Remove the last path component of the rewritten value
                     from the tag. */
                  strcpy (index, index_orig);
                  for (l = first + 1; index[l] != '"' && index[l] != ' '; l++)
                    ;
                  for (m = l; index[m] != '/'; m--)
                    ;
                  for (; index[l]; l++)
                    index_orig[m++] = index[l];
                  index_orig[m] = NULL_CHAR;
                }
            }
          else
            {
              if (!strncmp (src, "./", 2))
                {
                  /* Relative link with a leading "./": strip it. */
                  strcpy (&index_orig[first + 1], &src[2]);
                  strcat (&index_orig[first + strlen (&src[2])], &index[end]);
                  strcpy (dest, &src[2]);
                  if (java_exist)
                    {
                      strcpy (index, index_orig);
                      for (l = first + 1; index[l] != '"' && index[l] != ' '; l++)
                        ;
                      for (m = l; index[m] != '/'; m--)
                        ;
                      for (; index[l]; l++)
                        index_orig[m++] = index[l];
                      index_orig[m] = NULL_CHAR;
                    }
                }
              else
                {
                  /* Plain relative link: used as it is. */
                  strcpy (&index_orig[first + 1], src);
                  strcat (&index_orig[first + strlen (src)], &index[end]);
                  strcpy (dest, src);
                  if (java_exist)
                    {
                      strcpy (index, index_orig);
                      for (l = first + 1; index[l] != '"' && index[l] != ' '; l++)
                        ;
                      for (m = l; index[m] != '/'; m--)
                        ;
                      for (; index[l]; l++)
                        index_orig[m++] = index[l];
                      index_orig[m] = NULL_CHAR;
                    }
                }
            }
          if (DEBUG_MODE)
            fprintf (file_debug, "        : %s\n", index_orig);
          /* Length-guarded, see the src suffix test above. */
          dest_len = strlen (dest);
          if (dest_len >= 13 && !strcmp (&dest[dest_len - 13], "../index.html"))
            is_directory=1;

          k = www_scan_path (dest, WWW_REMOTE);

          if (k == 1)
            return (0);
          else if (!strncmp (dest, "../", 3))
            {
              /* Resolve the leading "../" steps of dest against cur_path:
                 each step moves m back to the previous '/' in cur_path. */
              for (l = 0, m = strlen (cur_path) - 1; !strncmp (&dest[l], "../", 3);)
                {		/* e.g. cur_path aa/bb/cc/ and dest ../../aa.html */
                  l += 3;
                  for (m--; cur_path[m] != '/'; m--)
                    if (m == 0)
                      {
                        /* Reached the start of cur_path: the remainder of
                           dest is the file name. */
                        strcpy (new_get_file_name, &dest[l]);
                        if (!is_directory &&
                            new_get_file_name[strlen (new_get_file_name) - 1] == '/')
                          {
                            new_get_file_name[strlen (new_get_file_name) - 1] = NULL_CHAR;
                            p = get_www.Get_file (new_get_file_name, HTML_HEAD);
                            if (p == 302) 
                              strcat(new_get_file_name,"/"); /* Keith.Briggs@physics.uwa.edu.au */
                            else
                              {
                                /* Remove "/index.html" from the rewritten tag. */
                                for (n = 0; strncmp (&index_orig[n], "/index.html", 11) && index_orig[n] != '>'; n++)
                                  ;
                                if (index_orig[n] == '/')
                                  {
                                    strcpy (index, index_orig);
                                    index_orig[n] = NULL_CHAR;
                                    strcat (index_orig, &index[n + 11]);
                                  }
                                if (p != 200)
                                  {
                                    if ((file_error = fopen (FILE_ERROR, "a+b")) != NULL)
                                      {
                                        fprintf (file_error, "%s : %s %s\n", new_get_file_name, "could not test file error in", www_file);
                                        fclose (file_error);
                                      }
                                  }
                              }
                          }
                        if (DEBUG_MODE)
                          fprintf (file_debug, "        : %s\n", new_get_file_name);
                        get_www.Set_ref_buf(new_get_file_name);
                        return (0);
                      }
                }
              if (cur_path[0] == NULL_CHAR)
                m = -1;
              strncpy (new_get_file_name, cur_path, m + 1);
              new_get_file_name[m + 1] = NULL_CHAR;
              strcat (&new_get_file_name[m + 1], &dest[l]);
            }
          else
            {
              strcpy (new_get_file_name, cur_path);
              new_get_file_name[strlen (cur_path)] = NULL_CHAR;
              strcat (&new_get_file_name[strlen (cur_path)], dest);
            }
          if (!is_directory &&
              new_get_file_name[strlen (new_get_file_name) - 1] == '/')
            {
              /* The link did not name a directory but the computed name ends
                 in '/': probe the name without the slash (HTML_HEAD). */
              new_get_file_name[strlen (new_get_file_name) - 1] = NULL_CHAR;
              p = get_www.Get_file (new_get_file_name, HTML_HEAD);
              if (p == 302)
                strcat(new_get_file_name,"/"); /* Keith.Briggs@physics.uwa.edu.au */
              else
                {
                  /* Remove "/index.html" from the rewritten tag. */
                  for (n = 0; strncmp (&index_orig[n], "/index.html", 11) && index_orig[n] != '>'; n++)
                    ;
                  if (index_orig[n] == '/')
                    {
                      strcpy (index, index_orig);
                      index_orig[n] = NULL_CHAR;
                      strcat (index_orig, &index[n + 11]);
                    }
                  if (p != 200)
                    {
                      if ((file_error = fopen (FILE_ERROR, "a+b")) != NULL)
                        {
                          fprintf (file_error, "%s : %s %s\n", new_get_file_name, "could not test file error in", www_file);
                          fclose (file_error);
                        }
                    }
                }
            }
          if (DEBUG_MODE)
            fprintf (file_debug, "        : %s\n", new_get_file_name);
          get_www.Set_ref_buf(new_get_file_name);
          return (0);
        }
    }

  /* No usable link attribute in this tag.  A value-returning function must
     not run off its end. */
  return (0);
}


/* Blank-skipping helper used while parsing the text of a tag. */
/****************************************************************************************************/
/*
 * Skip blanks (space, newline, tab) in index, starting at position i.
 *
 * forward  non-zero: walk towards the end of the string,
 *          zero:     walk towards its beginning.
 *
 * Returns the position of the first non-blank character, or -1 when that
 * character is the tag delimiter in the walking direction ('>' when going
 * forward, '<' when going backward).
 */
int Get_www::Cut_blank (char *index, int i, int forward)
{
  const int step = forward ? 1 : -1;
  const char delimiter = forward ? '>' : '<';
  int pos = i;

  while (index[pos] == ' ' || index[pos] == '\n' || index[pos] == '\t')
    pos += step;

  return (index[pos] == delimiter) ? -1 : pos;
}



/****************************************************************************************************/
/*
 * Convert a URL/path in place between its remote form and its local
 * file-name form, and classify it.
 *
 * www_file_orig    in: URL or path; out: converted name (same buffer).
 * make_local_file  WWW_LOCAL or WWW_CREAT: produce a local file name --
 *                    '?', '&' and every '/' after a '?' are replaced by the
 *                    14-character tokens "L----QUESTION.", "L----AMPER---."
 *                    and "L----SLASH---."; a name that is empty or ends in
 *                    '/' gets "index.html" appended; a name without any '.'
 *                    or ';' (after its first character) gets "/index.html"
 *                    appended (not for WWW_CREAT).
 *                  any other value (WWW_REMOTE): produce the remote name --
 *                    the tokens are turned back into '?', '&' and '/', and a
 *                    trailing "/index.html" is reduced to "/".
 *
 * An "http://" input must begin with URL_PORT, URL, URL_NUMBER_PORT or
 * URL_NUMBER (compared without their last character); that prefix is
 * removed.  A leading '/' is removed as well.
 *
 * Returns 1 for an "http://" URL that matches none of those prefixes
 *           (buffer untouched);
 *         2 when nothing is left after prefix removal (buffer becomes
 *           "index.html"), and, in local mode, when the input was an
 *           accepted "http://" URL or started with '/';
 *         0 otherwise, and always in remote mode apart from the two cases
 *           above that return early.
 *
 * NOTE(review): the work buffers are PARSE_STRING_MAX bytes and every copy
 * is unchecked; the token expansion can lengthen the name considerably.
 * Callers must pass a buffer large enough for the result -- confirm.
 */
int www_scan_path (char *www_file_orig, int make_local_file)
{
  int i, j, path = 0;
  char www_file[PARSE_STRING_MAX];
  char www_file_2[PARSE_STRING_MAX];

  if (!strncasecmp (www_file_orig, "http://", 7))
    {
      path = 2;
      if (!strncasecmp (www_file_orig, URL_PORT, (strlen (URL_PORT) - 1)))
        strcpy (www_file, &www_file_orig[strlen (URL_PORT)-1]);
      else if (!strncasecmp (www_file_orig, URL, (strlen (URL) - 1)))
        strcpy (www_file, &www_file_orig[strlen (URL)-1]);
      else if (!strncasecmp (www_file_orig, URL_NUMBER_PORT, (strlen (URL_NUMBER_PORT) - 1)))
        strcpy (www_file, &www_file_orig[strlen (URL_NUMBER_PORT)-1]);
      else if (!strncasecmp (www_file_orig, URL_NUMBER, (strlen (URL_NUMBER) - 1)))
        strcpy (www_file, &www_file_orig[strlen (URL_NUMBER)-1]);
      else
        return (1);
    }
  else
    strcpy (www_file, www_file_orig);

  if(www_file[0]==NULL_CHAR)
    {
      strcpy(www_file_orig,"index.html");
      return(2);
    }
  else if (www_file[0] == '/')
    {
      /* Drop the leading '/'. */
      strcpy (www_file_2, &www_file[1]);
      strcpy (www_file, www_file_2);
      path = 2;
    }
  if (make_local_file == WWW_LOCAL || make_local_file == WWW_CREAT)
    {
      /* Replace characters that are awkward in file names by tokens.
         '/' is only replaced once a '?' has been seen. */
      int cgi_start=0;
      for (i = j = 0, www_file_2[j] = NULL_CHAR; www_file[i]; i++)
        {
          if (www_file[i] == '?')
            {
              strcpy (&www_file_2[j], "L----QUESTION.");
              j += 14;
              www_file_2[j] = NULL_CHAR;
              cgi_start=1;
            }
          else if (www_file[i] == '/' && cgi_start)
            {
              strcpy (&www_file_2[j], "L----SLASH---.");
              j += 14;
              www_file_2[j] = NULL_CHAR;
            }
          else if (www_file[i] == '&')
            {
              strcpy (&www_file_2[j], "L----AMPER---.");
              j += 14;
              www_file_2[j] = NULL_CHAR;
            }
          else
            {
              www_file_2[j++] = www_file[i];
              www_file_2[j] = NULL_CHAR;
            }
        }

      strcpy (www_file, www_file_2);
      for (i = 0; www_file[i]; i++)	/* i = length of the name */
        ;
      if ((i == 0) || (i > 0 && www_file[i - 1] == '/'))
        {			/* empty name or directory: use its index.html */
          strcat (www_file, "index.html");
          strcpy (www_file_orig, www_file);
          return (path);
        }
      else
        {
          /* A '.' or ';' anywhere after the first character: the name is
             taken as a file and left alone. */
          for (j = i; j > 0; j--)
            {
              if (www_file[j] == '.' || www_file[j]==';')
                {
                  strcpy (www_file_orig, www_file);
                  return (path);
                }
            }
          if (make_local_file != WWW_CREAT)
            strcat (www_file, "/index.html");
          strcpy (www_file_orig, www_file);
          return (path);
        }
    }
  else
    {
      /* Turn the tokens back into the characters they stand for.  Only a
         complete 14-character token is consumed; any other text, even if
         it begins with "L----", is copied through unchanged so that nothing
         is dropped and the scan never jumps past the terminating NUL. */
      for (i = j = 0; www_file[i]; i++)
        {
          if (!strncmp (&www_file[i], "L----QUESTION.", 14))
            {
              www_file_2[j++] = '?';
              i += 13;
            }
          else if (!strncmp (&www_file[i], "L----AMPER---.", 14))
            {
              www_file_2[j++] = '&';
              i += 13;
            }
          else if (!strncmp (&www_file[i], "L----SLASH---.", 14))
            {
              www_file_2[j++] = '/';
              i += 13;
            }
          else
            www_file_2[j++] = www_file[i];
        }
      www_file_2[j] = NULL_CHAR;
      strcpy (www_file, www_file_2);

      /* "dir/index.html" is requested as "dir/". */
      i = strlen (www_file);
      if (i >= 11 && !strncmp (&www_file[i - 11], "/index.html", 11))
        www_file[i - 10] = NULL_CHAR;
      strcpy (www_file_orig, www_file);
      return (0);
    }
}
