#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "parseurl.h"

/*
 * Release the string fields of *url (schema, specific, hostinfo, hostname,
 * path and filename).  The URL structure itself and its port field are
 * left alone.
 *
 * Each pointer is reset to NULL after it is freed, so calling freeurl()
 * again on the same structure is harmless.
 *
 * Always returns 0.
 */
int freeurl(URL *url){
	free(url->schema);
	url->schema=NULL;
	free(url->specific);
	url->specific=NULL;
	free(url->hostinfo);
	url->hostinfo=NULL;
	free(url->hostname);
	url->hostname=NULL;
	free(url->path);
	url->path=NULL;
	free(url->filename);
	url->filename=NULL;
	return(0);
}

/*
 * Collapse parent-directory references such as "/parent/../index.html"
 * (which becomes "/index.html") in place.
 *
 * For every "/../" found, the directory component in front of it is removed
 * together with the ".." itself; the text from the '/' that follows ".."
 * onward is kept.  If no '/' precedes the removed component, the result
 * starts at that kept '/' (so "a/../b" becomes "/b").
 *
 * A path that begins with "../" is returned untouched because there is
 * nothing in front of it to remove.  A "../" that merely ends a longer name
 * (for example "foo../bar") is not a parent reference and is left alone.
 *
 * path must be a writable NUL-terminated string; the result is never longer
 * than the input.  Returns path.
 */
char *remove2dot(char *path){
    char *scan=path;
    char *dots;

    while((dots=strstr(scan,"../"))!=NULL){
        char *tail;
        char *cut;

        if(dots==path) return path; /* leading "../": no parent to drop */
        if(dots[-1]!='/'){
            /* ".." is only the end of a longer name; keep searching
               from the '/' that follows it */
            scan=dots+2;
            continue;
        }
        tail=dots+2;                /* the '/' right after ".." */
        dots[-1]=0;                 /* cut the path just before "/.." */
        cut=strrchr(path,'/');      /* start of the component to drop */
        if(!cut) cut=path;
        /* source and destination live in the same buffer and may overlap,
           so memmove() is required here */
        memmove(cut,tail,strlen(tail)+1);
        scan=path;                  /* the string changed: rescan it */
    }
    return path;
}

/*
 * Split the URL string s into the fields of *url.
 *
 * Every string field is allocated with strlen(s)+1 bytes, so any piece
 * copied out of s is guaranteed to fit.  On success the caller owns the
 * fields and releases them with freeurl().
 *
 * - "schema://hostinfo[path]": schema, specific (everything after the first
 *   ':'), hostinfo and path are filled in; path defaults to "/".  hostinfo
 *   is further split into hostname and port; port stays 0 when hostinfo
 *   carries no ":number".
 * - Input on which the "schema:rest" scan does not succeed is treated as a
 *   bare path; schema and specific are left empty.
 *
 * Afterwards a "#fragment" is cut off the path and parent references are
 * collapsed with remove2dot().  A path that does not start with '/' is
 * moved entirely into filename (minus a leading "./") and path becomes
 * empty.  Otherwise the text after the last '/' is moved into filename and
 * path keeps its trailing '/'.
 *
 * Returns 0 on success.  Returns 1 when an allocation fails, or when a
 * schema was found but the rest does not look like "//host..."; in that
 * case every string field has been freed and set to NULL.
 */
int parseurl(URL *url,char *s){
	char *ss;
	size_t len;

	len=strlen(s)+1;
	/* calloc() hands back zero-filled buffers, i.e. empty strings */
	url->schema=calloc(len,1);
	url->specific=calloc(len,1);
	url->hostinfo=calloc(len,1);
	url->hostname=calloc(len,1);
	url->port=0;
	url->path=calloc(len,1);
	url->filename=calloc(len,1);
	if(!url->schema||!url->specific||!url->hostinfo||
	   !url->hostname||!url->path||!url->filename)
		goto fail;

	if(sscanf(s,"%[^:]:%s",url->schema,url->specific)!=2){
		/* no "schema:rest" form: the whole input is a path; wipe
		   whatever the partial scan may have stored */
		strcpy(url->path,s);
		strcpy(url->schema,"");
		strcpy(url->specific,"");
	}else{
		switch(sscanf(url->specific,"//%[^/]%s",url->hostinfo,url->path)){
			case 1: strcpy(url->path,"/");break;	/* host only */
			case 2: break;
			default:
				goto fail;
		}
		/* result deliberately ignored: hostname/port keep their
		   defaults when a part is missing */
		sscanf(url->hostinfo,"%[^:]:%d",url->hostname,&(url->port));
	}

	if((ss=strchr(url->path,'#'))!=NULL)
		*ss=0;
	url->path=remove2dot(url->path);
	if(url->path[0]!='/'){
		if(!strncmp(url->path,"./",2))
			strcpy(url->filename,url->path+2);
		else
			strcpy(url->filename,url->path);
		strcpy(url->path,"");
	}
	/* split off the last component unless the path already ends in '/' */
	if((ss=strrchr(url->path,'/'))&&(strcmp(ss,"/"))){
		strcpy(url->filename,ss+1);
		*(ss+1)=0;
	}
	return(0);

fail:
	freeurl(url);
	/* do not leave dangling pointers behind for the caller */
	url->schema=NULL;
	url->specific=NULL;
	url->hostinfo=NULL;
	url->hostname=NULL;
	url->path=NULL;
	url->filename=NULL;
	return(1);
}


/*
 * Release the strings stored in *tag (name, href, src, content and equiv).
 * Fields that are NULL are skipped automatically since free(NULL) is a
 * no-op.  The TAG structure itself is not freed.
 *
 * Each pointer is reset to NULL after it is freed, so calling freetag()
 * again on the same structure is harmless.
 *
 * Always returns 0.
 */
int freetag(TAG *tag){
	free(tag->name);
	tag->name=NULL;
	free(tag->href);
	tag->href=NULL;
	free(tag->src);
	tag->src=NULL;
	free(tag->content);
	tag->content=NULL;
	free(tag->equiv);
	tag->equiv=NULL;
	return(0);
}


/* Advance pointer s while *s is a non-NUL character contained in set.
   Expands to a complete statement, trailing ';' included. */
#define SKIP(s,set)	while((*(s))&&(strchr((set),*(s))))(s)++;
/* Advance pointer s while *s is a non-NUL character NOT contained in set.
   Expands to a complete statement, trailing ';' included. */
#define SKIPN(s,set)	while((*(s))&&(!strchr((set),*(s))))(s)++;

/* Store a fresh copy of value in *slot, releasing any copy stored earlier
   so that a repeated attribute does not leak the previous string. */
static void tag_set_attr(char **slot,const char *value){
	free(*slot);
	*slot=strdup(value);
}

/*
 * Parse the text of one markup tag into *tag.
 *
 * stag is modified in place: NUL bytes are written over the delimiters that
 * end the tag name, attribute names and attribute values.  The first
 * character of stag is skipped (presumably the opening '<' - confirm
 * against the callers); the tag name runs from there to the first
 * whitespace character or '>'.
 *
 * Attributes have the form name=value, where the value may be wrapped in
 * double or single quotes; a '>' always ends a value.  The following
 * attribute names are recognised, case-insensitively, and a heap copy of
 * the value is stored:
 *     href -> tag->href      src -> tag->src      content -> tag->content
 *     name or http-equiv -> tag->equiv
 * When an attribute is given more than once the last value wins.  All other
 * attributes, and attributes without '=', are ignored.
 *
 * Fields that were not found are NULL; an attribute whose copy could not be
 * allocated is NULL as well.  Release the results with freetag().
 *
 * Returns 0 on success, 1 if the tag name could not be allocated.
 */
int parsetag(TAG *tag,char *stag){
	char *s,*e,*p;
	size_t len;

	tag->name=NULL;
	tag->href=NULL;
	tag->src=NULL;
	tag->content=NULL;
	tag->equiv=NULL;

	len=strlen(stag);
	/* an empty string has no first character to skip */
	s=len?stag+1:stag;
	e=s;
	/* strchr() also matches the terminating NUL, so this stops at the
	   end of the string at the latest */
	while(!strchr(" \t\r\n>",*e))e++;
	*e=0;
	tag->name=strdup(s);
	if(!tag->name)return(1);
	e++;

	while((size_t)(e-stag)<len){
		SKIP(e," \t\r\n>");
		s=e;					/* attribute name */
		SKIPN(e," \t\r\n=>");
		if(*e!='='){
			/* attribute without a value: nothing to record */
			*e=0;e++;
			continue;
		}
		*e=0;e++;				/* terminate the name */
		SKIP(e," \t\r\n>");
		p=e;					/* attribute value */
		if(*p=='"'){
			p++;e++;
			SKIPN(e,"\">");
		}else if(*p=='\''){
			p++;e++;
			SKIPN(e,"'>");
		}else{
			SKIPN(e," \t\r\n>");
		}
		*e=0;e++;				/* terminate the value */

		if(!strcasecmp(s,"href")){
			tag_set_attr(&tag->href,p);
		}else if(!strcasecmp(s,"src")){
			tag_set_attr(&tag->src,p);
		}else if(!strcasecmp(s,"content")){
			tag_set_attr(&tag->content,p);
		}else if(!strcasecmp(s,"name")||!strcasecmp(s,"http-equiv")){
			tag_set_attr(&tag->equiv,p);
		}
	}
	return(0);
}
