%{
/*
 *  html.l - a lexer source for (f)lex.
 *
 *	Copyright (c) 1997 Naoya Tozuka <naochan@naochan.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You cannot use this program and its sources for commercial purposes.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include "y.tab.h"

#include "lexlib.h"

/*
 * The scanner never switches to another input at end of file, so yywrap()
 * is a constant.  YY_SKIP_YYWRAP keeps flex from emitting its own
 * "extern int yywrap(void);" prototype after this code; that prototype
 * would be expanded through the zero-argument macro below and fail to
 * compile.  Other lex implementations ignore the extra define.
 */
#define YY_SKIP_YYWRAP
#define yywrap()  1

/*
 * Tested by the <INITIAL> white-space rules: while it is positive a blank
 * or tab is returned as SPACE and a newline as RETURN, otherwise they are
 * discarded.  Every BODY tag token flips it between 0 and 1.
 */
int  skip_space = 0;
/* Non-zero makes token() trace every returned token on stderr. */
int  is_debug_mode = 0;
extern char poolarea[];

/*
 * token(tk) - return token `tk` from yylex().
 *
 * When is_debug_mode is non-zero the token name, the matched text and the
 * current yylval fields are written to stderr first.  Rules that do not
 * assign yylval.addr themselves show whatever value an earlier rule left
 * there.
 *
 * Wrapped in do/while(0) so that "token(X);" is exactly one statement and
 * stays safe in an unbraced if/else.
 */
#define token(tk) \
  do { \
    if (is_debug_mode) \
      fprintf(stderr, \
           "%-12s  is   \"%s\"      (yylval.type=%d yylval.addr=%s)\n",\
			#tk, yytext, yylval.type, poolat(yylval.addr)); \
    return (tk); \
  } while (0)
%}
%start TAG AMPER PARAMETER COMMENT
	/*
	 * Start conditions declared above (inclusive, so the one rule
	 * without a <...> prefix is active in all of them):
	 *   TAG       - entered on "<" or "</", left again on ">"
	 *   AMPER     - entered on "&", left on ";" or on non-name text
	 *   PARAMETER - entered on "=" inside a tag, left after one value
	 *   COMMENT   - entered on the comment opener, left on its closer
	 *
	 * HHHH below is a run of one or more hyphens; it is used by the
	 * comment delimiters and by the rule for dashes in ordinary text.
	 */
HHHH  [-]+
%%
<INITIAL>"<"[Dd][Tt]">" {
        /* A bare dt tag is returned whole; TAG state is not entered. */
        token(D_TERM);
    }
<INITIAL>"<"[Dd][Dd]">" {
        /* Likewise for a bare dd tag. */
        token(D_DEFINE);
    }
<INITIAL>"<" {
        /* Any other opening tag: its name and attributes are read in TAG. */
        BEGIN TAG;
        token(LP);
    }
<INITIAL>"</"[Pp]">" {
        /* A closing p tag is a single token of its own. */
        token(PARAGRAPH_END);
    }
<INITIAL>"</" {
        /* Any other closing tag also continues in TAG. */
        BEGIN TAG;
        token(LP_SLASH);
    }

<TAG>">" {
        /* End of the tag: resume scanning document text. */
        BEGIN INITIAL;
        token(RP);
    }

<INITIAL>"<!--"              { /* Comment opener.  Only the first two hyphens
                                  are consumed, so a comment made of nothing
                                  but hyphens can still be closed. */
                               BEGIN COMMENT; }
<COMMENT>"-"{HHHH}">"        { /* Comment closer: two or more hyphens and then
                                  the closing bracket.  A lone hyphen before
                                  the bracket stays part of the comment. */
                               BEGIN INITIAL; }

<INITIAL>"&" {
        /* Start of a character reference; its body is read in AMPER. */
        BEGIN AMPER;
        token(AND);
    }
<AMPER>";" {
        /* The semicolon closes the reference. */
        BEGIN INITIAL;
        token(SEMICOLON);
    }

<TAG>HTML|[Hh]tml                             { /* Element names.  Each one is accepted
                                                   in upper case, in lower case, or with
                                                   its first letter capitalised; a few
                                                   also allow an inner capital.  On a
                                                   tie in length with the generic VAR
                                                   rule the keyword wins because it is
                                                   listed first. */
                                                token(HTML); }
<TAG>HEAD|[Hh]ead                             { token(HEAD); }
<TAG>TITLE|[Tt]itle                           { token(TITLE); }
<TAG>BODY|[Bb]ody                             { /* Every BODY token, whether from the
                                                   opening or the closing tag, flips
                                                   skip_space. */
                                                skip_space = 1 - skip_space;
                                                token(BODY); }
<TAG>PRE|[Pp]re                               { token(PRE); }

<TAG>BR|[Bb]r                                 { token(BR); }
<TAG>HR|[Hh]r                                 { token(HR); }
<TAG>IMG|[Ii]mg                               { token(IMG); }

<TAG>FONT|[Ff]ont                             { token(FONT); }
<TAG>BIG|[Bb]ig                               { /* 0.96 */ token(BIG); }
<TAG>SMALL|[Ss]mall                           { /* 0.96 */ token(SMALL); }
<TAG>A|a                                      { token(ANCHOR); }
<TAG>P|p                                      { token(PARAGRAPH); }
<TAG>CENTER|[Cc]enter                         { token(CENTER); }
<TAG>[Hh]1                                    { token(H1); }
<TAG>[Hh]2                                    { token(H2); }
<TAG>[Hh]3                                    { token(H3); }
<TAG>[Hh]4                                    { token(H4); }
<TAG>[Hh]5                                    { token(H5); }
<TAG>[Hh]6                                    { token(H6); }
<TAG>SCRIPT|[Ss]cript                         { token(SCRIPT); }

<TAG>TABLE|[Tt]able                           { token(TABLE); }
<TAG>CAPTION|[Cc]aption                       { token(CAPTION); }
<TAG>TR|[Tt]r                                 { token(TR); }
<TAG>TH|[Tt]h                                 { token(TH); }
<TAG>TD|[Tt]d                                 { token(TD); }

<TAG>DL|[Dd]l                                 { token(D_LIST); }
<TAG>OL|[Oo]l                                 { token(O_LIST); }
<TAG>UL|[Uu]l                                 { token(U_LIST); }
<TAG>LI|[Ll]i                                 { token(LISTITEM); }
<TAG>MENU|[Mm]enu                             { token(MENU); }
<TAG>DIR|[Dd]ir                               { token(DIR); }

<TAG>B|b                                      { token(BOLD); }
<TAG>I|i                                      { token(ITALIC); }
<TAG>U|u                                      { token(UNDERLINE); }
<TAG>TT|[Tt]t                                 { token(TYPEWRITER); }

<TAG>BLOCKQUOTE|[Bb]lock[Qq]uote|BQ|[Bb]q     { token(BLOCKQUOTE); }
<TAG>ADDRESS|[Aa]ddress                       { token(ADDRESS); }
<TAG>ISINDEX|[Ii]s[Ii]ndex                    { token(ISINDEX); }
<TAG>EM|[Ee]m                                 { token(EM); }
<TAG>STRONG|[Ss]trong                         { token(STRONG); }
<TAG>CODE|[Cc]ode                             { token(CODE); }
<TAG>SAMP|[Ss]amp                             { token(SAMP); }
<TAG>KBD|[Kk]bd                               { token(KBD); }
<TAG>VAR|[Vv]ar                               { token(TAG_VAR); }
<TAG>CITE|[Cc]ite                             { token(CITE); }
<TAG>DFN|[Dd]fn                               { token(DFN); }
<TAG>NOBR|[Nn]o[Bb]r                          { token(NOBR); }
<TAG>WBR|[Ww][Bb]r                            { token(WBR); }
<TAG>BASEFONT|[Bb]ase[Ff]ont                  { token(BASEFONT); }
<TAG>BLINK|[Bb]link                           { token(BLINK); }

<TAG>FORM|[Ff]orm                             { token(FORM); }
<TAG>INPUT|[Ii]nput                           { token(INPUT); }
<TAG>SELECT|[Ss]elect                         { token(SELECT); }
<TAG>OPTION|[Oo]ption                         { token(OPTION); }
<TAG>TEXTAREA|[Tt]ext[Aa]rea                  { token(TEXTAREA); }

<TAG>[ \t\n]+                { /* White space between the parts of a tag
                                  carries no meaning. */ }
<TAG>=                       { /* The attribute value follows. */
                               BEGIN PARAMETER; token(EQUAL); }
<TAG>[A-Za-z/]+              { /* Any other word inside a tag, typically an
                                  attribute name; the text is pooled. */
                               yylval.addr = pool(yytext);
                               token(VAR); }
<PARAMETER>[ \t\n]+          { /* White space between the equals sign and
                                  the value is skipped, so that it never
                                  reaches the catch-all or default rule. */ }
<PARAMETER>\"[^\"]*\"        { /* Quoted value, pooled with its quotes.  The
                                  star lets an empty pair of quotes through
                                  as a value; otherwise the scanner would
                                  stay in PARAMETER past the end of the
                                  tag. */
                               yylval.addr = pool(yytext);
                               BEGIN TAG; token(PARAM);}
<PARAMETER>#[0-9A-Fa-f]{6}   { /* Unquoted six-digit hexadecimal value. */
                               yylval.addr = pool(yytext);
                               BEGIN TAG; token(PARAM);}
<PARAMETER>[^# \t\n>"]+      { /* Any other unquoted value: it runs up to
                                  white space, the closing bracket, a hash
                                  sign or a quote. */
                               yylval.addr = pool(yytext);
                               BEGIN TAG; token(PARAM);}

<AMPER>#[0-9]{2,3} {
        /* Numeric reference: a hash sign and two or three decimal digits. */
        yylval.addr = pool(yytext);
        token(STRING);
    }
<AMPER>[A-Za-z]+ {
        /* Named reference: letters only. */
        yylval.addr = pool(yytext);
        token(STRING);
    }
<AMPER>[^#;<A-Za-z]+ {
        /* Anything else cannot belong to a reference: hand it on as
           ordinary text and go back to INITIAL. */
        yylval.addr = pool(yytext);
        BEGIN INITIAL;
        token(CONTEXT);
    }

<INITIAL>{HHHH} {
        /* A run of hyphens is pooled and passed on as ordinary text. */
        yylval.addr = pool(yytext);
        token(CONTEXT);
    }

<INITIAL>"TeX" {
        /* The TeX family names below get tokens of their own.  Each is
           recognised only when the generic text rule cannot find a longer
           match at the same position. */
        token(TeX);
    }
<INITIAL>"LaTeX" {
        token(LaTeX);
    }
<INITIAL>"LaTeX2e" {
        token(LaTeX2e);
    }
<INITIAL>"pTeX" {
        token(pTeX);
    }
<INITIAL>"pLaTeX" {
        token(pLaTeX);
    }
<INITIAL>"pLaTeX2e" {
        token(pLaTeX2e);
    }

<INITIAL>[ \t] {
        /* A blank or tab becomes a SPACE token only while skip_space is
           positive; otherwise it is dropped. */
        if (skip_space > 0)
            token(SPACE);
    }
<INITIAL>\n {
        /* Same test for a newline, which becomes RETURN. */
        if (skip_space > 0)
            token(RETURN);
    }
<INITIAL>[^<> \t\n&]+ {
        /* Generic text: everything up to white space, an angle bracket or
           an ampersand is pooled as one CONTEXT token. */
        yylval.addr = pool(yytext);
        token(CONTEXT);
    }

<COMMENT>.|\n                { /* Discard the comment body.  The newline is
                                  listed explicitly because the dot never
                                  matches it, and an unmatched newline would
                                  be echoed to the output by the default
                                  rule. */ }

.                            { /* Fallback, active in every start condition:
                                  a character that no earlier rule accepts is
                                  printed on stdout inside parentheses and
                                  scanning goes on. */
                               printf("(%c)", *yytext); }

%%


