/*
 *  KOTETU: HTML parse procedures
 *      by k-chinen@is.aist-nara.ac.jp, 1994, 1995, 1996, 1997
 *
 * $Id: parse.c,v 1.12 1996/11/24 14:46:47 k-chinen Exp k-chinen $
 */
/*
 * Warning: apply these routines to HTML input only; do not use them
 * on any other kind of data.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <strings.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>

#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif

#include <pthread.h>
#include <errno.h>

#include "parse.h"


/*
 * Debug switch.  The code below tests DEBUG_PARSE (no leading underscore),
 * so defining _DEBUG_PARSE here leaves the debug output compiled out.
 * NOTE(review): presumably the underscore is removed by hand to enable
 * debugging - confirm.
 */
#define _DEBUG_PARSE


/* DEBUG_PARSE implies TRACE_PARSE (per-tag trace output). */
#ifdef  DEBUG_PARSE
#ifndef TRACE_PARSE
#define TRACE_PARSE
#endif
#endif

/*
 * Buffer limits used by the tag scanners: a tag name or a whole tag
 * whose length reaches the limit is skipped instead of being parsed.
 */
#define TAG_NAME_SIZE   128
#define TAG_SIZE        1024


#ifndef STANDALONE_HTML_PARSE_TEST

#include "wcol.h"

#else /* STANDALONE_HTML_PARSE_TEST */

/* Size of the fixed string buffers used by the test main(), unless set already. */
#ifndef STRING_SIZE
#define STRING_SIZE  512
#endif

/*
 * Declaration of the standalone Trace(): ANSI prototype with <stdarg.h>
 * when __STDC__ is defined, old-style declaration with <varargs.h> otherwise.
 */
#ifdef __STDC__
#include <stdarg.h>
int
Trace(const char *fmt, ...);
#else
#include <varargs.h>
int
Trace();
#endif


/* In the standalone build, error reports go through the same routine as traces. */
#define Error Trace

/* Non-zero enables the output of Trace(). */
int _trace_flag_=0;

/*
 * Trace - printf-style diagnostic output for the standalone build.
 *
 *  The formatted message is written to stdout and flushed only while
 *  _trace_flag_ is non-zero; otherwise nothing is printed.
 *
 *  return:
 *      0   - always (the function is declared 'int', so it must not
 *            fall off the end without a value)
 */
#ifdef __STDC__
int
Trace(const char *fmt, ...)
#else
int
Trace(va_alist)
va_dcl
#endif
{
#ifndef __STDC__
    char *fmt;
#endif
    va_list args;

#ifdef __STDC__
    va_start(args, fmt);
#else
    va_start(args);
    fmt = (char*) va_arg(args, char *);
#endif
    if(_trace_flag_) {
        vprintf(fmt, args);
        fflush(stdout);
    }

    va_end(args);

    return 0;
}

#endif /* STANDALONE_HTML_PARSE_TEST */




/*
 * hash - map a string to a slot index.
 *
 *  The bytes of 'st' (taken as unsigned values) are summed and the sum
 *  is reduced modulo NUM_SLOT.
 *
 *  return:
 *      slot index in the range 0 .. NUM_SLOT-1
 */
static
int
hash(st)
char *st;
{
    const unsigned char *cur;
    unsigned long sum = 0;

    for(cur = (const unsigned char*) st; *cur != '\0'; cur++)
        sum += *cur;

    return (int)(sum % NUM_SLOT);
}


/*
 * New cell with empry value
 */
static
pa_cell*
new_cell()
{
    register pa_cell *p;
    p = (pa_cell*) MALLOC(sizeof(pa_cell));
    p->val = NULL;
    p->next = NULL;

    return p;
}

/*
 * new_str_cell - allocate a cell holding a private copy of 'st'.
 *
 *  return:
 *      the new cell (next is NULL), or NULL when either the cell or
 *      the string copy could not be allocated
 */
static
pa_cell*
new_str_cell(st)
char *st;
{
    register pa_cell *p;

    p = new_cell();
    if(p==NULL)
        return NULL;

    STRDUP(p->val, st);
    if(p->val==NULL) {
        /* no string copy was stored: do not hand out a cell without a value */
        FREE(p);
        return NULL;
    }
    p->next = (pa_cell*)NULL;

    return p;
}



/*
 * destroy_cell - release a whole chain of cells together with their values.
 *
 *  The chain is walked iteratively, so the stack use does not grow
 *  with the length of the chain.  A NULL argument is accepted.
 */
static
void
destroy_cell(c)
pa_cell *c;
{
    pa_cell *following;

    while(c!=NULL) {
        /* remember the successor before this cell goes away */
        following = c->next;
        c->next = NULL;

        if(c->val!=NULL) {
            FREE(c->val);
            c->val = NULL;
        }
        FREE(c);

        c = following;
    }
}

/*
 * destroy_cell_withoutvalue - release a chain of cells but keep the values.
 *
 *  Only the cells themselves are freed; the strings they point to are
 *  left untouched so that they can stay in use elsewhere.  The chain is
 *  walked iteratively.  A NULL argument is accepted.
 */
static
void
destroy_cell_withoutvalue(c)
pa_cell *c;
{
    pa_cell *following;

    while(c!=NULL) {
        /* remember the successor before this cell goes away */
        following = c->next;
        c->next = NULL;
        FREE(c);
        c = following;
    }
}



/*
 * HTML_NewTags - allocate an empty tag table.
 *
 *  Every slot is cleared and the entry counter starts at zero.
 *
 *  return:
 *      the new table, or NULL when MALLOC yields NULL
 */
tags_t
*HTML_NewTags()
{
    tags_t *fresh;
    int slot_no;

    fresh = (tags_t*)MALLOC(sizeof(tags_t));
    if(fresh==NULL)
        return fresh;

    slot_no = 0;
    while(slot_no<NUM_SLOT) {
        fresh->slot[slot_no] = NULL;
        slot_no++;
    }
    fresh->count = 0;

    return fresh;
}

/*
 * HTML_DestroyTags - release a tag table, its cells and the stored strings.
 *
 *  A NULL table is accepted and ignored.
 *
 *  return:
 *      0   - always
 */
int
HTML_DestroyTags(tags)
tags_t *tags;
{
    register int j;

    if(tags==NULL)
        return 0;

    for(j=0;j<NUM_SLOT;j++) {
        destroy_cell(tags->slot[j]);
        tags->slot[j] = NULL;
    }
    tags->count = 0;
    FREE(tags);

    return 0;
}

/*
 * HTML_DestroyTags_withoutvalue - release a tag table and its cells,
 * but not the stored strings.
 *
 *  Meant for the case where the strings live on in a list built by
 *  HTML_ConvURLList().  The table itself is freed as well, so the
 *  caller must not use or free 'tags' afterwards.  A NULL table is
 *  accepted and ignored.
 *
 *  return:
 *      0   - always
 */
int
HTML_DestroyTags_withoutvalue(tags)
tags_t *tags;
{
    register int j;

    if(tags==NULL)
        return 0;

    for(j=0;j<NUM_SLOT;j++) {
        destroy_cell_withoutvalue(tags->slot[j]);
        tags->slot[j] = NULL;
    }
    tags->count = 0;
    FREE(tags);

    return 0;
}




/*
 *  HTML_IsInTags - is the string stored already ?
 *
 *  The chain of the slot selected by hash(st) is searched for an
 *  exact (case-sensitive) match.
 *
 *  return:
 *              0   - not stored (also for a NULL table or NULL string)
 *              1   - stored
 */
int
HTML_IsInTags(tags, st)
tags_t *tags;
char *st;
{
    register pa_cell *p;

    if(tags==NULL || st==NULL)
        return 0;

    for(p = tags->slot[hash(st)]; p!=NULL; p = p->next) {
        if(p->val!=NULL && strcmp(p->val, st)==0)
            return 1;
    }

    /* end of chain reached without a match */
    return 0;
}



/*
 *  append_slot - store a copy of a string in the tag table.
 *
 *  The string is appended at the end of the chain selected by hash(st)
 *  unless an identical string is already there.  A new cell receives
 *  the current entry count as its sequence number 'num', and the count
 *  is incremented only after the cell has been created.
 *
 *  arguments:
 *      tags    table to store into
 *      n       receives the new cell, or NULL when nothing was appended
 *      st      string to store
 *
 *  return:
 *          0   - append
 *          1   - not append (duplicate, NULL argument or no memory)
 */
static
int
append_slot(tags, n, st)
tags_t  *tags;
pa_cell **n;
char    *st;
{
    register pa_cell *p;
    pa_cell **link;
    pa_cell *cell;

    *n = NULL;
    if(tags==NULL || st==NULL)
        return 1;

#ifdef DEBUG_PARSE
    Trace("append_slot: tag %p, '%s'\n", tags, st);
#endif

    /*
     * walk the chain; 'link' always addresses the pointer that has to
     * receive a new cell (the slot head or the last cell's 'next')
     */
    link = &tags->slot[hash(st)];
    for(p = *link; p!=NULL; p = p->next) {
        /* same string is stored already, so do nothing */
        if(p->val!=NULL && strcmp(p->val, st)==0)
            return 1;
        link = &p->next;
    }

    cell = new_str_cell(st);
    if(cell==NULL) {
        Trace("append_slot: cannot append\n");
        return 1;
    }

    cell->num = tags->count++;
    *link = cell;
    *n = cell;

    return 0;
}




/*
 * HTML_ShowTags - dump the contents of a tag table through Trace().
 *
 *  For every non-empty slot the slot index, each stored string with
 *  its sequence number, and the chain length are printed; a total
 *  follows at the end.
 *
 *  arguments:
 *      prefix  optional label for the heading (may be NULL)
 *      tags    table to dump; NULL is reported as "no tags"
 *
 *  return:
 *      0   - always
 */
int
HTML_ShowTags(prefix, tags)
char   *prefix;
tags_t *tags;
{
    register int i,c,s;
    pa_cell *p;

    if(tags==NULL) {
        Trace("; no tags\n");
        return 0;
    }

    if(prefix!=(char*)NULL)
        Trace("------------------- slot of references '%s' (%d).\n",
            prefix, tags->count);
    else
        Trace("------------------- slot of references (%d).\n",
            tags->count);
    s = c = 0;
    for(i=0;i<NUM_SLOT;i++) {
        c = 0;      /* entries in this slot */
        if(tags->slot[i]!=NULL) {
            Trace("%4d: ",i);
            p = tags->slot[i];
            while(p!=NULL) {
                Trace("%s (%d), ", p->val, p->num);

                p = p->next;
                c++;
            }
            Trace("<%d>\n",c);
        }
        s += c;     /* running total over all slots */
    }
    Trace("------------------- total %d references\n", s);
    fflush(stdout);

    return 0;
}




/*
 * tag_url_attr - select the attribute that carries the URL of a tag.
 *
 *  Anchors (A, AREA) are recognised only when 'cmd' contains
 *  PARSE_REFERED; IMG, BODY, FRAME, EMBED and APPLET only when it
 *  contains PARSE_INCLUDED.  Tag names are compared case-insensitively.
 *
 *  return:
 *      attribute keyword ("HREF", "SRC", "BACKGROUND" or "CODE"),
 *      or NULL when the tag is not of interest
 */
static const char *
tag_url_attr(int cmd, const char *name)
{
    if(cmd & PARSE_REFERED) {
        if(strcasecmp(name, "A")==0 || strcasecmp(name, "AREA")==0)
            return "HREF";
    }
    if(cmd & PARSE_INCLUDED) {
        if(strcasecmp(name, "IMG")==0)
            return "SRC";
        if(strcasecmp(name, "BODY")==0)
            return "BACKGROUND";
        if(strcasecmp(name, "FRAME")==0)
            return "SRC";
        if(strcasecmp(name, "EMBED")==0)
            return "SRC";
        if(strcasecmp(name, "APPLET")==0)
            return "CODE";
    }

    return NULL;
}

/*
 * split_tag_name - copy the leading name of a tag into 'name'.
 *
 *  The name ends at the first blank, tab, CR, LF or at the end of the
 *  tag text.  'name' must provide room for TAG_NAME_SIZE+1 bytes.
 *  The copied length is stored in '*name_len'.
 *
 *  return:
 *      pointer to the text following the name, or NULL when the name
 *      is empty or too long (such a tag is given up)
 */
static char *
split_tag_name(char *tag, char *name, int *name_len)
{
    char *p = tag;
    int n = 0;

    while(*p && *p!=' ' && *p!='\t' && *p!='\n' && *p!='\r'
            && n<TAG_NAME_SIZE) {
        name[n++] = *p++;
    }
    name[n] = '\0';
    *name_len = n;

    if(n==0 || n>=TAG_NAME_SIZE)
        return NULL;

    return p;
}

/*
 * collect_tag_url - extract the URL of one tag and store it in 'tags'.
 *
 *  arguments:
 *      tags    table receiving the URL
 *      cmd     PARSE_REFERED and/or PARSE_INCLUDED
 *      tag     text between '<' and '>' (NUL terminated); it is
 *              overwritten, because the value is extracted in place
 *      name    tag name as produced by split_tag_name()
 *      rest    position in 'tag' right after the name
 *
 *  The whitespace-separated words after the name are scanned for the
 *  attribute keyword chosen by tag_url_attr().  A word that merely
 *  starts with the keyword without an '=' following (e.g. HREFLANG for
 *  HREF) is passed over and scanning goes on.  URLs containing '?' are
 *  ignored; a '#' and everything after it is cut off.
 */
static void
collect_tag_url(tags_t *tags, int cmd, char *tag, const char *name, char *rest)
{
    const char *attr;
    size_t alen;
    char *p, *q;
    pa_cell *new_one;

    /*
     * only the tags listed in tag_url_attr() are supported. if you
     * want other tags, extend that function.
     */
    attr = tag_url_attr(cmd, name);
    if(attr==NULL)
        return;
    alen = strlen(attr);

    /*
     * search "keyword [blanks] =" ; on success q points behind '='
     */
    q = NULL;
    p = rest;
    while(*p) {
        if(strncasecmp(p, attr, alen)==0) {
            q = p + alen;
            while(*q==' '||*q=='\t')
                q++;
            if(*q=='=') {
                q++;
                break;
            }
            q = NULL;       /* prefix match only, keep looking */
        }
        /* step over this word and the whitespace behind it */
        while(*p&&*p!=' '&&*p!='\t'&&*p!='\n'&&*p!='\r')
            p++;
        while(*p&&(*p==' '||*p=='\t'||*p=='\n'||*p=='\r'))
            p++;
    }
    if(q==NULL)
        return;

#ifdef DEBUG_PARSE
    Trace("    # *found*\n");
    Trace("      '%s'\n", q);
#endif

    /*
     * search value (URL or PATH) and move it to the front of 'tag';
     * the write position always stays behind the read position
     */
    while(*q==' '||*q=='\t')
        q++;
    p = tag;
    if(*q=='"') {
        /*
         * Read quoted string with double-quote (a blank ends it, too).
         */
        q++;
        while(*q&&*q!='"'&&*q!=' ')
            *p++ = *q++;
    }
    else {
        /*
         * Read not quoted string.
         */
        while(*q&&*q!='>'&&*q!=' '&&*q!='\t'&&*q!='\r'&&*q!='\n')
            *p++ = *q++;
    }
    *p = '\0';

    /*
     * If '?' is included in URL, skip
     */
    if(strchr(tag, '?')!=NULL)
        return;

    /*
     * If '#' is included in URL, cut rest of them
     */
    p = strchr(tag, '#');
    if(p!=NULL)
        *p = '\0';

    /* a duplicate is simply not stored again; nothing else to do */
    (void) append_slot(tags, &new_one, tag);
}


/*
 * HTML_ParseTags_FromMemory - Parse Tags in HTML held in memory
 *
 *  arguments:
 *      tags        parsed results
 *      cmd         parsed tag type (PARSE_REFERED for anchors,
 *                  PARSE_INCLUDED for in-line resources, or both)
 *      area        start of the HTML text (need not be NUL terminated)
 *      area_len    number of valid bytes in 'area'
 *
 *  Only the bytes area[0] .. area[area_len-1] are read.  A tag that is
 *  empty or whose text reaches TAG_SIZE bytes is skipped up to its
 *  closing '>'.
 *
 *  return:
 *      -1      error (no table given)
 *      >=0     num of tags stored in 'tags'
 */
int
HTML_ParseTags_FromMemory(tags, cmd, area, area_len)
tags_t  *tags;
int     cmd;        /* command flag, anchor or inline image */
char    *area;
int     area_len;
{
    char name[TAG_NAME_SIZE+1];
    char tag[TAG_SIZE+1];
    char *pos, *tail, *rest;
    int plen, qlen;

#ifdef DEBUG_PARSE
#define BLK 1024
    {
        /*
         * dump a NUL terminated copy of the head of the area; the
         * area itself is left alone so that parsing is not affected
         */
        char hidden_buffer[BLK];
        int shown = 0;

        Trace("# area %p (length %d), mode=", area, area_len);
        if(cmd & PARSE_REFERED)     Trace("anchors\n");
        if(cmd & PARSE_INCLUDED)    Trace("inline-images\n");
        fflush(stdout);
        fflush(stderr);

        if(area!=NULL && area_len>0) {
            shown = (area_len<BLK) ? area_len : BLK-1;
            memcpy(hidden_buffer, area, (size_t)shown);
        }
        hidden_buffer[shown] = '\0';
        dump_stringchunk("body", hidden_buffer);
    }
#endif

    if(tags==NULL) {
        Error("HTML_ParseTags_FromMemory: no space to save");
        return -1;
    }

    if(area==NULL || area_len<=0)
        return tags->count;

    pos = area;
    tail = area + area_len;     /* one past the last valid byte */

    while(pos<tail) {
        /* check, Is it tag ? */
        if(*pos++!='<')
            continue;

        /*
         * Split Tag - keep at most TAG_SIZE bytes, but always consume
         * the input up to and including the closing '>'
         */
        plen = 0;
        while(pos<tail && *pos!='>') {
            if(plen<TAG_SIZE)
                tag[plen++] = *pos;
            pos++;
        }
        if(pos<tail)
            pos++;              /* the '>' itself */
        tag[plen] = '\0';

        /* empty or too long, give up */
        if(tag[0]=='\0' || plen>=TAG_SIZE)
            continue;

        /*
         * Split Name - if name is too long, we give up to use
         */
        rest = split_tag_name(tag, name, &qlen);
        if(rest==NULL)
            continue;

#ifdef TRACE_PARSE
        Trace("\t# tag %2d '%s' name %2d '%s'\n",
            plen, tag, qlen, name);
#endif

        collect_tag_url(tags, cmd, tag, name, rest);
    }


#ifdef DEBUG_PARSE
    Trace("HTML_ParseTags_FromMemory: END %p (len %d) // count %d\n",
        area, area_len, tags->count);
#endif

    return tags->count;
}


/*
 * HTML_ParseTags_FromFile - Parse Tags in an HTML file
 *
 *  arguments:
 *      tags    parsed results
 *      cfn     path for HTML
 *      cmd     parsed tag type (PARSE_REFERED for anchors,
 *              PARSE_INCLUDED for in-line resources, or both)
 *
 *  The file is read character by character through stdio.  A tag that
 *  is empty or whose text reaches TAG_SIZE bytes is skipped up to its
 *  closing '>'.
 *
 *  return:
 *      -1      error (no table given, or file cannot be opened)
 *      >=0     num of tags stored in 'tags'
 */
int
HTML_ParseTags_FromFile(tags, cfn, cmd)
tags_t  *tags;
char    *cfn;
int     cmd;        /* command flag, anchor or inline image */
{
    register int ch;
    char name[TAG_NAME_SIZE+1];
    char tag[TAG_SIZE+1];
    char *rest;
    int plen, qlen;
    FILE *cf;

#ifdef DEBUG_PARSE
    Trace("# filename='%s' mode=", cfn);
    if(cmd & PARSE_REFERED)     Trace("anchors\n");
    if(cmd & PARSE_INCLUDED)     Trace("inline-images\n");
    fflush(stdout);
    fflush(stderr);
#endif

    if(tags==NULL) {
        Error("HTML_ParseTags_FromFile: no space to save");
        return -1;
    }

    if((cf=fopen(cfn, "r"))==NULL) {
        Error("HTML_ParseTags_FromFile: cannot open '%s'.", cfn);
        return -1;
    }

    while((ch=fgetc(cf))!=EOF) {
        /* check, Is it tag ? */
        if(ch!='<')
            continue;

        /*
         * Split Tag - keep at most TAG_SIZE bytes, but always consume
         * the input up to and including the closing '>'
         */
        plen = 0;
        while((ch=fgetc(cf))!=EOF && ch!='>') {
            if(plen<TAG_SIZE)
                tag[plen++] = (char)ch;
        }
        tag[plen] = '\0';

        /* empty or too long, give up */
        if(tag[0]=='\0' || plen>=TAG_SIZE)
            continue;

        /*
         * Split Name - if name is too long, we give up to use
         */
        rest = split_tag_name(tag, name, &qlen);
        if(rest==NULL)
            continue;

#ifdef TRACE_PARSE
        Trace("\t# tag(%d)='%s' name(%d)='%s'\n",
            plen, tag, qlen, name);
#endif

        collect_tag_url(tags, cmd, tag, name, rest);
    }

    fclose(cf);

#ifdef DEBUG_PARSE
    Trace("HTML_ParseTags_FromFile: END '%s' %d\n", cfn, tags->count);
#endif

    return tags->count;
}


/*
 * HTML_ParseTags - read the head of a file into memory and parse it.
 *
 *  arguments:
 *      tags    parsed results
 *      cmd     parsed tag type (anchors and/or in-line resources)
 *      cfn     path for HTML
 *      alen    maximum number of bytes to read from the file; the
 *              rest of a longer file is ignored
 *
 *  A read error other than EINTR ends reading; whatever was read up to
 *  that point is still parsed.
 *
 *  return:
 *      -1      error (bad argument, no memory, cannot open, no table)
 *      >=0     num of tags stored in 'tags'
 */
int
HTML_ParseTags(tags, cmd, cfn, alen)
tags_t  *tags;
int     cmd;        /* command flag, anchor or inline image */
char    *cfn;
int     alen;
{
    int f;
    int len;
    int nr;
    char *buf;
    int chk;

#ifdef DEBUG_PARSE
    Trace("HTML_ParseTags: START cmd %d, file '%s', alen %d\n",
        cmd, cfn, alen);
#endif

    if(cfn==NULL || alen<=0)
        return -1;

    /*
     * one spare byte: the data is kept NUL terminated, so a scanner
     * looking at the byte right behind the data stays inside the buffer
     */
    if((buf=(char*)malloc((size_t)alen+1))==NULL) {
        return -1;
    }

    if((f=open(cfn, O_RDONLY))<0) {
        free(buf);
        return -1;
    }

    len = 0;
    while(len<alen) {
        nr = (int)read(f, &buf[len], (size_t)(alen-len));
        if(nr<0) {
            if(errno==EINTR) {
                continue;       /* interrupted, just try again */
            }
            break;
        }
        if(nr==0)
            break;              /* end of file */
        len += nr;
    }
    buf[len] = '\0';

    close(f);

    chk = HTML_ParseTags_FromMemory(tags, cmd, buf, len);
#ifdef DEBUG_PARSE
    Trace("HTML_ParseTags: END chk %d, '%s' %d\n",
        chk, cfn, (tags!=NULL) ? tags->count : -1);
#endif
    free(buf);

    return chk;
}



/*
 * HTML_ConvURLList - build a URL list (char**) from a 'tags_t'
 *
 *  arguments:
 *      tags    table to convert
 *      len     receives the number of entries (may be NULL); it is set
 *              to 0 when the function fails
 *
 *  return:
 *      array of tags->count string pointers followed by a NULL
 *      sentinel, ordered by the sequence number of the cells;
 *      NULL when 'tags' is NULL or no memory is available
 *
 * Note:
 *      The array is allocated on the heap; the caller has to FREE it.
 *      The elements are NOT copies: they point to the strings owned by
 *      the cells of 'tags'.  Destroying the table together with its
 *      values therefore invalidates every element of the list.
 *
 */
char**
HTML_ConvURLList(tags, len)
tags_t *tags;
int *len;
{
    char **ret;
    pa_cell *p;
    int i;

    if(len!=NULL)
        *len = 0;
    if(tags==NULL || tags->count<0)
        return NULL;

    if((ret = (char**)MALLOC(sizeof(char*)*(tags->count+1)))==NULL)
        return NULL;

    /* clear everything, including the sentinel at ret[tags->count] */
    for(i=0;i<=tags->count;i++)
        ret[i] = NULL;

    for(i=0;i<NUM_SLOT;i++) {
        for(p = tags->slot[i]; p!=NULL; p = p->next) {
            /* never write outside of the array */
            if(p->num>=0 && p->num<tags->count)
                ret[p->num] = p->val;
        }
    }

    if(len!=NULL)
        *len = tags->count;

    return ret;
}

/*
 * HTML_ShowURLList - print a NULL terminated URL list through Trace().
 *
 *  Each entry is printed with its index; the number of entries follows
 *  at the end.  A NULL list is reported as "no tags".
 */
void
HTML_ShowURLList(char **list)
{
    int shown;

    if(list==NULL) {
        Trace("; no tags\n");
        return;
    }

    Trace("; %p %p\n", list, *list);
    for(shown=0; list[shown]!=NULL; shown++) {
        Trace("  %3d: %s\n", shown, list[shown]);
    }
    Trace("; %d URL\n", shown);
}

/*
 * HTML_ShowIndependURLList - print a URL list joined with a base URL.
 *
 *  Every entry of the NULL terminated list is combined with 'base' by
 *  URL_JoinStrOnHeap(); each result is written to stdout on a line of
 *  its own and released again.  Entries for which the join yields NULL
 *  are left out.  The number of printed URLs is reported through
 *  Trace().  A NULL list is reported as "no tags".
 */
void
HTML_ShowIndependURLList(char *base, char **list)
{
    char **cur;
    char *joined;
    int printed = 0;

    if(list==NULL) {
        Trace("; no tags\n");
        return;
    }

    for(cur=list; *cur!=NULL; cur++) {
        joined = URL_JoinStrOnHeap(base, *cur);
        if(joined==NULL)
            continue;

        fprintf(stdout, "%s\n", joined);
        FREE(joined);
        printed++;
    }
    Trace("; %d URL\n", printed);
}

/*
 * HTML_FreeURLList - release the strings of a NULL terminated URL list.
 *
 *  Each string is freed and its entry is set to NULL.  The array
 *  itself is not freed.  A NULL list is accepted and ignored.
 */
void
HTML_FreeURLList(char **list)
{
    char **cur;

    if(list==NULL)
        return;

    for(cur=list; *cur!=NULL; cur++) {
        FREE(*cur);
        *cur = NULL;
    }
}





#ifdef PARSE_TEST

#if 0
extern int _trace_flag_;
#endif


/*
 * Main routine for test.
 *
 *  Parses every file named on the command line and either checks
 *  whether one URL (-c) occurs in it or prints the URLs found,
 *  optionally joined with a base URL (-n together with -b).
 *
 *  Ownership: each table is released exactly once per file - by
 *  HTML_DestroyTags() when the strings are no longer needed, or by
 *  HTML_DestroyTags_withoutvalue() when the strings live on in the
 *  URL list (they are released later by HTML_FreeURLList()).
 */
int
main(argc, argv)
int argc;
char **argv;
{
    int     opt, i;
    char    check_url[STRING_SIZE];
    char    base_url[STRING_SIZE];
    int     count,mode=PARSE_REFERED|PARSE_INCLUDED;
    tags_t  *tags;
    char    **urllist;
    int     len;
    int     normalize=0;

    extern char *optarg;
    extern int  optind;


    /*
     * Init
     */
    _trace_flag_ = 0;
    check_url[0] = '\0';
    base_url[0] = '\0';     /* -n may be given without -b */

    /*
     * Parse options and arguments
     */
    while((opt = getopt(argc, argv, "hvgrinc:b:")) != -1) {
        switch(opt) {
        case 'h':
            printf("%s - parse prefetch targets from HTML file.\n", argv[0]);
            printf("          by k-chinen@is.aist-nara.ac.jp, 1994-1997.\n\n");
            printf("usage: %s [options] files\n\n", argv[0]);
            printf("option: -h          help\n");
            printf("        -v          verbose (toggle, OFF in default)\n");
            printf("        -g          refered & included resource (default)\n");
            printf("        -r          refered only\n");
            printf("        -i          included only\n");
            printf("        -n          normalize (toggle, OFF in default)\n");
            printf("        -c url      check url\n");
            printf("        -b url      base url\n");
            printf("\n");
            printf("NOTE:\n");
            printf("    'refered' means A and AREA-tag.\n");
            printf("        <A HREF=\"url\">\n");
            printf("        <AREA HREF=\"url\">\n");
            printf("\n");
            printf("    'included' means IMG,BODY,FRAME,EMBED and APPLET-tag.\n");
            printf("        <IMG SRC=\"url\">\n");
            printf("        <BODY BACKGROUND=\"url\">\n");
            printf("        <FRAME SRC=\"url\">\n");
            printf("        <EMBED SRC=\"url\">\n");
            printf("        <APPLET CODE=\"url\">\n");
            printf("\n");
            exit(KOTETU_EXIT_NORMAL);
            break;
        case 'v':
            _trace_flag_ = 1 - _trace_flag_;
            break;
        case 'c':
            /* bounded copy: the argument may be longer than the buffer */
            strncpy(check_url, optarg, sizeof(check_url)-1);
            check_url[sizeof(check_url)-1] = '\0';
            break;
        case 'b':
            strncpy(base_url, optarg, sizeof(base_url)-1);
            base_url[sizeof(base_url)-1] = '\0';
            break;
        case 'g':
            mode = PARSE_REFERED | PARSE_INCLUDED;
            break;
        case 'r':
            mode = PARSE_REFERED;
            break;
        case 'n':
            normalize = 1 - normalize;
            break;
        case 'i':
            mode = PARSE_INCLUDED;
            break;
        default:
            /* unknown options are ignored */
            break;
        }
    }

    for(i=optind; i<argc; i++) {
        if((tags = HTML_NewTags())==NULL) {
            Trace("not enough memory\n");
            break;
        }

        Trace("; File %s\n", argv[i]);

        if((count=HTML_ParseTags(tags, mode, argv[i], KB(64)))<=0)
        {
            Trace("no tags\n");
            HTML_DestroyTags(tags);
        }
        /*
         * check specified url existence
         */
        else if(check_url[0]) {
            if(HTML_IsInTags(tags, check_url)) {
                Trace("Found '%s'\n", check_url);
            }
            else {
                Trace("Not found '%s'\n", check_url);
            }
            HTML_DestroyTags(tags);
        }
        /*
         * show referenced URL (anchors or in-line images)
         */
        else {
            urllist = HTML_ConvURLList(tags, &len);
            if(urllist==NULL) {
                /* no list took over the strings, so release them too */
                Trace("not enough memory\n");
                HTML_DestroyTags(tags);
            }
            else {
                /* the strings now belong to the list; drop the table only */
                HTML_DestroyTags_withoutvalue(tags);

                Trace("; %d/%d URL(s)\n", len, count);
                if(normalize) {
                    HTML_ShowIndependURLList(base_url, urllist);
                }
                else {
                    HTML_ShowURLList(urllist);
                }

                HTML_FreeURLList(urllist);
                FREE(urllist);
            }
        }
        tags = NULL;    /* released above on every path */

#ifdef MEMORY_CHECK
        show_memoryusage();
#endif
    }

    exit(KOTETU_EXIT_NORMAL);
}
#endif /* PARSE_TEST */




