/*
 * KOTETU:
 *      by k-chinen@is.aist-nara.ac.jp, 1996, 1997
 *
 * $Id: url.c,v 1.2 1996/11/24 14:49:03 k-chinen Exp k-chinen $
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <sys/types.h>

#include "wcol.h"
#include "url.h"


/* Debug switch for this module; nothing in this file tests it. */
#define _DEBUG_URL

/*
#define w_alloc_str(p,len) (p==NULL ? p = (char*) MALLOC(len+1) : p)
*/

/*
 * w_alloc_str - give the string slot `p' a buffer if it has none.
 *
 * If `p' is NULL it is assigned alloc_str(len); otherwise it is left
 * alone.  The value of the expression is the (possibly new) pointer, so
 * a NULL result means the allocation failed.
 *
 * `p' is evaluated more than once and must be an lvalue without side
 * effects.  An existing buffer is reused without checking its size.
 */
#define w_alloc_str(p,len) ((p)==NULL ? ((p) = alloc_str(len)) : (p))



/*
 * Domain suffix that URL_Normalize appends to host names containing no
 * '.'.  It is never assigned in this file.
 * NOTE(review): presumably expected to start with '.', since it is
 * concatenated directly after the host name -- confirm where it is set.
 */
char local_domain[STRING_SIZE];


/*
 * URL_Show - dump a URL object through Trace()
 *
 * Prints where the object lives (static/heap), its state
 * (dirty/clear/normalized), the full locator and each parsed component.
 * A NULL object or a NULL component is reported as *EMPTY*.
 */
void URL_Show(url_t *this)
{
    Trace("URL_Show: ");
    if(this == NULL) {
        Trace("*EMPTY* URL\n");
        return;
    }

    /* storage location; URL_New/URL_NewVal store HEAP_URL here */
    switch(this->loc) {
    case STATIC_URL:    Trace("*static*      ");    break;
    case HEAP_URL:      Trace("*heap*        ");    break;
    default:            Trace("*unknown-loc* ");    break;
    }

    /* parse state */
    switch(this->state) {
    case DIRTY_URL:     Trace("*dirty*        ");   break;
    case CLEAR_URL:     Trace("*clear*        ");   break;
    case NORMAL_URL:    Trace("*normalized*   ");   break;
    default:            Trace("*unknown-type* ");   break;
    }

    if(this->full_loc == NULL) {
        Trace("fullname locator is *EMPTY*\n");
    }
    else {
        Trace("'%s'\n", this->full_loc);
    }

    if(this->proto == NULL) {
        Trace("\tprotocol *EMPTY*\n");
    }
    else {
        Trace("\tprotocol '%s'\n", this->proto);
    }

    if(this->host == NULL) {
        Trace("\thost     *EMPTY*\n");
    }
    else {
        Trace("\thost     '%s'\n", this->host);
    }

    if(this->port == NULL) {
        Trace("\tport     *EMPTY*\n");
    }
    else {
        Trace("\tport     '%s'\n", this->port);
    }

    if(this->path == NULL) {
        Trace("\tpath     *EMPTY*\n");
    }
    else {
        Trace("\tpath     '%s'\n", this->path);
    }
}

/*
 * URL_New - create an empty URL object
 *
 *  return
 *      NULL    MALLOC failed
 *      !NULL   new object: heap located, dirty, empty locator and no
 *              component strings
 */
url_t *URL_New()
{
    url_t *fresh = (url_t*) MALLOC(sizeof(url_t));

    if(fresh != NULL) {
        fresh->full_loc[0] = '\0';
        fresh->proto = NULL;
        fresh->host  = NULL;
        fresh->port  = NULL;
        fresh->path  = NULL;
        fresh->state = DIRTY_URL;
        fresh->loc   = HEAP_URL;
    }

    return fresh;
}

/*
 * URL_NewVal - Generate new URL with value
 */
url_t *URL_NewVal(char *val)
{
    url_t*  this;

    if((this = (url_t*) MALLOC(sizeof(url_t))) == NULL) {
        return NULL;
    }
    this->state = DIRTY_URL;
    if(val == NULL) {
        this->full_loc[0] = '\0';
    }
    else {
        strcpy(this->full_loc, val);
    }
    this->loc   = HEAP_URL;
    this->proto = NULL;
    this->host  = NULL;
    this->port  = NULL;
    this->path  = NULL;

    return this;
}





/*
 * URL_Clear - put a URL object back into the empty, dirty state
 *
 * The component pointers are only set to NULL, they are not freed
 * (URL_Reset is the variant that frees them).  The storage location
 * flag is left as it is.
 *
 *  return
 *      0   always
 */
int URL_Clear(url_t *this)
{
    /* forget the components without releasing them */
    this->path  = NULL;
    this->port  = NULL;
    this->host  = NULL;
    this->proto = NULL;

    /* empty locator, needs parsing again */
    this->full_loc[0] = '\0';
    this->state = DIRTY_URL;

    return 0;
}

/*
 * URL_Reset - release the component strings and empty the URL object
 *
 * Every non-NULL component is released with FREE and its pointer set to
 * NULL; the locator is emptied and the state becomes dirty.  The
 * storage location flag is left as it is.
 *
 *  return
 *      0   always
 */
int URL_Reset(url_t *this)
{
    this->state = DIRTY_URL;
    this->full_loc[0] = '\0';

    if(this->proto != NULL) {
        FREE(this->proto);
    }
    this->proto = NULL;

    if(this->host != NULL) {
        FREE(this->host);
    }
    this->host = NULL;

    if(this->port != NULL) {
        FREE(this->port);
    }
    this->port = NULL;

    if(this->path != NULL) {
        FREE(this->path);
    }
    this->path = NULL;

    return 0;
}

/*
 * URL_Set - store a new locator string in a URL object
 *
 * Only full_loc is written; the state and the parsed components are not
 * touched.
 *
 *  return
 *      0   success
 *      -1  `this' or `val' is NULL, or `val' is URL_LENGTH characters
 *          or more (the limit URL_Normalize applies to full_loc); the
 *          object is left unchanged
 */
int URL_Set(url_t *this, char *val)
{
    if(this == NULL || val == NULL) {
        return -1;
    }
    if(strlen(val) >= (size_t)URL_LENGTH) {
        return -1;
    }

    strcpy(this->full_loc, val);    /* length checked above */

    return 0;
}



/*
 * URL_Delete - release a URL object and its component strings
 *
 * Each non-NULL component is released with FREE, then the object
 * itself.
 *
 *  return
 *      0   released
 *      -1  `this' is NULL
 */
int URL_Delete(url_t *this)
{
    if(this != NULL) {
        if(this->proto != NULL) {
            FREE(this->proto);
        }
        if(this->host != NULL) {
            FREE(this->host);
        }
        if(this->port != NULL) {
            FREE(this->port);
        }
        if(this->path != NULL) {
            FREE(this->path);
        }
        FREE(this);

        return 0;
    }

    return -1;
}


/*
 * ParseURL - split a URL into protocol, host, port and path.
 *
 * Leading blanks, tabs, CRs and LFs are skipped.  The protocol is the
 * text in front of the first ':' provided no '/', '?' or '#' comes
 * before that ':' (a scheme cannot contain them, so a ':' further on
 * belongs to the host:port part, the path or the query).  "//" after
 * the protocol introduces "host[:port]", which runs up to the next '/'.
 * Everything left over is the path.  Components that are absent come
 * back as empty strings.
 *
 * The output buffers are filled without any size check: each of them
 * must have room for strlen(url)+1 bytes.
 *
 *  return
 *      0   parsed
 *      1   url is NULL (the output buffers are not touched)
 */
int ParseURL(char *url,
    char *protocol, char *host, char *port, char *path)
{
    register char *p,*q;

    if(url==NULL)
        return 1;

    /* init */
    protocol[0] = '\0';
    host[0] = '\0';
    port[0] = '\0';
    path[0] = '\0';

    /* Skip whites */
    p = url;
    while(*p==' '||*p=='\t'||*p=='\r'||*p=='\n')
        p++;
    url = p;

    /*
     * PROTOCOL
     */
    q = protocol;
    while(*p && *p!=':' && *p!='/' && *p!='?' && *p!='#')
        *q++ = *p++;
    *q = '\0';

    if(*p==':') {
        p++;
    }
    else {
        /* No protocol: what was copied belongs to host or path */
        protocol[0] = '\0';
        p = url;
    }

    /*
     * HOST and PORT
     */
    if(*p=='/' && *(p+1)=='/') {
        p += 2;

        q = host;
        while(*p && *p!='/')
            *q++ = *p++;
        *q = '\0';

        /* cut "host:port" at the first ':' */
        q = strchr(host, ':');
        if(q!=NULL) {
            *q = '\0';
            strcpy(port, q+1);
        }
    }

    /*
     * PATH
     */
    strcpy(path, p);

    return 0;
}



/*
 * URL_RawParse - split url->full_loc into the component strings
 *
 * Each component that is still NULL first gets a buffer through
 * w_alloc_str(..., STRING_SIZE), in the order proto, host, port, path;
 * the first failing allocation ends the call.  ParseURL then fills the
 * four components and the state becomes CLEAR_URL.
 *
 *  return
 *      0   success
 *      -1  a component buffer could not be allocated
 */
int URL_RawParse(url_t *url)
{
    int no_memory;

    /* || stops at the first failure, later slots are not touched */
    no_memory = w_alloc_str(url->proto, STRING_SIZE) == NULL
             || w_alloc_str(url->host,  STRING_SIZE) == NULL
             || w_alloc_str(url->port,  STRING_SIZE) == NULL
             || w_alloc_str(url->path,  STRING_SIZE) == NULL;
    if(no_memory) {
        return -1;
    }

    ParseURL(url->full_loc, url->proto, url->host, url->port, url->path);
    url->state = CLEAR_URL;

    return 0;
}

#if 0

/*
 *  NormalizeURL - URL normalization, string version
 *
 *  This function sits inside "#if 0" and is not compiled.
 *
 *  The target is split with ParseURL, missing parts are given defaults
 *  (protocol "no-protocol", host "localhost", port "80", or "21" for
 *  ftp, path "/"), local_domain is appended to a host name without '.',
 *  the path is cut at '#', and the parts are joined again as
 *  "protocol://host:port/path".
 *
 *  return
 *          NULL    Error
 *          !NULL   Normalized URL (allocated on the heap; the caller
 *                  has to free it)
 */
char *NormalizeURL(char *target)
{
    char protocol[STRING_SIZE];
    char host[STRING_SIZE], port[STRING_SIZE];
    char path[STRING_SIZE];
    int  chk;
    char tmp[STRING_SIZE];
    char *ret;

/*
    Trace("NormalizeURL: target='%s'\n", target);
*/

    if(chk = ParseURL(target, protocol, host, port, path)) {
        return NULL;
    }
#if 1
   Trace("\tprotocol '%s'\n\thost     '%s'\n\tport     '%s'\n\tpath     '%s'\n",
        protocol, host, port, path);
#endif

    if(protocol[0] == '\0') {
        strcpy(protocol, "no-protocol");
    }
    else {
        /* 'HTTP' -> 'http' */
        if(strcasecmp_first_all(protocol, 'H', "HTTP")) {
            strcpy(protocol, "http");
        }
        if(strcasecmp_first_all(protocol, 'F', "FTP")) {
            strcpy(protocol, "ftp");
        }
        if(strcasecmp_first_all(protocol, 'Z', "Z39.50")) {
            strcpy(protocol, "wais");
        }
        if(strcasecmp_first_all(protocol, 'N', "NNTP")) {
            strcpy(protocol, "news");
        }
    }

    if(host[0] == '\0') {
        strcpy(host, "localhost");
    }
    else {
        char *p;

        /* look for a '.' in the host name */
        p = host;
        while(*p && *p!='.') {
            p++;
        }
        /* none found: append the local domain */
        if(*p=='\0') {
            strcat(host, local_domain);
        }
    }

    if(port[0] == '\0') {
        if(strcasecmp_first_all(protocol, 'h', "http")) {
            strcpy(port, "80");
        }
        else
        if(strcasecmp_first_all(protocol, 'f', "ftp")) {
            strcpy(port, "21");
        }
        else {
            strcpy(port, "80");
        }
    }

    if(path[0] == '\0') {
        strcpy(path, "/");
    }
    else {
        char *p;

        /* cut the path at the first '#' */
        p = path;
        while(*p && *p!='#') {
            p++;
        }
        if(*p=='#') {
            *p='\0';
        }
    }

#if 0
   Trace("\tprotocol '%s'\n\thost     '%s'\n\tport     '%s'\n\tpath     '%s'\n",
        protocol, host, port, path);
#endif

    sprintf(tmp, "%s://%s:%s%s", protocol, host, port, path);
    if(STRDUP(ret, tmp)==NULL) {
        Fatal("NormalizeURL: no memory");
        return NULL;
    }

/*
    Trace("NormalizeURL: Result(%#x) '%s'\n", ret, ret);
*/

    return ret;
}

#endif



/*
 * replace_str - store a copy of `v' in the heap string slot `*x'
 *
 * arguments:
 *      x:  address of the slot; *x is NULL or a heap string
 *      v:  new value
 *
 * When *x is NULL, or the string in it is shorter than `v', a fresh
 * copy of `v' is made with STRDUP, the old string (if any) is released
 * with FREE and the slot is pointed at the copy.  Otherwise `v' is
 * copied over the existing string.
 *
 * The capacity of the old buffer is not known here, so its current
 * string length is taken as the room available.
 * NOTE(review): a buffer made by STRDUP is presumably an exact fit and
 * may be smaller than the STRING_SIZE buffers URL_RawParse expects to
 * find in the same slots -- confirm before parsing into such a slot.
 *
 *  return
 *      NULL    `x' or `v' is NULL, or STRDUP failed (*x is unchanged)
 *      !NULL   the string now held in *x
 */
static char *replace_str(char **x, char *v)
{
    char *t;

    if(x==NULL || v==NULL) {
        return NULL;
    }

    if(*x==NULL || strlen(*x)<strlen(v)) {
        /* no buffer, or too short: take a fresh copy from the heap */
        if(STRDUP(t, v)==NULL) {
            return NULL;
        }

        /* replace address */
        if(*x!=NULL)
            FREE(*x);
        *x = t;
    }
    else {
        /* fits into the existing string */
        strcpy(*x, v);
    }

    return *x;
}


/*
 *  URL_Normalize - URL normalization
 *
 *  Parses the URL with URL_Parse, then fills in and cleans up the
 *  components:
 *      protocol    empty -> "file"; "HTTP" -> "http",
 *                  "Z39.50" -> "wais", "NNTP" -> "news"
 *      host        empty -> "localhost"; a name without '.' gets
 *                  local_domain appended
 *      port        empty -> "21" for ftp, "80" otherwise
 *      path        empty -> "/"; cut at '#'; bytes 0x7f and above are
 *                  written as %XX
 *  and rebuilds full_loc as "proto://host:port/path".  On success the
 *  state becomes NORMAL_URL.  A URL that is already NORMAL_URL is left
 *  alone.
 *
 *  The results of replace_str are not checked.
 *
 *  return
 *      0   success
 *      !0  error: `this' is NULL, parsing failed, or a temporary or
 *          the rebuilt locator would not fit in URL_LENGTH bytes
 */
int URL_Normalize(url_t *this)
{
    static const char hexdigit[] = "0123456789ABCDEF";
    char tmp[URL_LENGTH];
    register char *p, *q;
    unsigned char ch;
    int  chk;
    int  changed;

    if(this == NULL) {
        return -1;
    }

    if(this->state == NORMAL_URL) {
        return 0;
    }

    chk = URL_Parse(this);
    if(chk) {
        return -1;
    }

    /*
     * Protocol
     */
    if(this->proto == NULL || this->proto[0] == '\0') {
        replace_str(&this->proto, "file");
    }
    else {
        /* 'HTTP' -> 'http' */
        if(strcmp_first_all(this->proto, 'h', "http")) {
            /* nothing */
        }
        if(strcmp_first_all(this->proto, 'H', "HTTP")) {
            replace_str(&this->proto, "http");
        }
        if(strcasecmp_first_all(this->proto, 'Z', "Z39.50")) {
            replace_str(&this->proto, "wais");
        }
        if(strcasecmp_first_all(this->proto, 'N', "NNTP")) {
            replace_str(&this->proto, "news");
        }
    }

    /*
     * Host
     */
    if(this->host == NULL || this->host[0] == '\0') {
        replace_str(&this->host, "localhost");
    }
    else if(strchr(this->host, '.') == NULL) {
        /* hostname does not include a domain */
        if(strlen(this->host) + strlen(local_domain) >= sizeof(tmp)) {
            return -1;
        }
        strcpy(tmp, this->host);
        strcat(tmp, local_domain);
        replace_str(&this->host, tmp);
    }

    /*
     * Port
     */
    if(this->port == NULL || this->port[0] == '\0') {
        if(strcasecmp_first_all(this->proto, 'h', "http")) {
            replace_str(&this->port, "80");
        }
        else
        if(strcasecmp_first_all(this->proto, 'f', "ftp")) {
            replace_str(&this->port, "21");
        }
        else {
            replace_str(&this->port, "80");
        }
    }

    /*
     * Path
     */
    if(this->path == NULL || this->path[0] == '\0') {
        replace_str(&this->path, "/");
    }
    else {
        changed = 0;
        q = tmp;
        for(p = this->path; *p != '\0'; p++) {
            if(*p=='#') {
                /* the fragment is not part of the normalized path */
                changed = 1;
                break;
            }

            /* look at the byte as unsigned: plain char may be signed */
            ch = (unsigned char) *p;
            if(ch >= 0x7f) {
                /* three bytes plus the final NUL must fit */
                if(q + 3 >= tmp + sizeof(tmp)) {
                    return -1;
                }
                changed = 1;
                *q++ = '%';
                *q++ = hexdigit[ch >> 4];
                *q++ = hexdigit[ch & 0x0f];
            }
            else {
                if(q + 1 >= tmp + sizeof(tmp)) {
                    return -1;
                }
                *q++ = *p;
            }
        }
        *q = '\0';

        if(changed) {
            Trace("\tbefore: '%s'\n", this->path);
            Trace("\tafter:  '%s'\n", tmp);

            replace_str(&this->path, tmp);
        }
    }

    /*
     * Rebuild the locator; "://" and ":" need 4 bytes, the NUL one more
     */
    if(strlen(this->proto) + strlen(this->host)
        + strlen(this->port) + strlen(this->path) + 5 < URL_LENGTH ) {

        sprintf(this->full_loc,
            "%s://%s:%s%s", this->proto, this->host, this->port, this->path);

        this->state = NORMAL_URL;

        return 0;
    }

    return -1;
}



/*
 * uniq_path - remove "/./" and "/../" from a path, in place
 *
 * "/./" becomes "/".  "/.." followed by '/' or the end of the string is
 * removed together with the segment in front of it, i.e. the text from
 * the previous '/' on ("/a/b/../c" -> "/a/c", "/a/b/.." -> "/a",
 * "/a/.." -> "").  A trailing "/." is not touched.
 *
 * The position of every '/' that starts an ordinary segment is kept on
 * a stack of MAX_SEGMENTS entries, so that ".." can step back to it.
 *
 *  return
 *      safe    0
 *      error   1   ".." would climb above the start of the path, or
 *                  the path has more segments than the stack can hold;
 *                  the path may have been partly rewritten
 */
int uniq_path(char *path)
{
    enum { MAX_SEGMENTS = 100 };
    int  count;
    char *pos[MAX_SEGMENTS];
    register char *p,*r;

    p = path;
    count = 0;
    pos[count] = p;
    while(*p) {
        if(*p!='/') {
            p++;
            continue;
        }

        if(p[1]=='.' && p[2]=='/') {
            /*
             * "/./": pull the tail over "./" and look at this '/'
             * again
             */
            r = p+3;
            memmove(p+1, r, strlen(r)+1);
        }
        else
        if(p[1]=='.' && p[2]=='.' && (p[3]=='/' || p[3]=='\0')) {
            if(count==0) {
                Error("uniq_path: Cannot back, thru. path='%s'", path);

                return 1;
            }
            /*
             * "/..": pull the tail over the previous segment and go on
             * from there
             */
            r = p+3;
            p = pos[count];
            memmove(p, r, strlen(r)+1);
            count--;
        }
        else {
            /* ordinary segment: remember where it starts */
            if(count+1 >= MAX_SEGMENTS) {
                Error("uniq_path: Too many segments. path='%s'", path);

                return 1;
            }
            count++;
            pos[count] = p;
            p++;
        }
    }

    return 0;
}


/*
 * URL_JoinStr --- join name to base-name (URL or path)
 *
 * Resolves `target' against `base' and writes the result into `store',
 * a buffer of `len' bytes.
 *
 *  - A target starting with '#' gives a copy of base.
 *  - Protocol, host and port come from the target when it has them,
 *    from the base otherwise.
 *  - A target path starting with '/' is used as it is.  A target that
 *    names a host but has no path gets "/".  Any other target path
 *    replaces the last segment of the base path.
 *  - The path is cleaned with uniq_path.
 *  - In the result a missing protocol becomes "file" and a missing host
 *    "localhost"; when protocol, host and port are all missing the
 *    result is "file:" followed by the path.
 *
 *  return:
 *      success 0
 *      error  !0   an argument is NULL, len is not positive, base or
 *                  target does not fit the STRING_SIZE work buffers,
 *                  the path was rejected by uniq_path, or the result
 *                  does not fit in store.  store must hold at least
 *                  strlen(base)+strlen(target)+1 bytes in any case.
 */

int
URL_JoinStr(char *store, int len, char *base, char *target)
{
    char b_proto[STRING_SIZE], b_host[STRING_SIZE], b_port[STRING_SIZE];
    char b_path[STRING_SIZE];
    char t_proto[STRING_SIZE], t_host[STRING_SIZE], t_port[STRING_SIZE];
    char t_path[STRING_SIZE];
    char *proto, *host, *port, *path;
    const char *out_proto, *out_host;
    char *p;
    int n;

    if(store==NULL || base==NULL || target==NULL || len<=0) {
        return -1;
    }

    /* Skip '#': a fragment-only name stands for the base itself */
    if(*target=='#') {
        if((size_t)len > strlen(base)) {
            strcpy(store, base);
            return 0;
        }
        return -1;
    }

    if(strlen(base)+strlen(target) > (size_t)(len-1)) {
        return -1;
    }

    /*
     * ParseURL copies without a size check; every part is a piece of
     * its input, so limiting the input keeps each part in its buffer.
     */
    if(strlen(base) >= (size_t)STRING_SIZE
        || strlen(target) >= (size_t)STRING_SIZE) {
        return -1;
    }

    ParseURL(base,   b_proto, b_host, b_port, b_path);
    ParseURL(target, t_proto, t_host, t_port, t_path);

    /*
     * Protocol, Host, Port: the target wins when it has one
     */
    proto = (t_proto[0]=='\0') ? b_proto : t_proto;
    host  = (t_host[0]=='\0')  ? b_host  : t_host;
    port  = (t_port[0]=='\0')  ? b_port  : t_port;

    /*
     * Path
     */
    if(t_path[0]=='/') {
        path = t_path;
    }
    else
    if(t_path[0]=='\0' && t_host[0]!='\0') {
        /* the target names another host but no path: its root */
        strcpy(t_path, "/");
        path = t_path;
    }
    else {
        /* find the last '/' of the base path */
        p = b_path + strlen(b_path);
        while(p>b_path && *p!='/') {
            p--;
        }

        if(*p=='/') {
            /* keep the directory part including its '/' */
            p++;
        }
        else
        if(b_path[0]=='\0') {
            /* base has no path at all: join below the root */
            b_path[0] = '/';
            p = b_path + 1;
        }
        /* else: base path is a bare name, replace all of it */

        if((size_t)(p - b_path) + strlen(t_path) >= sizeof(b_path)) {
            return -1;
        }
        strcpy(p, t_path);
        path = b_path;
    }

    if(uniq_path(path)) {
        Error("Ignore path !!!");
        Error("\tbase   = '%s'", base);
        Error("\ttarget = '%s'", target);
        return -1;
    }

    /*
     * Result
     */
    if(*proto=='\0' && *host=='\0' && *port=='\0') {
        n = snprintf(store, (size_t)len, "file:%s", path);
    }
    else {
        out_proto = *proto ? proto : "file";
        out_host  = *host  ? host  : "localhost";

        if(*port) {
            n = snprintf(store, (size_t)len, "%s://%s:%s%s",
                    out_proto, out_host, port, path);
        }
        else {
            n = snprintf(store, (size_t)len, "%s://%s%s",
                    out_proto, out_host, path);
        }
    }
    if(n<0 || n>=len) {
        store[0] = '\0';
        return -1;
    }

    if(path[0]!='/') {
        Error("URL_JoinStr: Strange path '%s' in '%s'", path, store);

        /* write it again with a '/' in front of the path */
        n = snprintf(store, (size_t)len, "%s://%s:%s/%s",
                proto, host, port, path);
        if(n<0 || n>=len) {
            store[0] = '\0';
            return -1;
        }
    }

    return 0;
}


/*
 * URL_JoinStrOnHeap --- join name to base-name (URL or path)
 *
 * Same as URL_JoinStr, with the result in a buffer taken from
 * alloc_str.  The buffer is asked for with JOIN_SLACK bytes more than
 * strlen(base)+strlen(target): URL_JoinStr wants room for both strings
 * plus the terminator, and may add text of its own such as
 * "file://localhost:".
 *
 *  return:
 *          NULL    cannot join (bad argument, no memory, or URL_JoinStr
 *                  failed; the buffer is released again)
 *          !NULL   joined path (allocated on the heap; the caller has
 *                  to free it)
 */
char*
URL_JoinStrOnHeap(char *base, char *target)
{
    enum { JOIN_SLACK = 32 };
    char *ret;
    int templen;

    if(base==NULL || target==NULL) {
        return NULL;
    }

    templen = (int)(strlen(base)+strlen(target)) + JOIN_SLACK;
    ret = alloc_str(templen);
    if(ret==NULL) {
        return NULL;
    }

    if(URL_JoinStr(ret, templen, base, target)) {
        FREE(ret);
        return NULL;
    }

    return ret;
}


/*
 * URL_Join - placeholder: nothing is joined, `base' is handed back
 * unchanged and `target' is ignored.
 */
url_t *URL_Join(url_t *base, url_t *target)
{
    (void) target;  /* not used */

    return base;
}

/*
 * IsURL_NOTEorBODY - check a name for a trailing "**NOTE" or "**BODY"
 *
 * When `name' ends with one of the two marks, the text in front of the
 * mark is stored in `realname', which needs room for it plus the
 * terminator.  `realname' is not written otherwise.
 *
 * return:
 *      0   not note or body (also for a NULL name)
 *      1   note
 *      2   body
 *
 */
int IsURL_NOTEorBODY(char *name, char *realname)
{
    char *p;
    int kind;

    if(name == NULL) {
        return 0;
    }

    Trace("IsURL_NOTEorBODY: hook to note, '%s'\n", name);

    kind = 0;

    /* the mark starts at the '*' in front of the last '*' */
    p = strrchr(name, '*');
    if(p != NULL && p > name && *(p-1) == '*') {
        p--;
        Trace("\tpiece '%s'\n", p);
        if(strcmp(p,"**NOTE")==0) {
            kind = 1;
        }
        else
        if(strcmp(p,"**BODY")==0) {
            kind = 2;
        }
    }

    if(kind == 0) {
        Trace("\tIt is not NOTE or BODY '%s'\n", name);
        return 0;
    }

    /* real name: everything in front of the mark */
    memmove(realname, name, (size_t)(p - name));
    realname[p - name] = '\0';

    if(kind == 1) {
        Trace("\tIt is NOTE '%s'\n", name);
    }
    else {
        Trace("\tIt is BODY '%s'\n", name);
    }

    return kind;
}


/*
 * istempURL - check temporary URL or not
 *
 * A URL counts as temporary when it contains a '?', contains "cgi-bin"
 * anywhere, or ends with ".cgi".
 *
 * return:
 *      temporary -> 1
 *      otherwise -> 0 (also for a NULL url)
 *
 */
int
istempURL(char *url)
{
    size_t len;

    if(url == NULL) {
        return 0;
    }

    /* query string */
    if(strchr(url, '?') != NULL) {
        return 1;
    }

    /* "cgi-bin" may be followed by the script name, so search for it */
    if(strstr(url, "cgi-bin") != NULL) {
        return 1;
    }

    /* ".cgi" as the very end of the URL */
    len = strlen(url);
    if(len >= 4 && strcmp(url + len - 4, ".cgi") == 0) {
        return 1;
    }

    return 0;
}


/*
 * IsHTML - guess from a path whether it names an HTML document
 *
 * A path ending with '/' counts as HTML.  Otherwise the text from the
 * last '.' of the path on is compared, ignoring case, with ".html",
 * ".htm" and ".shtml" (and, when PADDING_NAME is defined, exactly with
 * PADDING_NAME).  A path without any '.' is not HTML.
 *
 * return:
 *      1   HTML
 *      0   not HTML (also for a NULL or empty path)
 *
 */
int IsHTML(char *path)
{
    size_t len;
    char *dot;

    if(path == NULL) {
        return 0;
    }

    len = strlen(path);
    if(len == 0) {
        return 0;
    }

    /* directory */
    if(path[len-1]=='/') {
        return 1;
    }

    /*
     * seek period
     */
    dot = strrchr(path, '.');
    if(dot == NULL) {
        return 0;
    }

#ifdef PADDING_NAME
    if(strcmp(dot, PADDING_NAME)==0)  {
        return 1;
    }
#endif
    if(strcasecmp(dot, ".html")==0
        ||strcasecmp(dot, ".htm")==0
        ||strcasecmp(dot, ".shtml")==0)  {
        return 1;
    }

    return 0;
}


