#line 7 "formal.c-nw" #include #include #include "uri.e" #include "unescape.e" #define NMCHAR \ "-.0123456789@ABCDEFGHIJKLMNOPQRSTUVXYZ_abcdefghijklmnopqrstuvwxyz" #define MAXSTR 4096 /* Enough? */ static Bool p_scheme(const char *s, const char **rest, Strip *scheme) { int n; n = strcspn(s, ":/?#"); if (n == 0 || s[n] != ':') return FALSE; *scheme = strn2strip(s, n); *rest = s + n + 1; return TRUE; } static Bool p_user(const char *s, const char **rest, Strip *user) { int n; char h[MAXSTR] = ""; n = strcspn(s, ":@/"); if (n == 0) return FALSE; *user = str2strip(URL_unescape(strncat(h, s, n))); *rest = s + n; return TRUE; } static void p_password_opt(const char *s, const char **rest, Strip *passw) { int n; char h[MAXSTR] = ""; if (*s != ':') { *passw = NULL; *rest = s; } else { s++; n = strcspn(s, "@"); *passw = str2strip(URL_unescape(strncat(h, s, n))); *rest = s + n; } } static Bool p_host(const char *s, const char **rest, Strip *host) { int n; char h[MAXSTR] = ""; n = strcspn(s, ":/"); if (n == 0) return FALSE; *host = str2strip(URL_unescape(strncat(h, s, n))); *rest = s + n; return TRUE; } static void p_port_opt(const char *s, const char **rest, Strip *port) { int n; if (*s != ':') { *port = NULL; *rest = s; } else { s++; n = strcspn(s, "/"); *port = strn2strip(s, n); *rest = s + n; } } static Bool p_user_passw_host_port(const char *s, const char **rest, Strip *user, Strip *passw, Strip *host, Strip *port) { const char *s1; int n; if (!p_user(s, &s1, user)) return FALSE; p_password_opt(s1, &s1, passw); if (*s1 != '@') return FALSE; s1++; if (!p_host(s1, &s1, host)) return FALSE; p_port_opt(s1, rest, port); return TRUE; } static Bool p_host_port(const char *s, const char **rest, Strip *host, Strip *port) { const char *s1; if (!p_host(s, &s1, host)) return FALSE; p_port_opt(s1, rest, port); return TRUE; } static Bool p_connect_opt(const char *s, const char **rest, Strip *user, Strip *passw, Strip *host, Strip *port) { if (s[0] != '/' || s[1] != '/') { *user = *passw = *host = *port = NULL; *rest = s; return TRUE; } else if (p_user_passw_host_port(s + 2, rest, user, passw, host, port)) { return TRUE; } else if (p_host_port(s + 2, rest, host, port)) { *user = *passw = NULL; return TRUE; } else return FALSE; } static Bool p_abs_path(const char *s, const char **rest, Strip *path, Bool *is_dir) { int n; if (*s == '\0') { *path = str2strip("/"); *is_dir = TRUE; *rest = s; return TRUE; } else if (*s != '/') { return FALSE; } else { n = strcspn(s, "?#"); *path = strn2strip(s, n); *is_dir = n > 0 && s[n-1] == '/'; *rest = s + n; return TRUE; } } static void p_path_opt(const char *s, const char **rest, Strip *path, Bool *is_dir) { int n; n = strcspn(s, "?#"); if (n == 0) { *path = NULL; *is_dir = FALSE; *rest = s; } else { *path = strn2strip(s, n); *is_dir = s[n-1] == '/'; *rest = s + n; } } static void p_search_opt(const char *s, const char **rest, Strip *search) { int n; if (*s != '?') { *search = NULL; *rest = s; } else { s++; n = strcspn(s, "#"); *search = strn2strip(s, n); *rest = s + n; } } static void p_anchor_opt(const char *s, const char **rest, Strip *anchor) { int n; char h[MAXSTR] = ""; if (*s != '#') { *anchor = NULL; *rest = s; } else { s++; n = strspn(s, NMCHAR); *anchor = str2strip(URL_unescape(strncat(h, s, n))); *rest = s + n; } } static Bool p_url(const char *s, const char **rest, URI *uri) { const char *s1; if (!p_scheme(s, &s1, &uri->scheme)) return FALSE; if (! p_connect_opt(s1, &s1, &uri->user, &uri->passw, &uri->host, &uri->port)) return FALSE; if (!p_abs_path(s1, &s1, &uri->path, &uri->is_dir)) return FALSE; p_search_opt(s1, &s1, &uri->search); p_anchor_opt(s1, rest, &uri->anchor); uri->tp = URI_URL; return TRUE; } static Bool p_urn(const char *s, const char **rest, URI *uri) { const char *s1; if (!p_scheme(s, &s1, &uri->scheme)) return FALSE; uri->path = str2strip(s1); uri->tp = URI_URN; *rest = s1 + strlen(s1); return TRUE; } static Bool p_relative(const char *s, const char **rest, URI *uri) { const char *s1; if (*s == '\0') return FALSE; uri->scheme = NULL; if (! p_connect_opt(s, &s1, &uri->user, &uri->passw, &uri->host, &uri->port)) return FALSE; p_path_opt(s1, &s1, &uri->path, &uri->is_dir); p_search_opt(s1, &s1, &uri->search); p_anchor_opt(s1, rest, &uri->anchor); uri->tp = URI_Rel; return TRUE; } #define chk(s, v) (s ? s : (s = str2strip(v))) static Bool p_nonhierarchical(const char *s, const char **rest, URI *uri) { static Strip mailto = NULL; const char *s1; if (*s == '\0') return FALSE; if (!p_scheme(s, &s1, &uri->scheme)) return FALSE; if (uri->scheme != chk(mailto, "mailto")) return FALSE; uri->path = str2strip(s1); uri->tp = URI_URL; *rest = s1 + strlen(s1); uri->user = uri->passw = uri->host = uri->port = NULL; uri->anchor = uri->search = NULL; uri->is_dir = FALSE; return TRUE; } EXPORT Bool URL_parse(const char *uri, URI *parsed) { const char *rest = uri; if (!uri) /* Empty URI */ return FALSE; else if (p_nonhierarchical(uri, &rest, parsed) && empty(rest)) return TRUE; else if (p_url(uri, &rest, parsed) && empty(rest)) return TRUE; /* It's a URL */ else if (p_urn(uri, &rest, parsed) && empty(rest)) return TRUE; /* It's a URN */ else if (p_relative(uri, &rest, parsed) && empty(rest)) return TRUE; /* It's a relative URL */ else return FALSE; /* Syntax error */ }