| /* ----------------------------------------------------------------------- * |
| * |
| * Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin |
| * |
| * Permission is hereby granted, free of charge, to any person |
| * obtaining a copy of this software and associated documentation |
| * files (the "Software"), to deal in the Software without |
| * restriction, including without limitation the rights to use, |
| * copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following |
| * conditions: |
| * |
| * The above copyright notice and this permission notice shall |
| * be included in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * ----------------------------------------------------------------------- */ |
| |
| /* |
| * urlparse.c |
| */ |
| |
| #include <string.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include "url.h" |
| |
| /* |
| * Return the type of a URL without modifying the string |
| */ |
| enum url_type url_type(const char *url) |
| { |
| const char *q; |
| |
| q = strchr(url, ':'); |
| if (!q) |
| return URL_SUFFIX; |
| |
| if (q[1] == '/' && q[2] == '/') |
| return URL_NORMAL; |
| |
| if (q[1] == ':') |
| return URL_OLD_TFTP; |
| |
| return URL_SUFFIX; |
| } |
| |
| /* |
| * Decompose a URL into its components. This is done in-place; |
| * this routine does not allocate any additional storage. Freeing the |
| * original buffer frees all storage used. |
| */ |
| void parse_url(struct url_info *ui, char *url) |
| { |
| char *p = url; |
| char *q, *r, *s; |
| int c; |
| |
| memset(ui, 0, sizeof *ui); |
| |
| q = strchr(p, ':'); |
| if (q && (q[1] == '/' && q[2] == '/')) { |
| ui->type = URL_NORMAL; |
| |
| ui->scheme = p; |
| *q = '\0'; |
| p = q+3; |
| |
| q = strchr(p, '/'); |
| if (q) { |
| *q = '\0'; |
| ui->path = q+1; |
| q = strchr(q+1, '#'); |
| if (q) |
| *q = '\0'; |
| } else { |
| ui->path = ""; |
| } |
| |
| r = strchr(p, '@'); |
| if (r) { |
| ui->user = p; |
| *r = '\0'; |
| s = strchr(p, ':'); |
| if (s) { |
| *s = '\0'; |
| ui->passwd = s+1; |
| } |
| p = r+1; |
| } |
| |
| ui->host = p; |
| r = strchr(p, ':'); |
| if (r) { |
| *r++ = '\0'; |
| ui->port = 0; |
| while ((c = *r++)) { |
| c -= '0'; |
| if (c > 9) |
| break; |
| ui->port = ui->port * 10 + c; |
| } |
| } |
| } else if (q && q[1] == ':') { |
| *q = '\0'; |
| ui->scheme = "tftp"; |
| ui->host = p; |
| ui->path = q+2; |
| ui->type = URL_OLD_TFTP; |
| } else { |
| ui->path = p; |
| ui->type = URL_SUFFIX; |
| } |
| } |
| |
/*
 * Escapes unsafe characters in a URL.
 *
 * Every byte that is a space or control character (<= ' ') or lies above
 * printable ASCII (> '~') is written as %XX with uppercase hex digits;
 * all other bytes are copied unchanged.
 * This does *not* escape things like query characters!
 *
 * At most bufsize-1 characters are stored in output, followed by a
 * terminating NUL.  When bufsize is 0 nothing is written at all.
 * Truncation happens per output character, so a truncated result may end
 * in an incomplete %XX sequence.
 *
 * Returns the number of characters in the total (untruncated) output,
 * not counting the terminating NUL; a return value >= bufsize means the
 * output was truncated.
 */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q = output;
    size_t n = 0;

    for (p = input; (c = *p); p++) {
        /* n always counts; a store only happens while room remains */
        if (c <= ' ' || c > '~') {
            if (++n < bufsize)
                *q++ = '%';
            if (++n < bufsize)
                *q++ = uchexchar[c >> 4];
            if (++n < bufsize)
                *q++ = uchexchar[c & 15];
        } else {
            if (++n < bufsize)
                *q++ = c;
        }
    }

    /* With a zero-sized buffer there is no room even for the NUL */
    if (bufsize)
        *q = '\0';

    return n;
}
| |
/*
 * Convert one hexadecimal digit character (0-9, a-f, A-F) to its value
 * 0..15.  Returns -1 for any other character.
 */
static int hexdigit(char c)
{
    char folded;

    if ('0' <= c && c <= '9')
        return c - '0';

    /* Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' */
    folded = c | 0x20;

    return (folded >= 'a' && folded <= 'f') ? folded - 'a' + 10 : -1;
}
| |
/*
 * Unescapes a buffer in place, optionally stopping at an *unescaped*
 * terminator character (like ; for TFTP).
 *
 * Each "%XY" where X and Y are both hex digits is replaced by the single
 * byte with that value; a '%' not followed by two hex digits is copied
 * through unchanged.  A terminator byte produced by an escape does not
 * stop the scan.  Because the scan ends at the NUL byte before the
 * terminator is compared, a terminator of '\0' never matches.
 *
 * Returns:
 *   - If the terminator is found: the unescaped text is NUL-terminated
 *     and a pointer to the terminator's position in buffer is returned.
 *     If no escape was collapsed before that point, the NUL is stored on
 *     the terminator itself, so the returned pointer refers to that NUL.
 *   - Otherwise: the whole string is unescaped and NULL is returned.
 *
 * NOTE(review): a caller that wants the text following the terminator has
 * to step past the returned pointer itself -- confirm against callers.
 */
char *url_unescape(char *buffer, char terminator)
{
    char *src = buffer;         /* read cursor */
    char *dst = buffer;         /* write cursor, never ahead of src */
    unsigned char ch;

    for (;;) {
        ch = *src;
        if (!ch)
            break;

        if (ch == terminator) {
            *dst = '\0';
            return src;
        }

        src++;
        if (ch == '%') {
            int hi = hexdigit(src[0]);
            /* Only look at the second digit if the first was valid */
            int lo = (hi >= 0) ? hexdigit(src[1]) : -1;

            if (lo >= 0) {
                *dst++ = (hi << 4) + lo;
                src += 2;
                continue;
            }
        }

        *dst++ = ch;
    }

    *dst = '\0';
    return NULL;
}
| |
| #ifdef URL_TEST |
| |
| int main(int argc, char *argv[]) |
| { |
| int i; |
| struct url_info url; |
| |
| for (i = 1; i < argc; i++) { |
| parse_url(&url, argv[i]); |
| printf("scheme: %s\n" |
| "user: %s\n" |
| "passwd: %s\n" |
| "host: %s\n" |
| "port: %d\n" |
| "path: %s\n" |
| "type: %d\n", |
| url.scheme, url.user, url.passwd, url.host, url.port, |
| url.path, url.type); |
| } |
| |
| return 0; |
| } |
| |
| #endif |