| /* ----------------------------------------------------------------------- * |
| * |
| * Copyright 2011 Intel Corporation; author: H. Peter Anvin |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, |
| * Boston MA 02110-1301, USA; either version 2 of the License, or |
| * (at your option) any later version; incorporated herein by reference. |
| * |
| * ----------------------------------------------------------------------- */ |
| |
| #include <inttypes.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <ctype.h> |
| #include <dprintf.h> |
| #include "pxe.h" |
| |
/*
 * Scanner states used while looking for href attribute values inside
 * <a ...> tags.  The numeric value of each state is the index of its
 * row in statemachine[], so the values must stay dense and in order.
 */
enum http_readdir_state {
    st_start     = 0,   /* Outside of any tag */
    st_open      = 1,   /* "<" */
    st_a         = 2,   /* "<a" */
    st_attribute = 3,   /* "<a " - between attributes of an <a> tag */
    st_h         = 4,   /* "<a h" */
    st_hr        = 5,   /* "<a hr" */
    st_hre       = 6,   /* "<a hre" */
    st_href      = 7,   /* "<a href" */
    st_hrefeq    = 8,   /* "<a href=" - also covers an unquoted value */
    st_hrefqu    = 9,   /* "<a href=\"" - inside the quoted value */
    st_badtag    = 10,  /* Inside a tag that is not <a> */
    st_badtagqu  = 11,  /* Inside a quoted string in such a tag */
    st_badattr   = 12,  /* Inside an <a> attribute that is not href */
    st_badattrqu = 13,  /* Inside a quoted string in such an attribute */
};
| |
/*
 * One row of the scanner's transition table.  The input character is
 * first compared (lower-cased) against xchar; if it matches, st_xchar
 * is the next state.  Otherwise the next state is chosen by character
 * class.  The st_* members hold enum http_readdir_state values packed
 * into single bytes.
 */
struct machine {
    char xchar;         /* Character this state is specifically waiting for */
    uint8_t st_xchar;   /* Next state when xchar is seen */
    uint8_t st_left;    /* Next state on '<' */
    uint8_t st_right;   /* Next state on '>' */
    uint8_t st_space;   /* Next state on whitespace */
    uint8_t st_other;   /* Next state on any other character */
};
| |
| static const struct machine statemachine[] = { |
| /* xchar st_xchar st_left st_right st_space st_other */ |
| { 0, 0, st_open, st_start, st_start, st_start }, |
| { 'a', st_a, st_badtag, st_start, st_open, st_badtag }, |
| { 0, 0, st_open, st_open, st_attribute, st_badtag }, |
| { 'h', st_h, st_open, st_start, st_attribute, st_badattr }, |
| { 'r', st_hr, st_open, st_start, st_attribute, st_badattr }, |
| { 'e', st_hre, st_open, st_start, st_attribute, st_badattr }, |
| { 'f', st_href, st_open, st_start, st_attribute, st_badattr }, |
| { '=', st_hrefeq, st_open, st_start, st_attribute, st_badattr }, |
| { '\"', st_hrefqu, st_open, st_start, st_attribute, st_hrefeq }, |
| { '\"', st_attribute, st_hrefqu, st_hrefqu, st_hrefqu, st_hrefqu }, |
| { '\"', st_badtagqu, st_open, st_start, st_badtag, st_badtag }, |
| { '\"', st_badtag, st_badtagqu, st_badtagqu, st_badtagqu, st_badtagqu }, |
| { '\"', st_badattrqu, st_open, st_start, st_attribute, st_badattr }, |
| { '\"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu }, |
| }; |
| |
/*
 * A named HTML character entity and the code point it stands for.
 * entity[] holds the name without the leading '&' and trailing ';'.
 * The longest name in the table ("thetasym", 8 characters) plus its
 * terminating NUL exactly fills the array.
 */
struct html_entity {
    uint16_t ucs;
    const char entity[9];
};

/*
 * Known entity names, terminated by an entry with ucs == 0.  Only the
 * four markup-significant entities are always present; the rest of the
 * list is compiled in when HTTP_ALL_ENTITIES is defined.
 */
static const struct html_entity entities[] = {
    /* Markup-significant characters */
    {   34, "quot" },
    {   38, "amp" },
    {   60, "lt" },
    {   62, "gt" },
#ifdef HTTP_ALL_ENTITIES
    /* Code points 160-255 */
    {  160, "nbsp" },
    {  161, "iexcl" },
    {  162, "cent" },
    {  163, "pound" },
    {  164, "curren" },
    {  165, "yen" },
    {  166, "brvbar" },
    {  167, "sect" },
    {  168, "uml" },
    {  169, "copy" },
    {  170, "ordf" },
    {  171, "laquo" },
    {  172, "not" },
    {  173, "shy" },
    {  174, "reg" },
    {  175, "macr" },
    {  176, "deg" },
    {  177, "plusmn" },
    {  178, "sup2" },
    {  179, "sup3" },
    {  180, "acute" },
    {  181, "micro" },
    {  182, "para" },
    {  183, "middot" },
    {  184, "cedil" },
    {  185, "sup1" },
    {  186, "ordm" },
    {  187, "raquo" },
    {  188, "frac14" },
    {  189, "frac12" },
    {  190, "frac34" },
    {  191, "iquest" },
    {  192, "Agrave" },
    {  193, "Aacute" },
    {  194, "Acirc" },
    {  195, "Atilde" },
    {  196, "Auml" },
    {  197, "Aring" },
    {  198, "AElig" },
    {  199, "Ccedil" },
    {  200, "Egrave" },
    {  201, "Eacute" },
    {  202, "Ecirc" },
    {  203, "Euml" },
    {  204, "Igrave" },
    {  205, "Iacute" },
    {  206, "Icirc" },
    {  207, "Iuml" },
    {  208, "ETH" },
    {  209, "Ntilde" },
    {  210, "Ograve" },
    {  211, "Oacute" },
    {  212, "Ocirc" },
    {  213, "Otilde" },
    {  214, "Ouml" },
    {  215, "times" },
    {  216, "Oslash" },
    {  217, "Ugrave" },
    {  218, "Uacute" },
    {  219, "Ucirc" },
    {  220, "Uuml" },
    {  221, "Yacute" },
    {  222, "THORN" },
    {  223, "szlig" },
    {  224, "agrave" },
    {  225, "aacute" },
    {  226, "acirc" },
    {  227, "atilde" },
    {  228, "auml" },
    {  229, "aring" },
    {  230, "aelig" },
    {  231, "ccedil" },
    {  232, "egrave" },
    {  233, "eacute" },
    {  234, "ecirc" },
    {  235, "euml" },
    {  236, "igrave" },
    {  237, "iacute" },
    {  238, "icirc" },
    {  239, "iuml" },
    {  240, "eth" },
    {  241, "ntilde" },
    {  242, "ograve" },
    {  243, "oacute" },
    {  244, "ocirc" },
    {  245, "otilde" },
    {  246, "ouml" },
    {  247, "divide" },
    {  248, "oslash" },
    {  249, "ugrave" },
    {  250, "uacute" },
    {  251, "ucirc" },
    {  252, "uuml" },
    {  253, "yacute" },
    {  254, "thorn" },
    {  255, "yuml" },
    /* Additional Latin letters and modifier characters */
    {  338, "OElig" },
    {  339, "oelig" },
    {  352, "Scaron" },
    {  353, "scaron" },
    {  376, "Yuml" },
    {  402, "fnof" },
    {  710, "circ" },
    {  732, "tilde" },
    /* Greek */
    {  913, "Alpha" },
    {  914, "Beta" },
    {  915, "Gamma" },
    {  916, "Delta" },
    {  917, "Epsilon" },
    {  918, "Zeta" },
    {  919, "Eta" },
    {  920, "Theta" },
    {  921, "Iota" },
    {  922, "Kappa" },
    {  923, "Lambda" },
    {  924, "Mu" },
    {  925, "Nu" },
    {  926, "Xi" },
    {  927, "Omicron" },
    {  928, "Pi" },
    {  929, "Rho" },
    {  931, "Sigma" },
    {  932, "Tau" },
    {  933, "Upsilon" },
    {  934, "Phi" },
    {  935, "Chi" },
    {  936, "Psi" },
    {  937, "Omega" },
    {  945, "alpha" },
    {  946, "beta" },
    {  947, "gamma" },
    {  948, "delta" },
    {  949, "epsilon" },
    {  950, "zeta" },
    {  951, "eta" },
    {  952, "theta" },
    {  953, "iota" },
    {  954, "kappa" },
    {  955, "lambda" },
    {  956, "mu" },
    {  957, "nu" },
    {  958, "xi" },
    {  959, "omicron" },
    {  960, "pi" },
    {  961, "rho" },
    {  962, "sigmaf" },
    {  963, "sigma" },
    {  964, "tau" },
    {  965, "upsilon" },
    {  966, "phi" },
    {  967, "chi" },
    {  968, "psi" },
    {  969, "omega" },
    {  977, "thetasym" },
    {  978, "upsih" },
    {  982, "piv" },
    /* Spaces, punctuation and currency */
    { 8194, "ensp" },
    { 8195, "emsp" },
    { 8201, "thinsp" },
    { 8204, "zwnj" },
    { 8205, "zwj" },
    { 8206, "lrm" },
    { 8207, "rlm" },
    { 8211, "ndash" },
    { 8212, "mdash" },
    { 8216, "lsquo" },
    { 8217, "rsquo" },
    { 8218, "sbquo" },
    { 8220, "ldquo" },
    { 8221, "rdquo" },
    { 8222, "bdquo" },
    { 8224, "dagger" },
    { 8225, "Dagger" },
    { 8226, "bull" },
    { 8230, "hellip" },
    { 8240, "permil" },
    { 8242, "prime" },
    { 8243, "Prime" },
    { 8249, "lsaquo" },
    { 8250, "rsaquo" },
    { 8254, "oline" },
    { 8260, "frasl" },
    { 8364, "euro" },
    /* Letterlike symbols and arrows */
    { 8465, "image" },
    { 8472, "weierp" },
    { 8476, "real" },
    { 8482, "trade" },
    { 8501, "alefsym" },
    { 8592, "larr" },
    { 8593, "uarr" },
    { 8594, "rarr" },
    { 8595, "darr" },
    { 8596, "harr" },
    { 8629, "crarr" },
    { 8656, "lArr" },
    { 8657, "uArr" },
    { 8658, "rArr" },
    { 8659, "dArr" },
    { 8660, "hArr" },
    /* Mathematical operators */
    { 8704, "forall" },
    { 8706, "part" },
    { 8707, "exist" },
    { 8709, "empty" },
    { 8711, "nabla" },
    { 8712, "isin" },
    { 8713, "notin" },
    { 8715, "ni" },
    { 8719, "prod" },
    { 8721, "sum" },
    { 8722, "minus" },
    { 8727, "lowast" },
    { 8730, "radic" },
    { 8733, "prop" },
    { 8734, "infin" },
    { 8736, "ang" },
    { 8743, "and" },
    { 8744, "or" },
    { 8745, "cap" },
    { 8746, "cup" },
    { 8747, "int" },
    { 8756, "there4" },
    { 8764, "sim" },
    { 8773, "cong" },
    { 8776, "asymp" },
    { 8800, "ne" },
    { 8801, "equiv" },
    { 8804, "le" },
    { 8805, "ge" },
    { 8834, "sub" },
    { 8835, "sup" },
    { 8836, "nsub" },
    { 8838, "sube" },
    { 8839, "supe" },
    { 8853, "oplus" },
    { 8855, "otimes" },
    { 8869, "perp" },
    { 8901, "sdot" },
    /* Brackets and miscellaneous symbols */
    { 8968, "lceil" },
    { 8969, "rceil" },
    { 8970, "lfloor" },
    { 8971, "rfloor" },
    { 9001, "lang" },
    { 9002, "rang" },
    { 9674, "loz" },
    { 9824, "spades" },
    { 9827, "clubs" },
    { 9829, "hearts" },
    { 9830, "diams" },
#endif /* HTTP_ALL_ENTITIES */
    {    0, "" }        /* End of table */
};

/*
 * Decoder state carried between emit() calls.
 *
 * ep is NULL while ordinary text is being copied.  After an '&' it
 * points at the next free byte of entity_buf, which accumulates the
 * entity body (name or "#number") until the closing ';'.
 */
struct entity_state {
    char entity_buf[16];
    char *ep;
};

/*
 * Feed one input character through the entity decoder.
 *
 * p  - output cursor; decoded bytes are stored here.
 * c  - next input character.
 * st - decoder state; must have ep == NULL before the first call.
 *
 * Returns the advanced output cursor.  A call stores at most four
 * bytes (one UTF-8 sequence) and never NUL-terminates the output, so
 * the caller has to leave that much slack and terminate the string
 * itself.
 *
 * Ordinary characters are copied through.  "&name;", "&#NNN;" and
 * "&#xHHH;" are replaced by the UTF-8 encoding of the code point.
 * Unknown names, control characters below 32, surrogates and values
 * above U+10FFFF produce no output.  Entity bodies longer than
 * entity_buf can hold are silently truncated.
 */
static char *emit(char *p, int c, struct entity_state *st)
{
    const struct html_entity *ent;
    unsigned long ucs;  /* Full strtoul() width, so huge values can't wrap */

    if (!st->ep) {
        if (c == '&')
            st->ep = st->entity_buf;    /* Entity open */
        else
            *p++ = c;
        return p;
    }

    if (c != ';') {
        /* Collect the entity body, always keeping room for the NUL */
        if (st->ep < st->entity_buf + sizeof st->entity_buf - 1)
            *st->ep++ = c;
        return p;
    }

    /*
     * Entity complete.  Terminate the collected body (not the output
     * buffer): entity_buf is reused for every entity, so without this
     * a short entity following a longer one would be compared with the
     * longer one's leftover tail.
     */
    *st->ep = '\0';
    st->ep = NULL;

    if (st->entity_buf[0] == '#') {
        /* Numeric reference; "| 0x20" folds 'X' to 'x' */
        if ((st->entity_buf[1] | 0x20) == 'x')
            ucs = strtoul(st->entity_buf + 2, NULL, 16);
        else
            ucs = strtoul(st->entity_buf + 1, NULL, 10);
    } else {
        /* Falls through to the ucs == 0 terminator when not found */
        for (ent = entities; ent->ucs; ent++) {
            if (!strcmp(st->entity_buf, ent->entity))
                break;
        }
        ucs = ent->ucs;
    }

    if (ucs < 32 || ucs > 0x10ffff || (ucs >= 0xd800 && ucs <= 0xdfff))
        return p;       /* Bogus */

    /* Encode as UTF-8 */
    if (ucs >= 0x10000) {
        *p++ = (char)(0xf0 + (ucs >> 18));
        *p++ = (char)(0x80 + ((ucs >> 12) & 0x3f));
        *p++ = (char)(0x80 + ((ucs >> 6) & 0x3f));
        *p++ = (char)(0x80 + (ucs & 0x3f));
    } else if (ucs >= 0x800) {
        *p++ = (char)(0xe0 + (ucs >> 12));
        *p++ = (char)(0x80 + ((ucs >> 6) & 0x3f));
        *p++ = (char)(0x80 + (ucs & 0x3f));
    } else if (ucs >= 0x80) {
        *p++ = (char)(0xc0 + (ucs >> 6));
        *p++ = (char)(0x80 + (ucs & 0x3f));
    } else {
        *p++ = (char)ucs;
    }
    return p;
}
| |
| static const char *http_get_filename(struct inode *inode, char *buf) |
| { |
| int c, lc; |
| char *p; |
| const struct machine *sm; |
| struct entity_state es; |
| enum http_readdir_state state = st_start; |
| enum http_readdir_state pstate = st_start; |
| |
| memset(&es, 0, sizeof es); |
| |
| p = buf; |
| for (;;) { |
| c = pxe_getc(inode); |
| if (c == -1) |
| return NULL; |
| |
| lc = tolower(c); |
| |
| sm = &statemachine[state]; |
| |
| if (lc == sm->xchar) |
| state = sm->st_xchar; |
| else if (c == '<') |
| state = sm->st_left; |
| else if (c == '>') |
| state = sm->st_right; |
| else if (isspace(c)) |
| state = sm->st_space; |
| else |
| state = sm->st_other; |
| |
| if (state == st_hrefeq || state == st_hrefqu) { |
| if (state != pstate) |
| p = buf; |
| else if (p < buf + FILENAME_MAX) |
| p = emit(p, c, &es); |
| pstate = state; |
| } else { |
| if (pstate != st_start) |
| pstate = st_start; |
| if (p != buf && state == st_start) { |
| *p = '\0'; |
| return buf; |
| } |
| } |
| } |
| } |
| |
| int http_readdir(struct inode *inode, struct dirent *dirent) |
| { |
| char buf[FILENAME_MAX + 6]; |
| const char *fn, *sp; |
| |
| for (;;) { |
| fn = http_get_filename(inode, buf); |
| |
| if (!fn) |
| return -1; /* End of directory */ |
| |
| /* Ignore entries with http special characters */ |
| if (strchr(fn, '#')) |
| continue; |
| if (strchr(fn, '?')) |
| continue; |
| |
| /* A slash if present has to be the last character, and not the first */ |
| sp = strchr(fn, '/'); |
| if (sp) { |
| if (sp == fn || sp[1]) |
| continue; |
| } else { |
| sp = strchr(fn, '\0'); |
| } |
| |
| if (sp > fn + NAME_MAX) |
| continue; |
| |
| dirent->d_ino = 0; /* Not applicable */ |
| dirent->d_off = 0; /* Not applicable */ |
| dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1; |
| dirent->d_type = *sp == '/' ? DT_DIR : DT_REG; |
| memcpy(dirent->d_name, fn, sp-fn); |
| dirent->d_name[sp-fn] = '\0'; |
| return 0; |
| } |
| } |