Logo Search packages:      
Sourcecode: ldc version File versions

entity.c

// Copyright (c) 1999-2009 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// License for redistribution is by either the Artistic License
// in artistic.txt, or the GNU General Public License in gnu.txt.
// See the included readme.txt for details.


#include <string.h>

/*********************************************
 * Convert from named entity to its encoding.
 * For reference:
 *    http://www.htmlhelp.com/reference/html40/entities/
 *    http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
 */

// One row of an entity lookup table: an entity name and the numeric value
// that HtmlNamedEntity() returns for it.  The tables below initialize rows
// positionally, so the member order (name, then value) matters.
struct NameId
{
    const char *name;       // entity name as spelled in the tables, e.g. "amp"
    unsigned short value;   // value returned when the name matches
};

#if IN_GCC
// Entities whose names begin with 'a' or 'A'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesA[] = {
    { "Aacgr",    0x0386 },
    { "aacgr",    0x03AC },
    { "Aacute",   0x00C1 },
    { "aacute",   0x00E1 },
    { "Abreve",   0x0102 },
    { "abreve",   0x0103 },
    { "Acirc",    0x00C2 },
    { "acirc",    0x00E2 },
    { "acute",    0x00B4 },
    { "Acy",      0x0410 },
    { "acy",      0x0430 },
    { "AElig",    0x00C6 },
    { "aelig",    0x00E6 },
    { "Agr",      0x0391 },
    { "agr",      0x03B1 },
    { "Agrave",   0x00C0 },
    { "agrave",   0x00E0 },
    { "aleph",    0x2135 },
    { "alpha",    0x03B1 },
    { "Amacr",    0x0100 },
    { "amacr",    0x0101 },
    { "amalg",    0x2210 },
    { "amp",      0x0026 },
    { "and",      0x2227 },
    { "ang",      0x2220 },
    { "ang90",    0x221F },
    { "angmsd",   0x2221 },
    { "angsph",   0x2222 },
    { "angst",    0x212B },
    { "Aogon",    0x0104 },
    { "aogon",    0x0105 },
    { "ap",       0x2248 },
    { "ape",      0x224A },
    { "apos",     0x0027 },
    { "Aring",    0x00C5 },
    { "aring",    0x00E5 },
    { "ast",      0x002A },
    { "asymp",    0x224D },
    { "Atilde",   0x00C3 },
    { "atilde",   0x00E3 },
    { "Auml",     0x00C4 },
    { "auml",     0x00E4 },
    { NULL,       0 }
};

// Entities whose names begin with 'b' or 'B'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesB[] = {
    { "barwed",   0x22BC },
    { "Barwed",   0x2306 },
    { "bcong",    0x224C },
    { "Bcy",      0x0411 },
    { "bcy",      0x0431 },
    { "becaus",   0x2235 },
    { "bepsi",    0x220D },
    { "bernou",   0x212C },
    { "beta",     0x03B2 },
    { "beth",     0x2136 },
    { "Bgr",      0x0392 },
    { "bgr",      0x03B2 },
    { "blank",    0x2423 },
    { "blk12",    0x2592 },
    { "blk14",    0x2591 },
    { "blk34",    0x2593 },
    { "block",    0x2588 },
    { "bottom",   0x22A5 },
    { "bowtie",   0x22C8 },
    { "boxdl",    0x2510 },
    { "boxDL",    0x2555 },
    { "boxdL",    0x2556 },
    { "boxDl",    0x2557 },
    { "boxdr",    0x250C },
    { "boxDR",    0x2552 },
    { "boxDr",    0x2553 },
    { "boxdR",    0x2554 },
    { "boxh",     0x2500 },
    { "boxH",     0x2550 },
    { "boxhd",    0x252C },
    { "boxhD",    0x2564 },
    { "boxHD",    0x2565 },
    { "boxHd",    0x2566 },
    { "boxhu",    0x2534 },
    { "boxhU",    0x2567 },
    { "boxHU",    0x2568 },
    { "boxHu",    0x2569 },
    { "boxul",    0x2518 },
    { "boxUL",    0x255B },
    { "boxUl",    0x255C },
    { "boxuL",    0x255D },
    { "boxur",    0x2514 },
    { "boxUR",    0x2558 },
    { "boxuR",    0x2559 },
    { "boxUr",    0x255A },
    { "boxv",     0x2502 },
    { "boxV",     0x2551 },
    { "boxvh",    0x253C },
    { "boxvH",    0x256A },
    { "boxVH",    0x256B },
    { "boxVh",    0x256C },
    { "boxvl",    0x2524 },
    { "boxvL",    0x2561 },
    { "boxVL",    0x2562 },
    { "boxVl",    0x2563 },
    { "boxvr",    0x251C },
    { "boxvR",    0x255E },
    { "boxVR",    0x255F },
    { "boxVr",    0x2560 },
    { "bprime",   0x2035 },
    { "breve",    0x02D8 },
    { "brvbar",   0x00A6 },
    { "bsim",     0x223D },
    { "bsime",    0x22CD },
    { "bsol",     0x005C },
    { "bull",     0x2022 },
    { "bump",     0x224E },
    { "bumpe",    0x224F },
    { NULL,       0 }
};

// Entities whose names begin with 'c' or 'C'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesC[] = {
    { "Cacute",   0x0106 },
    { "cacute",   0x0107 },
    { "cap",      0x2229 },
    { "Cap",      0x22D2 },
    { "caret",    0x2041 },
    { "caron",    0x02C7 },
    { "Ccaron",   0x010C },
    { "ccaron",   0x010D },
    { "Ccedil",   0x00C7 },
    { "ccedil",   0x00E7 },
    { "Ccirc",    0x0108 },
    { "ccirc",    0x0109 },
    { "Cdot",     0x010A },
    { "cdot",     0x010B },
    { "cedil",    0x00B8 },
    { "cent",     0x00A2 },
    { "CHcy",     0x0427 },
    { "chcy",     0x0447 },
    { "check",    0x2713 },
    { "chi",      0x03C7 },
    { "cir",      0x25CB },
    { "circ",     0x005E },
    { "cire",     0x2257 },
    { "clubs",    0x2663 },
    { "colon",    0x003A },
    { "colone",   0x2254 },
    { "comma",    0x002C },
    { "commat",   0x0040 },
    { "comp",     0x2201 },
    { "compfn",   0x2218 },
    { "cong",     0x2245 },
    { "conint",   0x222E },
    { "coprod",   0x2210 },
    { "copy",     0x00A9 },
    { "copysr",   0x2117 },
    { "cross",    0x2717 },
    { "cuepr",    0x22DE },
    { "cuesc",    0x22DF },
    { "cularr",   0x21B6 },
    { "cup",      0x222A },
    { "Cup",      0x22D3 },
    { "cupre",    0x227C },
    { "curarr",   0x21B7 },
    { "curren",   0x00A4 },
    { "cuvee",    0x22CE },
    { "cuwed",    0x22CF },
    { NULL,       0 }
};

// Entities whose names begin with 'd' or 'D'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesD[] = {
    { "dagger",   0x2020 },
    { "Dagger",   0x2021 },
    { "daleth",   0x2138 },
    { "darr",     0x2193 },
    { "dArr",     0x21D3 },
    { "darr2",    0x21CA },
    { "dash",     0x2010 },
    { "dashv",    0x22A3 },
    { "dblac",    0x02DD },
    { "Dcaron",   0x010E },
    { "dcaron",   0x010F },
    { "Dcy",      0x0414 },
    { "dcy",      0x0434 },
    { "deg",      0x00B0 },
    { "Delta",    0x0394 },
    { "delta",    0x03B4 },
    { "Dgr",      0x0394 },
    { "dgr",      0x03B4 },
    { "dharl",    0x21C3 },
    { "dharr",    0x21C2 },
    { "diam",     0x22C4 },
    { "diams",    0x2666 },
    { "die",      0x00A8 },
    { "divide",   0x00F7 },
    { "divonx",   0x22C7 },
    { "DJcy",     0x0402 },
    { "djcy",     0x0452 },
    { "dlarr",    0x2199 },
    { "dlcorn",   0x231E },
    { "dlcrop",   0x230D },
    { "dollar",   0x0024 },
    { "Dot",      0x00A8 },
    { "dot",      0x02D9 },
    { "DotDot",   0x20DC },
    { "drarr",    0x2198 },
    { "drcorn",   0x231F },
    { "drcrop",   0x230C },
    { "DScy",     0x0405 },
    { "dscy",     0x0455 },
    { "Dstrok",   0x0110 },
    { "dstrok",   0x0111 },
    { "dtri",     0x25BF },
    { "dtrif",    0x25BE },
    { "DZcy",     0x040F },
    { "dzcy",     0x045F },
    { NULL,       0 }
};

// Entities whose names begin with 'e' or 'E'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesE[] = {
    { "Eacgr",    0x0388 },
    { "eacgr",    0x03AD },
    { "Eacute",   0x00C9 },
    { "eacute",   0x00E9 },
    { "Ecaron",   0x011A },
    { "ecaron",   0x011B },
    { "ecir",     0x2256 },
    { "Ecirc",    0x00CA },
    { "ecirc",    0x00EA },
    { "ecolon",   0x2255 },
    { "Ecy",      0x042D },
    { "ecy",      0x044D },
    { "Edot",     0x0116 },
    { "edot",     0x0117 },
    { "eDot",     0x2251 },
    { "EEacgr",   0x0389 },
    { "eeacgr",   0x03AE },
    { "EEgr",     0x0397 },
    { "eegr",     0x03B7 },
    { "efDot",    0x2252 },
    { "Egr",      0x0395 },
    { "egr",      0x03B5 },
    { "Egrave",   0x00C8 },
    { "egrave",   0x00E8 },
    { "egs",      0x22DD },
    { "ell",      0x2113 },
    { "els",      0x22DC },
    { "Emacr",    0x0112 },
    { "emacr",    0x0113 },
    { "empty",    0x2205 },
    { "emsp",     0x2003 },
    { "emsp13",   0x2004 },
    { "emsp14",   0x2005 },
    { "ENG",      0x014A },
    { "eng",      0x014B },
    { "ensp",     0x2002 },
    { "Eogon",    0x0118 },
    { "eogon",    0x0119 },
    { "epsi",     0x220A },
    { "epsis",    0x220A },
    { "epsiv",    0x03B5 },
    { "equals",   0x003D },
    { "equiv",    0x2261 },
    { "erDot",    0x2253 },
    { "esdot",    0x2250 },
    { "eta",      0x03B7 },
    { "ETH",      0x00D0 },
    { "eth",      0x00F0 },
    { "Euml",     0x00CB },
    { "euml",     0x00EB },
    { "excl",     0x0021 },
    { "exist",    0x2203 },
    { NULL,       0 }
};

// Entities whose names begin with 'f' or 'F'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesF[] = {
    { "Fcy",      0x0424 },
    { "fcy",      0x0444 },
    { "female",   0x2640 },
    { "ffilig",   0xFB03 },
    { "fflig",    0xFB00 },
    { "ffllig",   0xFB04 },
    { "filig",    0xFB01 },
    { "flat",     0x266D },
    { "fllig",    0xFB02 },
    { "fnof",     0x0192 },
    { "forall",   0x2200 },
    { "fork",     0x22D4 },
    { "frac12",   0x00BD },
    { "frac13",   0x2153 },
    { "frac14",   0x00BC },
    { "frac15",   0x2155 },
    { "frac16",   0x2159 },
    { "frac18",   0x215B },
    { "frac23",   0x2154 },
    { "frac25",   0x2156 },
    { "frac34",   0x00BE },
    { "frac35",   0x2157 },
    { "frac38",   0x215C },
    { "frac45",   0x2158 },
    { "frac56",   0x215A },
    { "frac58",   0x215D },
    { "frac78",   0x215E },
    { "frown",    0x2322 },
    { NULL,       0 }
};

// Entities whose names begin with 'g' or 'G'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesG[] = {
    { "gacute",   0x01F5 },
    { "Gamma",    0x0393 },
    { "gamma",    0x03B3 },
    { "gammad",   0x03DC },
    { "gap",      0x2273 },
    { "Gbreve",   0x011E },
    { "gbreve",   0x011F },
    { "Gcedil",   0x0122 },
    { "Gcirc",    0x011C },
    { "gcirc",    0x011D },
    { "Gcy",      0x0413 },
    { "gcy",      0x0433 },
    { "Gdot",     0x0120 },
    { "gdot",     0x0121 },
    { "ge",       0x2265 },
    { "gE",       0x2267 },
    { "gel",      0x22DB },
    { "gEl",      0x22DB },
    { "ges",      0x2265 },
    { "Gg",       0x22D9 },
    { "Ggr",      0x0393 },
    { "ggr",      0x03B3 },
    { "gimel",    0x2137 },
    { "GJcy",     0x0403 },
    { "gjcy",     0x0453 },
    { "gl",       0x2277 },
    { "gnap",     0xE411 },
    { "gne",      0x2269 },
    { "gnE",      0x2269 },
    { "gnsim",    0x22E7 },
    { "grave",    0x0060 },
    { "gsdot",    0x22D7 },
    { "gsim",     0x2273 },
    { "gt",       0x003E },
    { "Gt",       0x226B },
    { "gvnE",     0x2269 },
    { NULL,       0 }
};

// Entities whose names begin with 'h' or 'H'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesH[] = {
    { "hairsp",   0x200A },
    { "half",     0x00BD },
    { "hamilt",   0x210B },
    { "HARDcy",   0x042A },
    { "hardcy",   0x044A },
    { "harr",     0x2194 },
    { "hArr",     0x21D4 },
    { "harrw",    0x21AD },
    { "Hcirc",    0x0124 },
    { "hcirc",    0x0125 },
    { "hearts",   0x2665 },
    { "hellip",   0x2026 },
    { "horbar",   0x2015 },
    { "Hstrok",   0x0126 },
    { "hstrok",   0x0127 },
    { "hybull",   0x2043 },
    { "hyphen",   0x002D },
    { NULL,       0 }
};

// Entities whose names begin with 'i' or 'I'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesI[] = {
    { "Iacgr",    0x038A },
    { "iacgr",    0x03AF },
    { "Iacute",   0x00CD },
    { "iacute",   0x00ED },
    { "Icirc",    0x00CE },
    { "icirc",    0x00EE },
    { "Icy",      0x0418 },
    { "icy",      0x0438 },
    { "idiagr",   0x0390 },
    { "Idigr",    0x03AA },
    { "idigr",    0x03CA },
    { "Idot",     0x0130 },
    { "IEcy",     0x0415 },
    { "iecy",     0x0435 },
    { "iexcl",    0x00A1 },
    { "iff",      0x21D4 },
    { "Igr",      0x0399 },
    { "igr",      0x03B9 },
    { "Igrave",   0x00CC },
    { "igrave",   0x00EC },
    { "IJlig",    0x0132 },
    { "ijlig",    0x0133 },
    { "Imacr",    0x012A },
    { "imacr",    0x012B },
    { "image",    0x2111 },
    { "incare",   0x2105 },
    { "infin",    0x221E },
    { "inodot",   0x0131 },
    { "int",      0x222B },
    { "intcal",   0x22BA },
    { "IOcy",     0x0401 },
    { "iocy",     0x0451 },
    { "Iogon",    0x012E },
    { "iogon",    0x012F },
    { "iota",     0x03B9 },
    { "iquest",   0x00BF },
    { "isin",     0x220A },
    { "Itilde",   0x0128 },
    { "itilde",   0x0129 },
    { "Iukcy",    0x0406 },
    { "iukcy",    0x0456 },
    { "Iuml",     0x00CF },
    { "iuml",     0x00EF },
    { NULL,       0 }
};

// Entities whose names begin with 'j' or 'J'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesJ[] = {
    { "Jcirc",    0x0134 },
    { "jcirc",    0x0135 },
    { "Jcy",      0x0419 },
    { "jcy",      0x0439 },
    { "Jsercy",   0x0408 },
    { "jsercy",   0x0458 },
    { "Jukcy",    0x0404 },
    { "jukcy",    0x0454 },
    { NULL,       0 }
};

// Entities whose names begin with 'k' or 'K'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesK[] = {
    { "kappa",    0x03BA },
    { "kappav",   0x03F0 },
    { "Kcedil",   0x0136 },
    { "kcedil",   0x0137 },
    { "Kcy",      0x041A },
    { "kcy",      0x043A },
    { "Kgr",      0x039A },
    { "kgr",      0x03BA },
    { "kgreen",   0x0138 },
    { "KHcy",     0x0425 },
    { "khcy",     0x0445 },
    { "KHgr",     0x03A7 },
    { "khgr",     0x03C7 },
    { "KJcy",     0x040C },
    { "kjcy",     0x045C },
    { NULL,       0 }
};

// Entities whose names begin with 'l' or 'L'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesL[] = {
    { "lAarr",    0x21DA },
    { "Lacute",   0x0139 },
    { "lacute",   0x013A },
    { "lagran",   0x2112 },
    { "Lambda",   0x039B },
    { "lambda",   0x03BB },
    { "lang",     0x3008 },
    { "lap",      0x2272 },
    { "laquo",    0x00AB },
    { "larr",     0x2190 },
    { "Larr",     0x219E },
    { "lArr",     0x21D0 },
    { "larr2",    0x21C7 },
    { "larrhk",   0x21A9 },
    { "larrlp",   0x21AB },
    { "larrtl",   0x21A2 },
    { "Lcaron",   0x013D },
    { "lcaron",   0x013E },
    { "Lcedil",   0x013B },
    { "lcedil",   0x013C },
    { "lceil",    0x2308 },
    { "lcub",     0x007B },
    { "Lcy",      0x041B },
    { "lcy",      0x043B },
    { "ldot",     0x22D6 },
    { "ldquo",    0x201C },
    { "ldquor",   0x201E },
    { "le",       0x2264 },
    { "lE",       0x2266 },
    { "leg",      0x22DA },
    { "lEg",      0x22DA },
    { "les",      0x2264 },
    { "lfloor",   0x230A },
    { "lg",       0x2276 },
    { "Lgr",      0x039B },
    { "lgr",      0x03BB },
    { "lhard",    0x21BD },
    { "lharu",    0x21BC },
    { "lhblk",    0x2584 },
    { "LJcy",     0x0409 },
    { "ljcy",     0x0459 },
    { "Ll",       0x22D8 },
    { "Lmidot",   0x013F },
    { "lmidot",   0x0140 },
    { "lnap",     0xE2A2 },
    { "lne",      0x2268 },
    { "lnE",      0x2268 },
    { "lnsim",    0x22E6 },
    { "lowast",   0x2217 },
    { "lowbar",   0x005F },
    { "loz",      0x25CA },
    { "lozf",     0x2726 },
    { "lpar",     0x0028 },
    { "lrarr2",   0x21C6 },
    { "lrhar2",   0x21CB },
    { "lsh",      0x21B0 },
    { "lsim",     0x2272 },
    { "lsqb",     0x005B },
    { "lsquo",    0x2018 },
    { "lsquor",   0x201A },
    { "Lstrok",   0x0141 },
    { "lstrok",   0x0142 },
    { "lt",       0x003C },
    { "Lt",       0x226A },
    { "lthree",   0x22CB },
    { "ltimes",   0x22C9 },
    { "ltri",     0x25C3 },
    { "ltrie",    0x22B4 },
    { "ltrif",    0x25C2 },
    { "lvnE",     0x2268 },
    { NULL,       0 }
};

// Entities whose names begin with 'm' or 'M'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesM[] = {
    { "macr",     0x00AF },
    { "male",     0x2642 },
    { "malt",     0x2720 },
    { "map",      0x21A6 },
    { "marker",   0x25AE },
    { "Mcy",      0x041C },
    { "mcy",      0x043C },
    { "mdash",    0x2014 },
    { "Mgr",      0x039C },
    { "mgr",      0x03BC },
    { "micro",    0x00B5 },
    { "mid",      0x2223 },
    { "middot",   0x00B7 },
    { "minus",    0x2212 },
    { "minusb",   0x229F },
    { "mldr",     0x2026 },
    { "mnplus",   0x2213 },
    { "models",   0x22A7 },
    { "mu",       0x03BC },
    { "mumap",    0x22B8 },
    { NULL,       0 }
};

// Entities whose names begin with 'n' or 'N'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesN[] = {
    { "nabla",    0x2207 },
    { "Nacute",   0x0143 },
    { "nacute",   0x0144 },
    { "nap",      0x2249 },
    { "napos",    0x0149 },
    { "natur",    0x266E },
    // "nbsp" was previously 0x00A0; it is deliberately mapped to 32 so
    // that a non-breaking space appears as an ordinary space.
    { "nbsp",     32 },
    { "Ncaron",   0x0147 },
    { "ncaron",   0x0148 },
    { "Ncedil",   0x0145 },
    { "ncedil",   0x0146 },
    { "ncong",    0x2247 },
    { "Ncy",      0x041D },
    { "ncy",      0x043D },
    { "ndash",    0x2013 },
    { "ne",       0x2260 },
    { "nearr",    0x2197 },
    { "nequiv",   0x2262 },
    { "nexist",   0x2204 },
    { "nge",      0x2271 },
    { "ngE",      0x2271 },
    { "nges",     0x2271 },
    { "Ngr",      0x039D },
    { "ngr",      0x03BD },
    { "ngt",      0x226F },
    { "nharr",    0x21AE },
    { "nhArr",    0x21CE },
    { "ni",       0x220D },
    { "NJcy",     0x040A },
    { "njcy",     0x045A },
    { "nlarr",    0x219A },
    { "nlArr",    0x21CD },
    { "nldr",     0x2025 },
    { "nle",      0x2270 },
    { "nlE",      0x2270 },
    { "nles",     0x2270 },
    { "nlt",      0x226E },
    { "nltri",    0x22EA },
    { "nltrie",   0x22EC },
    { "nmid",     0x2224 },
    { "not",      0x00AC },
    { "notin",    0x2209 },
    { "npar",     0x2226 },
    { "npr",      0x2280 },
    { "npre",     0x22E0 },
    { "nrarr",    0x219B },
    { "nrArr",    0x21CF },
    { "nrtri",    0x22EB },
    { "nrtrie",   0x22ED },
    { "nsc",      0x2281 },
    { "nsce",     0x22E1 },
    { "nsim",     0x2241 },
    { "nsime",    0x2244 },
    { "nsmid",    0xE2AA },
    { "nspar",    0x2226 },
    { "nsub",     0x2284 },
    { "nsube",    0x2288 },
    { "nsubE",    0x2288 },
    { "nsup",     0x2285 },
    { "nsupe",    0x2289 },
    { "nsupE",    0x2289 },
    { "Ntilde",   0x00D1 },
    { "ntilde",   0x00F1 },
    { "nu",       0x03BD },
    { "num",      0x0023 },
    { "numero",   0x2116 },
    { "numsp",    0x2007 },
    { "nvdash",   0x22AC },
    { "nvDash",   0x22AD },
    { "nVdash",   0x22AE },
    { "nVDash",   0x22AF },
    { "nwarr",    0x2196 },
    { NULL,       0 }
};

// Entities whose names begin with 'o' or 'O'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesO[] = {
    { "Oacgr",    0x038C },
    { "oacgr",    0x03CC },
    { "Oacute",   0x00D3 },
    { "oacute",   0x00F3 },
    { "oast",     0x229B },
    { "ocir",     0x229A },
    { "Ocirc",    0x00D4 },
    { "ocirc",    0x00F4 },
    { "Ocy",      0x041E },
    { "ocy",      0x043E },
    { "odash",    0x229D },
    { "Odblac",   0x0150 },
    { "odblac",   0x0151 },
    { "odot",     0x2299 },
    { "OElig",    0x0152 },
    { "oelig",    0x0153 },
    { "ogon",     0x02DB },
    { "Ogr",      0x039F },
    { "ogr",      0x03BF },
    { "Ograve",   0x00D2 },
    { "ograve",   0x00F2 },
    { "OHacgr",   0x038F },
    { "ohacgr",   0x03CE },
    { "OHgr",     0x03A9 },
    { "ohgr",     0x03C9 },
    { "ohm",      0x2126 },
    { "olarr",    0x21BA },
    { "Omacr",    0x014C },
    { "omacr",    0x014D },
    { "Omega",    0x03A9 },
    { "omega",    0x03C9 },
    { "ominus",   0x2296 },
    { "oplus",    0x2295 },
    { "or",       0x2228 },
    { "orarr",    0x21BB },
    { "order",    0x2134 },
    { "ordf",     0x00AA },
    { "ordm",     0x00BA },
    { "oS",       0x24C8 },
    { "Oslash",   0x00D8 },
    { "oslash",   0x00F8 },
    { "osol",     0x2298 },
    { "Otilde",   0x00D5 },
    { "otilde",   0x00F5 },
    { "otimes",   0x2297 },
    { "Ouml",     0x00D6 },
    { "ouml",     0x00F6 },
    { NULL,       0 }
};

// Entities whose names begin with 'p' or 'P'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesP[] = {
    { "par",      0x2225 },
    { "para",     0x00B6 },
    { "part",     0x2202 },
    { "Pcy",      0x041F },
    { "pcy",      0x043F },
    { "percnt",   0x0025 },
    { "period",   0x002E },
    { "permil",   0x2030 },
    { "perp",     0x22A5 },
    { "Pgr",      0x03A0 },
    { "pgr",      0x03C0 },
    { "PHgr",     0x03A6 },
    { "phgr",     0x03C6 },
    { "Phi",      0x03A6 },
    { "phis",     0x03C6 },
    { "phiv",     0x03D5 },
    { "phmmat",   0x2133 },
    { "phone",    0x260E },
    { "Pi",       0x03A0 },
    { "pi",       0x03C0 },
    { "piv",      0x03D6 },
    { "planck",   0x210F },
    { "plus",     0x002B },
    { "plusb",    0x229E },
    { "plusdo",   0x2214 },
    { "plusmn",   0x00B1 },
    { "pound",    0x00A3 },
    { "pr",       0x227A },
    { "prap",     0x227E },
    { "pre",      0x227C },
    { "prime",    0x2032 },
    { "Prime",    0x2033 },
    { "prnap",    0x22E8 },
    { "prnE",     0xE2B3 },
    { "prnsim",   0x22E8 },
    { "prod",     0x220F },
    { "prop",     0x221D },
    { "prsim",    0x227E },
    { "PSgr",     0x03A8 },
    { "psgr",     0x03C8 },
    { "Psi",      0x03A8 },
    { "psi",      0x03C8 },
    { "puncsp",   0x2008 },
    { NULL,       0 }
};

// Entities whose names begin with 'q' or 'Q'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesQ[] = {
    { "quest",    0x003F },
    { "quot",     0x0022 },
    { NULL,       0 }
};

// Entities whose names begin with 'r' or 'R'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesR[] = {
    { "rAarr",    0x21DB },
    { "Racute",   0x0154 },
    { "racute",   0x0155 },
    { "radic",    0x221A },
    { "rang",     0x3009 },
    { "raquo",    0x00BB },
    { "rarr",     0x2192 },
    { "Rarr",     0x21A0 },
    { "rArr",     0x21D2 },
    { "rarr2",    0x21C9 },
    { "rarrhk",   0x21AA },
    { "rarrlp",   0x21AC },
    { "rarrtl",   0x21A3 },
    { "rarrw",    0x219D },
    { "Rcaron",   0x0158 },
    { "rcaron",   0x0159 },
    { "Rcedil",   0x0156 },
    { "rcedil",   0x0157 },
    { "rceil",    0x2309 },
    { "rcub",     0x007D },
    { "Rcy",      0x0420 },
    { "rcy",      0x0440 },
    { "rdquo",    0x201D },
    { "rdquor",   0x201C },
    { "real",     0x211C },
    { "rect",     0x25AD },
    { "reg",      0x00AE },
    { "rfloor",   0x230B },
    { "Rgr",      0x03A1 },
    { "rgr",      0x03C1 },
    { "rhard",    0x21C1 },
    { "rharu",    0x21C0 },
    { "rho",      0x03C1 },
    { "rhov",     0x03F1 },
    { "ring",     0x02DA },
    { "rlarr2",   0x21C4 },
    { "rlhar2",   0x21CC },
    { "rpar",     0x0029 },
    { "rpargt",   0xE291 },
    { "rsh",      0x21B1 },
    { "rsqb",     0x005D },
    { "rsquo",    0x2019 },
    { "rsquor",   0x2018 },
    { "rthree",   0x22CC },
    { "rtimes",   0x22CA },
    { "rtri",     0x25B9 },
    { "rtrie",    0x22B5 },
    { "rtrif",    0x25B8 },
    { "rx",       0x211E },
    { NULL,       0 }
};

// Entities whose names begin with 's' or 'S'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesS[] = {
    { "Sacute",   0x015A },
    { "sacute",   0x015B },
    { "samalg",   0x2210 },
    { "sbsol",    0xFE68 },
    { "sc",       0x227B },
    { "scap",     0x227F },
    { "Scaron",   0x0160 },
    { "scaron",   0x0161 },
    { "sccue",    0x227D },
    { "sce",      0x227D },
    { "Scedil",   0x015E },
    { "scedil",   0x015F },
    { "Scirc",    0x015C },
    { "scirc",    0x015D },
    { "scnap",    0x22E9 },
    { "scnE",     0xE2B5 },
    { "scnsim",   0x22E9 },
    { "scsim",    0x227F },
    { "Scy",      0x0421 },
    { "scy",      0x0441 },
    { "sdot",     0x22C5 },
    { "sdotb",    0x22A1 },
    { "sect",     0x00A7 },
    { "semi",     0x003B },
    { "setmn",    0x2216 },
    { "sext",     0x2736 },
    { "sfgr",     0x03C2 },
    { "sfrown",   0x2322 },
    { "Sgr",      0x03A3 },
    { "sgr",      0x03C3 },
    { "sharp",    0x266F },
    { "SHCHcy",   0x0429 },
    { "shchcy",   0x0449 },
    { "SHcy",     0x0428 },
    { "shcy",     0x0448 },
    { "shy",      0x00AD },
    { "Sigma",    0x03A3 },
    { "sigma",    0x03C3 },
    { "sigmav",   0x03C2 },
    { "sim",      0x223C },
    { "sime",     0x2243 },
    { "smid",     0xE301 },
    { "smile",    0x2323 },
    { "SOFTcy",   0x042C },
    { "softcy",   0x044C },
    { "sol",      0x002F },
    { "spades",   0x2660 },
    { "spar",     0x2225 },
    { "sqcap",    0x2293 },
    { "sqcup",    0x2294 },
    { "sqsub",    0x228F },
    { "sqsube",   0x2291 },
    { "sqsup",    0x2290 },
    { "sqsupe",   0x2292 },
    { "squ",      0x25A1 },
    { "square",   0x25A1 },
    { "squf",     0x25AA },
    { "ssetmn",   0x2216 },
    { "ssmile",   0x2323 },
    { "sstarf",   0x22C6 },
    { "star",     0x22C6 },
    { "starf",    0x2605 },
    { "sub",      0x2282 },
    { "Sub",      0x22D0 },
    { "sube",     0x2286 },
    { "subE",     0x2286 },
    { "subne",    0x228A },
    { "subnE",    0x228A },
    { "sum",      0x2211 },
    { "sung",     0x2669 },
    { "sup",      0x2283 },
    { "Sup",      0x22D1 },
    { "sup1",     0x00B9 },
    { "sup2",     0x00B2 },
    { "sup3",     0x00B3 },
    { "supe",     0x2287 },
    { "supE",     0x2287 },
    { "supne",    0x228B },
    { "supnE",    0x228B },
    { "szlig",    0x00DF },
    { NULL,       0 }
};

// Entities whose names begin with 't' or 'T'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesT[] = {
    { "target",   0x2316 },
    { "tau",      0x03C4 },
    { "Tcaron",   0x0164 },
    { "tcaron",   0x0165 },
    { "Tcedil",   0x0162 },
    { "tcedil",   0x0163 },
    { "Tcy",      0x0422 },
    { "tcy",      0x0442 },
    { "tdot",     0x20DB },
    { "telrec",   0x2315 },
    { "Tgr",      0x03A4 },
    { "tgr",      0x03C4 },
    { "there4",   0x2234 },
    { "Theta",    0x0398 },
    { "thetas",   0x03B8 },
    { "thetav",   0x03D1 },
    { "THgr",     0x0398 },
    { "thgr",     0x03B8 },
    { "thinsp",   0x2009 },
    { "thkap",    0x2248 },
    { "thksim",   0x223C },
    { "THORN",    0x00DE },
    { "thorn",    0x00FE },
    { "tilde",    0x02DC },
    { "times",    0x00D7 },
    { "timesb",   0x22A0 },
    { "top",      0x22A4 },
    { "tprime",   0x2034 },
    { "trade",    0x2122 },
    { "trie",     0x225C },
    { "TScy",     0x0426 },
    { "tscy",     0x0446 },
    { "TSHcy",    0x040B },
    { "tshcy",    0x045B },
    { "Tstrok",   0x0166 },
    { "tstrok",   0x0167 },
    { "twixt",    0x226C },
    { NULL,       0 }
};

// Entities whose names begin with 'u' or 'U'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesU[] = {
    { "Uacgr",    0x038E },
    { "uacgr",    0x03CD },
    { "Uacute",   0x00DA },
    { "uacute",   0x00FA },
    { "uarr",     0x2191 },
    { "uArr",     0x21D1 },
    { "uarr2",    0x21C8 },
    { "Ubrcy",    0x040E },
    { "ubrcy",    0x045E },
    { "Ubreve",   0x016C },
    { "ubreve",   0x016D },
    { "Ucirc",    0x00DB },
    { "ucirc",    0x00FB },
    { "Ucy",      0x0423 },
    { "ucy",      0x0443 },
    { "Udblac",   0x0170 },
    { "udblac",   0x0171 },
    { "udiagr",   0x03B0 },
    { "Udigr",    0x03AB },
    { "udigr",    0x03CB },
    { "Ugr",      0x03A5 },
    { "ugr",      0x03C5 },
    { "Ugrave",   0x00D9 },
    { "ugrave",   0x00F9 },
    { "uharl",    0x21BF },
    { "uharr",    0x21BE },
    { "uhblk",    0x2580 },
    { "ulcorn",   0x231C },
    { "ulcrop",   0x230F },
    { "Umacr",    0x016A },
    { "umacr",    0x016B },
    { "uml",      0x00A8 },
    { "Uogon",    0x0172 },
    { "uogon",    0x0173 },
    { "uplus",    0x228E },
    { "upsi",     0x03C5 },
    { "Upsi",     0x03D2 },
    { "urcorn",   0x231D },
    { "urcrop",   0x230E },
    { "Uring",    0x016E },
    { "uring",    0x016F },
    { "Utilde",   0x0168 },
    { "utilde",   0x0169 },
    { "utri",     0x25B5 },
    { "utrif",    0x25B4 },
    { "Uuml",     0x00DC },
    { "uuml",     0x00FC },
    { NULL,       0 }
};

// Entities whose names begin with 'v' or 'V'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesV[] = {
    { "varr",     0x2195 },
    { "vArr",     0x21D5 },
    { "Vcy",      0x0412 },
    { "vcy",      0x0432 },
    { "vdash",    0x22A2 },
    { "vDash",    0x22A8 },
    { "Vdash",    0x22A9 },
    { "veebar",   0x22BB },
    { "vellip",   0x22EE },
    { "verbar",   0x007C },
    { "Verbar",   0x2016 },
    { "vltri",    0x22B2 },
    { "vprime",   0x2032 },
    { "vprop",    0x221D },
    { "vrtri",    0x22B3 },
    { "vsubne",   0x228A },
    { "vsubnE",   0xE2B8 },
    { "vsupne",   0x228B },
    { "vsupnE",   0x228B },
    { "Vvdash",   0x22AA },
    { NULL,       0 }
};

// Entities whose names begin with 'w' or 'W'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesW[] = {
    { "Wcirc",    0x0174 },
    { "wcirc",    0x0175 },
    { "wedgeq",   0x2259 },
    { "weierp",   0x2118 },
    { "wreath",   0x2240 },
    { NULL,       0 }
};

// Entities whose names begin with 'x' or 'X'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesX[] = {
    { "xcirc",    0x25CB },
    { "xdtri",    0x25BD },
    { "Xgr",      0x039E },
    { "xgr",      0x03BE },
    { "xharr",    0x2194 },
    { "xhArr",    0x2194 },
    { "Xi",       0x039E },
    { "xi",       0x03BE },
    { "xlArr",    0x21D0 },
    { "xrArr",    0x21D2 },
    { "xutri",    0x25B3 },
    { NULL,       0 }
};

// Entities whose names begin with 'y' or 'Y'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesY[] = {
    { "Yacute",   0x00DD },
    { "yacute",   0x00FD },
    { "YAcy",     0x042F },
    { "yacy",     0x044F },
    { "Ycirc",    0x0176 },
    { "ycirc",    0x0177 },
    { "Ycy",      0x042B },
    { "ycy",      0x044B },
    { "yen",      0x00A5 },
    { "YIcy",     0x0407 },
    { "yicy",     0x0457 },
    { "YUcy",     0x042E },
    { "yucy",     0x044E },
    { "yuml",     0x00FF },
    { "Yuml",     0x0178 },
    { NULL,       0 }
};

// Entities whose names begin with 'z' or 'Z'.  Reached through namesTable;
// the entry with a NULL name marks the end of the list.
static NameId namesZ[] = {
    { "Zacute",   0x0179 },
    { "zacute",   0x017A },
    { "Zcaron",   0x017D },
    { "zcaron",   0x017E },
    { "Zcy",      0x0417 },
    { "zcy",      0x0437 },
    { "Zdot",     0x017B },
    { "zdot",     0x017C },
    { "zeta",     0x03B6 },
    { "Zgr",      0x0396 },
    { "zgr",      0x03B6 },
    { "ZHcy",     0x0416 },
    { "zhcy",     0x0436 },
    { NULL,       0 }
};

// TODO: consider ordering namesTable and the per-letter names? arrays by lookup frequency
// Per-letter dispatch table: slot 0 holds the 'a' list, slot 25 the 'z' list.
// HtmlNamedEntity() indexes it with the case-folded first character of the
// entity name.  A trailing NULL follows the 26 letter slots.
static NameId* namesTable[] = {
    namesA, namesB, namesC, namesD, namesE, namesF, namesG,
    namesH, namesI, namesJ, namesK, namesL, namesM, namesN,
    namesO, namesP, namesQ, namesR, namesS, namesT, namesU,
    namesV, namesW, namesX, namesY, namesZ,
    NULL
};

int HtmlNamedEntity(unsigned char *p, int length)
{
    int tableIndex = tolower(*p) - 'a';
    if (tableIndex >= 0 && tableIndex < 26) {
      NameId* names = namesTable[tableIndex];
      int i;

      for (i = 0; names[i].name; i++){
            if (strncmp(names[i].name, (char *)p, length) == 0){
                  return names[i].value;
            }
      }
    }
    error("unrecognized character entity \"%.*s\"", length, p);
    return -1;
}

#else //TODO: Merge Walter's list with Thomas'

// Entity table used when IN_GCC is not set.  HtmlNamedEntity() scans it
// front to back; its size is taken with sizeof, so there is no sentinel
// entry at the end.
static NameId names[] =
{
    // Entities
    { "quot",     34 },
    { "amp",      38 },
    { "lt",       60 },
    { "gt",       62 },

    { "OElig",    338 },
    { "oelig",    339 },
    { "Scaron",   352 },
    { "scaron",   353 },
    { "Yuml",     376 },
    { "circ",     710 },
    { "tilde",    732 },
    { "ensp",     8194 },
    { "emsp",     8195 },
    { "thinsp",   8201 },
    { "zwnj",     8204 },
    { "zwj",      8205 },
    { "lrm",      8206 },
    { "rlm",      8207 },
    { "ndash",    8211 },
    { "mdash",    8212 },
    { "lsquo",    8216 },
    { "rsquo",    8217 },
    { "sbquo",    8218 },
    { "ldquo",    8220 },
    { "rdquo",    8221 },
    { "bdquo",    8222 },
    { "dagger",   8224 },
    { "Dagger",   8225 },
    { "permil",   8240 },
    { "lsaquo",   8249 },
    { "rsaquo",   8250 },
    { "euro",     8364 },

    // Latin-1 (ISO-8859-1) Entities
    { "nbsp",     160 },
    { "iexcl",    161 },
    { "cent",     162 },
    { "pound",    163 },
    { "curren",   164 },
    { "yen",      165 },
    { "brvbar",   166 },
    { "sect",     167 },
    { "uml",      168 },
    { "copy",     169 },
    { "ordf",     170 },
    { "laquo",    171 },
    { "not",      172 },
    { "shy",      173 },
    { "reg",      174 },
    { "macr",     175 },
    { "deg",      176 },
    { "plusmn",   177 },
    { "sup2",     178 },
    { "sup3",     179 },
    { "acute",    180 },
    { "micro",    181 },
    { "para",     182 },
    { "middot",   183 },
    { "cedil",    184 },
    { "sup1",     185 },
    { "ordm",     186 },
    { "raquo",    187 },
    { "frac14",   188 },
    { "frac12",   189 },
    { "frac34",   190 },
    { "iquest",   191 },
    { "Agrave",   192 },
    { "Aacute",   193 },
    { "Acirc",    194 },
    { "Atilde",   195 },
    { "Auml",     196 },
    { "Aring",    197 },
    { "AElig",    198 },
    { "Ccedil",   199 },
    { "Egrave",   200 },
    { "Eacute",   201 },
    { "Ecirc",    202 },
    { "Euml",     203 },
    { "Igrave",   204 },
    { "Iacute",   205 },
    { "Icirc",    206 },
    { "Iuml",     207 },
    { "ETH",      208 },
    { "Ntilde",   209 },
    { "Ograve",   210 },
    { "Oacute",   211 },
    { "Ocirc",    212 },
    { "Otilde",   213 },
    { "Ouml",     214 },
    { "times",    215 },
    { "Oslash",   216 },
    { "Ugrave",   217 },
    { "Uacute",   218 },
    { "Ucirc",    219 },
    { "Uuml",     220 },
    { "Yacute",   221 },
    { "THORN",    222 },
    { "szlig",    223 },
    { "agrave",   224 },
    { "aacute",   225 },
    { "acirc",    226 },
    { "atilde",   227 },
    { "auml",     228 },
    { "aring",    229 },
    { "aelig",    230 },
    { "ccedil",   231 },
    { "egrave",   232 },
    { "eacute",   233 },
    { "ecirc",    234 },
    { "euml",     235 },
    { "igrave",   236 },
    { "iacute",   237 },
    { "icirc",    238 },
    { "iuml",     239 },
    { "eth",      240 },
    { "ntilde",   241 },
    { "ograve",   242 },
    { "oacute",   243 },
    { "ocirc",    244 },
    { "otilde",   245 },
    { "ouml",     246 },
    { "divide",   247 },
    { "oslash",   248 },
    { "ugrave",   249 },
    { "uacute",   250 },
    { "ucirc",    251 },
    { "uuml",     252 },
    { "yacute",   253 },
    { "thorn",    254 },
    { "yuml",     255 },

    // Symbols and Greek letter entities
    { "fnof",     402 },
    { "Alpha",    913 },
    { "Beta",     914 },
    { "Gamma",    915 },
    { "Delta",    916 },
    { "Epsilon",  917 },
    { "Zeta",     918 },
    { "Eta",      919 },
    { "Theta",    920 },
    { "Iota",     921 },
    { "Kappa",    922 },
    { "Lambda",   923 },
    { "Mu",       924 },
    { "Nu",       925 },
    { "Xi",       926 },
    { "Omicron",  927 },
    { "Pi",       928 },
    { "Rho",      929 },
    { "Sigma",    931 },
    { "Tau",      932 },
    { "Upsilon",  933 },
    { "Phi",      934 },
    { "Chi",      935 },
    { "Psi",      936 },
    { "Omega",    937 },
    { "alpha",    945 },
    { "beta",     946 },
    { "gamma",    947 },
    { "delta",    948 },
    { "epsilon",  949 },
    { "zeta",     950 },
    { "eta",      951 },
    { "theta",    952 },
    { "iota",     953 },
    { "kappa",    954 },
    { "lambda",   955 },
    { "mu",       956 },
    { "nu",       957 },
    { "xi",       958 },
    { "omicron",  959 },
    { "pi",       960 },
    { "rho",      961 },
    { "sigmaf",   962 },
    { "sigma",    963 },
    { "tau",      964 },
    { "upsilon",  965 },
    { "phi",      966 },
    { "chi",      967 },
    { "psi",      968 },
    { "omega",    969 },
    { "thetasym", 977 },
    { "upsih",    978 },
    { "piv",      982 },
    { "bull",     8226 },
    { "hellip",   8230 },
    { "prime",    8242 },
    { "Prime",    8243 },
    { "oline",    8254 },
    { "frasl",    8260 },
    { "weierp",   8472 },
    { "image",    8465 },
    { "real",     8476 },
    { "trade",    8482 },
    { "alefsym",  8501 },
    { "larr",     8592 },
    { "uarr",     8593 },
    { "rarr",     8594 },
    { "darr",     8595 },
    { "harr",     8596 },
    { "crarr",    8629 },
    { "lArr",     8656 },
    { "uArr",     8657 },
    { "rArr",     8658 },
    { "dArr",     8659 },
    { "hArr",     8660 },
    { "forall",   8704 },
    { "part",     8706 },
    { "exist",    8707 },
    { "empty",    8709 },
    { "nabla",    8711 },
    { "isin",     8712 },
    { "notin",    8713 },
    { "ni",       8715 },
    { "prod",     8719 },
    { "sum",      8721 },
    { "minus",    8722 },
    { "lowast",   8727 },
    { "radic",    8730 },
    { "prop",     8733 },
    { "infin",    8734 },
    { "ang",      8736 },
    { "and",      8743 },
    { "or",       8744 },
    { "cap",      8745 },
    { "cup",      8746 },
    { "int",      8747 },
    { "there4",   8756 },
    { "sim",      8764 },
    { "cong",     8773 },
    { "asymp",    8776 },
    { "ne",       8800 },
    { "equiv",    8801 },
    { "le",       8804 },
    { "ge",       8805 },
    { "sub",      8834 },
    { "sup",      8835 },
    { "nsub",     8836 },
    { "sube",     8838 },
    { "supe",     8839 },
    { "oplus",    8853 },
    { "otimes",   8855 },
    { "perp",     8869 },
    { "sdot",     8901 },
    { "lceil",    8968 },
    { "rceil",    8969 },
    { "lfloor",   8970 },
    { "rfloor",   8971 },
    { "lang",     9001 },
    { "rang",     9002 },
    { "loz",      9674 },
    { "spades",   9824 },
    { "clubs",    9827 },
    { "hearts",   9829 },
    { "diams",    9830 },
};

/*********************************************
 * Look up a named character entity (build without IN_GCC).
 *
 * Params:
 *    p      = first character of the entity name; the name does not need
 *             to be NUL-terminated
 *    length = number of characters of p that make up the name
 * Returns:
 *    the value of the entry in names[] whose name is exactly those length
 *    characters, or -1 if there is none (including when length <= 0).
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    size_t i;
    size_t len;

    // No entry has an empty name, and a negative count must never reach
    // memcmp, where it would be converted to a huge size_t.
    if (length <= 0)
        return -1;
    len = (size_t)length;

    // BUG: this is a dumb, slow linear search
    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
    {
        // Entries are case sensitive.  The length is compared first so that
        // memcmp never reads beyond the end of a name shorter than the
        // requested text.
        if (strlen(names[i].name) == len &&
            memcmp(names[i].name, p, len) == 0)
            return names[i].value;
    }
    return -1;
}

#endif

Generated by  Doxygen 1.6.0   Back to index