// Logo Search packages:
// Sourcecode: ldc version File versions
//
// lexer.c

// Compiler implementation of the D programming language
// Copyright (c) 1999-2009 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// License for redistribution is by either the Artistic License
// in artistic.txt, or the GNU General Public License in gnu.txt.
// See the included readme.txt for details.

#if IN_LLVM
#include <cmath>
#endif

/* Lexical Analyzer */

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <errno.h>
#include <wchar.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h> // for time() and ctime()

#include "rmem.h"

#include "stringtable.h"

#include "lexer.h"
#include "utf.h"
#include "identifier.h"
#include "id.h"
#include "module.h"

#if _WIN32 && __DMC__
// from \dm\src\include\setlocal.h
extern "C" char * __cdecl __locale_decpoint;
#endif

extern int HtmlNamedEntity(unsigned char *p, int length);

#define LS 0x2028 // UTF line separator
#define PS 0x2029 // UTF paragraph separator

/********************************************
 * Do our own char maps
 */

// Classification flags for every possible byte value.  Each entry is the
// bitwise OR of the CM* bits below and stays zero until cmtable_init() runs.
static unsigned char cmtable[256];

const int CMoctal  = 0x1;   // '0'..'7'
const int CMhex    = 0x2;   // decimal digit, 'a'..'f' or 'A'..'F'
const int CMidchar = 0x4;   // isalnum() character or '_'

// Each predicate yields the matching flag bit (non-zero) when the byte
// belongs to the class, and 0 otherwise.
inline unsigned char isoctal(unsigned char c)
{
    return cmtable[c] & CMoctal;
}

inline unsigned char ishex(unsigned char c)
{
    return cmtable[c] & CMhex;
}

inline unsigned char isidchar(unsigned char c)
{
    return cmtable[c] & CMidchar;
}

// Fill in cmtable.  Flags are OR-ed into the existing entries, so calling
// this more than once leaves the table unchanged.
static void cmtable_init()
{
    const unsigned entries = sizeof(cmtable) / sizeof(cmtable[0]);
    unsigned c = 0;

    while (c < entries)
    {
        unsigned char flags = 0;

        if (c >= '0' && c <= '7')
            flags |= CMoctal;
        if (isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
            flags |= CMhex;
        if (c == '_' || isalnum(c))
            flags |= CMidchar;

        cmtable[c] |= flags;
        ++c;
    }
}


/************************* Token **********************************************/

// Spelling of each token kind, indexed by TOK value.  toChars(enum TOK)
// falls back to a generated "TOK<number>" string when an entry is null.
const char *Token::tochars[TOKMAX];

void *Token::operator new(size_t size)
{   Token *t;

    if (Lexer::freelist)
    {
      t = Lexer::freelist;
      Lexer::freelist = t->next;
      return t;
    }

    return ::operator new(size);
}

#ifdef DEBUG
// Debug aid: write this token's spelling to stdmsg, followed by a newline.
void Token::print()
{
    const char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif

/***********************************
 * Produce a printable spelling of this token.
 *
 * Numeric literals are formatted into a function-local static buffer, so the
 * result is overwritten by the next call that formats a number.  String
 * literals are re-escaped into a freshly extracted OutBuffer.  Identifiers
 * and the keyword kinds listed below return the identifier's own text, and
 * every other kind goes through toChars(enum TOK).
 * Returns:
 *    pointer to a 0-terminated string
 */
const char *Token::toChars()
{   const char *p;
    // Must hold the longest text formatted below: a 20 digit 64 bit value
    // plus "UL", or a %Lg result plus "Li".  Sizing this from sizeof(value)
    // (the enum) gave only 16 bytes, which those cases overflow.
    static char buffer[64];

    p = buffer;
    switch (value)
    {
      case TOKint32v:
          sprintf(buffer,"%d",(d_int32)int64value);
          break;

      case TOKuns32v:
      case TOKcharv:
      case TOKwcharv:
      case TOKdcharv:
          sprintf(buffer,"%uU",(d_uns32)uns64value);
          break;

      case TOKint64v:
          // Cast so the variadic argument always matches the conversion.
          sprintf(buffer,"%lldL",(long long)int64value);
          break;

      case TOKuns64v:
          sprintf(buffer,"%lluUL",(unsigned long long)uns64value);
          break;

#if IN_GCC
      case TOKfloat32v:
      case TOKfloat64v:
      case TOKfloat80v:
          float80value.format(buffer, sizeof(buffer));
          break;
      case TOKimaginary32v:
      case TOKimaginary64v:
      case TOKimaginary80v:
          // Hold one byte back for the "i" suffix appended below.
          // NOTE(review): presumes format() never writes more than the
          // size it is given - confirm against its declaration.
          float80value.format(buffer, sizeof(buffer) - 1);
          strcat(buffer, "i");
          break;
#else
      case TOKfloat32v:
          sprintf(buffer,"%Lgf", float80value);
          break;

      case TOKfloat64v:
          sprintf(buffer,"%Lg", float80value);
          break;

      case TOKfloat80v:
          sprintf(buffer,"%LgL", float80value);
          break;

      case TOKimaginary32v:
          sprintf(buffer,"%Lgfi", float80value);
          break;

      case TOKimaginary64v:
          sprintf(buffer,"%Lgi", float80value);
          break;

      case TOKimaginary80v:
          sprintf(buffer,"%LgLi", float80value);
          break;
#endif

      case TOKstring:
#if CSTRINGS
          p = string;
#else
      {   OutBuffer buf;

          buf.writeByte('"');
          for (size_t i = 0; i < len; )
          { unsigned c;

            utf_decodeChar((unsigned char *)ustring, len, &i, &c);
            if (c == 0)
                break;                    // stop at an embedded 0

            if (c == '"' || c == '\\')
                buf.writeByte('\\');      // escape quote and backslash

            if (c <= 0x7F)
            {
                // isprint() is only defined for unsigned char values and
                // EOF, so it must not see larger code points.
                if (isprint(c))
                    buf.writeByte(c);
                else
                    buf.printf("\\x%02x", c);
            }
            else if (c <= 0xFFFF)
                buf.printf("\\u%04x", c);
            else
                buf.printf("\\U%08x", c);
          }
          buf.writeByte('"');
          if (postfix)
            buf.writeByte(postfix);       // the postfix itself, not a second quote
          buf.writeByte(0);
          p = (char *)buf.extractData();
      }
#endif
          break;

      case TOKidentifier:
      case TOKenum:
      case TOKstruct:
      case TOKimport:
      CASE_BASIC_TYPES:
          p = ident->toChars();
          break;

      default:
          p = toChars(value);
          break;
    }
    return p;
}

// Return the spelling registered for a token kind in tochars[].  When none is
// registered, format "TOK<number>" into a function-local static buffer, which
// the next such call overwrites.
const char *Token::toChars(enum TOK value)
{
    const char *name = tochars[value];
    if (name)
        return name;

    static char buffer[3 + 3 * sizeof(value) + 1];
    sprintf(buffer,"TOK%d",value);
    return buffer;
}

/*************************** Lexer ********************************************/

// Tokens available for reuse, linked through Token::next.  nextToken() pushes
// consumed look-ahead tokens here and Token::operator new pops them.
Token *Lexer::freelist = NULL;
// Table in which scan() looks up and records identifier spellings.
StringTable Lexer::stringtable;
// Scratch buffer scan() uses while assembling string literal contents.
OutBuffer Lexer::stringbuffer;

/***********************************
 * Set up a lexer over the bytes base[begoffset .. endoffset].
 *
 * If the text begins with "#!", that whole first line is skipped and the
 * line counter starts at 2.
 * Params:
 *    mod           module being lexed; also used to construct loc
 *    base          start of the source buffer
 *    begoffset     offset into base at which scanning starts
 *    endoffset     offset into base recorded as 'end'
 *    doDocComment  stored for scan(), which consults it for doc comments
 *    commentToken  stored for scan(), which then returns comments as tokens
 */
Lexer::Lexer(Module *mod,
      unsigned char *base, unsigned begoffset, unsigned endoffset,
      int doDocComment, int commentToken)
    : loc(mod, 1)
{
    memset(&token, 0, sizeof(token));
    this->base = base;
    this->end = base + endoffset;
    this->p = base + begoffset;
    this->mod = mod;
    this->doDocComment = doDocComment;
    this->anyToken = 0;
    this->commentToken = commentToken;

    if (p[0] != '#' || p[1] != '!')
        return;

    // Skip the "#!" line, stopping just after its terminator or on the
    // end-of-file marker.
    p += 2;
    for (bool lineDone = false; !lineDone; )
    {
        const unsigned char c = *p;

        if (c == '\n')
        {
            p++;
            lineDone = true;
        }
        else if (c == '\r')
        {
            p++;
            if (*p == '\n')
                p++;
            lineDone = true;
        }
        else if (c == 0 || c == 0x1A)
        {
            lineDone = true;            // end of file: leave p on the marker
        }
        else
        {
            if (c & 0x80)
            {
                const unsigned u = decodeUTF();
                if (u == PS || u == LS)
                {
                    lineDone = true;    // Unicode line/paragraph separator
                    continue;
                }
            }
            p++;
        }
    }
    loc.linnum = 2;
}


// Report a printf-style error at the lexer's current location and count it.
// The message is printed only when a module is attached and errors are not
// gagged; global.errors is incremented either way.
void Lexer::error(const char *format, ...)
{
    if (!mod || global.gag)
    {
        global.errors++;
        return;
    }

    char *where = loc.toChars();
    if (*where)
        fprintf(stdmsg, "%s: ", where);
    mem.free(where);

    va_list args;
    va_start(args, format);
    vfprintf(stdmsg, format, args);
    va_end(args);

    fprintf(stdmsg, "\n");
    fflush(stdmsg);

    // Give up once enough errors have been reported, to limit cascades.
    if (global.errors >= 20)
        fatal();

    global.errors++;
}

// Report a printf-style error at an explicit location and count it.
// The message is printed only when a module is attached and errors are not
// gagged; global.errors is incremented either way.
void Lexer::error(Loc loc, const char *format, ...)
{
    if (!mod || global.gag)
    {
        global.errors++;
        return;
    }

    // 'loc' here is the parameter, which hides the member of the same name.
    char *where = loc.toChars();
    if (*where)
        fprintf(stdmsg, "%s: ", where);
    mem.free(where);

    va_list args;
    va_start(args, format);
    vfprintf(stdmsg, format, args);
    va_end(args);

    fprintf(stdmsg, "\n");
    fflush(stdmsg);

    // Give up once enough errors have been reported, to limit cascades.
    if (global.errors >= 20)
        fatal();

    global.errors++;
}

// Advance to the next token and return its kind.  A token already produced
// by peek() is copied into 'token' and its node is pushed onto the free
// list; otherwise a new token is scanned directly into 'token'.
TOK Lexer::nextToken()
{
    Token *ahead = token.next;

    if (!ahead)
    {
        scan(&token);
    }
    else
    {
        memcpy(&token, ahead, sizeof(Token));
        ahead->next = freelist;
        freelist = ahead;
    }
    return token.value;
}

// Return the token that follows ct without consuming it.  If it has not
// been scanned yet, scan it now and link it after ct.
Token *Lexer::peek(Token *ct)
{
    if (!ct->next)
    {
        Token *fresh = new Token();
        scan(fresh);
        fresh->next = NULL;
        ct->next = fresh;
    }
    return ct->next;
}

/***********************
 * Return the kind of the token after the current one, without consuming it.
 */

TOK Lexer::peekNext()
{
    Token *following = peek(&token);
    return following->value;
}

/***********************
 * Return the kind of the token two positions after the current one,
 * without consuming anything.
 */

TOK Lexer::peekNext2()
{
    return peek(peek(&token))->value;
}

/*********************************
 * tk is on the opening (.
 * Look ahead, without consuming, and return the token just past the
 * matching ).  The search also stops, returning the token it stopped on, at
 * end of file, at a ';' outside any { } opened during the search, or at a
 * '}' that closes more braces than were opened.
 */

Token *Lexer::peekPastParen(Token *tk)
{
    int openParens = 1;
    int braceDepth = 0;

    for (;;)
    {
        tk = peek(tk);
        switch (tk->value)
        {
            case TOKlparen:
                ++openParens;
                break;

            case TOKrparen:
                if (--openParens == 0)
                    return peek(tk);    // token after the matching ')'
                break;

            case TOKlcurly:
                ++braceDepth;
                break;

            case TOKrcurly:
                if (--braceDepth < 0)
                    return tk;          // unbalanced '}'
                break;

            case TOKsemicolon:
                if (braceDepth == 0)
                    return tk;          // ';' outside any braces
                break;

            case TOKeof:
                return tk;

            default:
                break;
        }
    }
}

/**********************************
 * Determine if string is a valid Identifier.
 * Placed here because of commonality with Lexer functionality.
 *
 * A valid identifier is non-empty, does not start with a decimal digit,
 * decodes cleanly as UTF-8, and consists only of ASCII letters, ASCII
 * digits, '_' and code points >= 0x80 accepted by isUniAlpha().
 * Params:
 *    p     0-terminated UTF-8 string to test; may be NULL
 * Returns:
 *    0     invalid
 *    1     valid
 */

int Lexer::isValidIdentifier(char *p)
{
    if (!p || !*p)
        return 0;

    if (*p >= '0' && *p <= '9')           // beware of isdigit() on signed chars
        return 0;

    const size_t len = strlen(p);
    size_t idx = 0;
    while (p[idx])
    {   dchar_t dc;

        // utf_decodeChar() returns non-NULL on a malformed sequence.
        if (utf_decodeChar((unsigned char *)p, len, &idx, &dc))
            return 0;

        int ok;
        if (dc >= 0x80)
            ok = isUniAlpha(dc);
        else
            // isalnum() is only defined for unsigned char values and EOF,
            // so it is reached for ASCII code points only.
            ok = isalnum(dc) || dc == '_';

        if (!ok)
            return 0;
    }
    return 1;
}

/****************************
 * Turn next token in buffer into a token.
 *
 * Starting at p, skips white space and line breaks (counting lines in
 * loc.linnum) and, unless commentToken is set, comments, then fills in *t
 * and advances p over the text consumed.  The first byte decides which kind
 * of token is scanned; numbers, character constants and the string forms are
 * delegated to helper functions.
 * Params:
 *    t     token to fill in.  blockComment and lineComment are cleared on
 *          entry; ptr is set to the first byte of the token and value to
 *          its kind.
 */

void Lexer::scan(Token *t)
{
    // Line on entry; compared below with the line a comment starts on.
    unsigned lastLine = loc.linnum;
    unsigned linnum;

    t->blockComment = NULL;
    t->lineComment = NULL;
    while (1)
    {
      // Re-mark the token start on every pass, so skipped white space and
      // comments are not part of the token.
      t->ptr = p;
      //printf("p = %p, *p = '%c'\n",p,*p);
      switch (*p)
      {
          case 0:
          case 0x1A:
            // p is not advanced, so later calls see the same marker again.
            t->value = TOKeof;                  // end of file
            return;

          case ' ':
          case '\t':
          case '\v':
          case '\f':
            p++;
            continue;               // skip white space

          case '\r':
            // For CR LF the line is counted when the LF is seen on the
            // next pass.
            p++;
            if (*p != '\n')               // if CR stands by itself
                loc.linnum++;
            continue;               // skip white space

          case '\n':
            p++;
            loc.linnum++;
            continue;               // skip white space

          case '0':     case '1':   case '2':   case '3':   case '4':
          case '5':     case '6':   case '7':   case '8':   case '9':
            t->value = number(t);
            return;

#if CSTRINGS
          case '\'':
            t->value = charConstant(t, 0);
            return;

          case '"':
            t->value = stringConstant(t,0);
            return;

          // An l/L directly before a quote selects the second argument 1;
          // otherwise control falls through to the identifier cases below.
          case 'l':
          case 'L':
            if (p[1] == '\'')
            {
                p++;
                t->value = charConstant(t, 1);
                return;
            }
            else if (p[1] == '"')
            {
                p++;
                t->value = stringConstant(t, 1);
                return;
            }
#else
          case '\'':
            t->value = charConstant(t,0);
            return;

          // r"..." and `...` : p is left on the opening quote character,
          // which is passed on as *p.
          case 'r':
            if (p[1] != '"')
                goto case_ident;
            p++;
          case '`':
            t->value = wysiwygStringConstant(t, *p);
            return;

          // x"..." hex string; any other 'x' starts an identifier.
          case 'x':
            if (p[1] != '"')
                goto case_ident;
            p++;
            t->value = hexStringConstant(t);
            return;

#if DMDV2
          // q"..." delimited string and q{...} token string; any other 'q'
          // starts an identifier.
          case 'q':
            if (p[1] == '"')
            {
                p++;
                t->value = delimitedStringConstant(t);
                return;
            }
            else if (p[1] == '{')
            {
                p++;
                t->value = tokenStringConstant(t);
                return;
            }
            else
                goto case_ident;
#endif

          case '"':
            t->value = escapeStringConstant(t,0);
            return;
#if ! TEXTUAL_ASSEMBLY_OUT
          case '\\':                // escaped string literal
          { unsigned c;
            unsigned char *pstart = p;

            // Collect a run of consecutive escape sequences into one string
            // token.  \u, \U and \& results are written as UTF-8, all others
            // as a single byte.
            stringbuffer.reset();
            do
            {
                p++;
                switch (*p)
                {
                  case 'u':
                  case 'U':
                  case '&':
                      c = escapeSequence();
                      stringbuffer.writeUTF8(c);
                      break;

                  default:
                      c = escapeSequence();
                      stringbuffer.writeByte(c);
                      break;
                }
            } while (*p == '\\');
            // len excludes the terminating 0 that is appended next.
            t->len = stringbuffer.offset;
            stringbuffer.writeByte(0);
            t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
            memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
            t->postfix = 0;
            t->value = TOKstring;
            if (!global.params.useDeprecated)
                error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", (int)(p - pstart), pstart, (int)(p - pstart), pstart);
            return;
          }
#endif
          case 'l':
          case 'L':
#endif
          // Every remaining ASCII letter and '_' starts an identifier.  The
          // letters handled above reach here through goto case_ident.
          case 'a':     case 'b':   case 'c':   case 'd':   case 'e':
          case 'f':     case 'g':   case 'h':   case 'i':   case 'j':
          case 'k':                 case 'm':   case 'n':   case 'o':
#if DMDV2
          case 'p':     /*case 'q': case 'r':*/ case 's':   case 't':
#else
          case 'p':     case 'q': /*case 'r':*/ case 's':   case 't':
#endif
          case 'u':     case 'v':   case 'w': /*case 'x':*/ case 'y':
          case 'z':
          case 'A':     case 'B':   case 'C':   case 'D':   case 'E':
          case 'F':     case 'G':   case 'H':   case 'I':   case 'J':
          case 'K':                 case 'M':   case 'N':   case 'O':
          case 'P':     case 'Q':   case 'R':   case 'S':   case 'T':
          case 'U':     case 'V':   case 'W':   case 'X':   case 'Y':
          case 'Z':
          case '_':
          case_ident:
          {   unsigned char c;
            StringValue *sv;
            Identifier *id;

            // Consume identifier characters: ASCII ones via the char map,
            // non-ASCII ones when isUniAlpha() accepts the decoded value.
            // NOTE(review): presumably decodeUTF() leaves p on the last
            // byte of the sequence - confirm.
            do
            {
                c = *++p;
            } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
            // Look the spelling up in the shared table, creating an
            // Identifier for it the first time it is seen.
            sv = stringtable.update((char *)t->ptr, p - t->ptr);
            id = (Identifier *) sv->ptrvalue;
            if (!id)
            {   id = new Identifier(sv->lstring.string,TOKidentifier);
                sv->ptrvalue = id;
            }
            t->ident = id;
            // The token kind comes from the Identifier itself.
            t->value = (enum TOK) id->value;
            anyToken = 1;
            if (*t->ptr == '_')     // if special identifier token
            {
                // Note: these locals hide ::time, hence the qualified call.
                static char date[11+1];
                static char time[8+1];
                static char timestamp[24+1];

                if (!date[0]) // lazy evaluation
                {   time_t t;
                  char *p;

                  // ctime() yields "Www Mmm dd hh:mm:ss yyyy\n"; slice out
                  // "Mmm dd yyyy", "hh:mm:ss" and the text minus the newline.
                  ::time(&t);
                  p = ctime(&t);
                  assert(p);
                  sprintf(date, "%.6s %.4s", p + 4, p + 20);
                  sprintf(time, "%.8s", p + 11);
                  sprintf(timestamp, "%.24s", p);
                }

#if DMDV1
                if (mod && id == Id::FILE)
                {
                  t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
                  goto Lstr;
                }
                else if (mod && id == Id::LINE)
                {
                  t->value = TOKint64v;
                  t->uns64value = loc.linnum;
                }
                else
#endif
                if (id == Id::DATE)
                {
                  t->ustring = (unsigned char *)date;
                  goto Lstr;
                }
                else if (id == Id::TIME)
                {
                  t->ustring = (unsigned char *)time;
                  goto Lstr;
                }
                else if (id == Id::VENDOR)
                {
                  t->ustring = (unsigned char *)"LDC";
                  goto Lstr;
                }
                else if (id == Id::TIMESTAMP)
                {
                  t->ustring = (unsigned char *)timestamp;
                  // Shared tail: turn the token into a string literal whose
                  // text is t->ustring.  No goto in this function targets
                  // Llen.
                 Lstr:
                  t->value = TOKstring;
                 Llen:
                  t->postfix = 0;
                  t->len = strlen((char *)t->ustring);
                }
                else if (id == Id::VERSIONX)
                { unsigned major = 0;
                  unsigned minor = 0;

                  // Parse "<major>.<minor>" from global.version, skipping
                  // its first character, into major * 1000 + minor.
                  for (const char *p = global.version + 1; 1; p++)
                  {
                      char c = *p;
                      if (isdigit(c))
                        minor = minor * 10 + c - '0';
                      else if (c == '.')
                      { major = minor;
                        minor = 0;
                      }
                      else
                        break;
                  }
                  t->value = TOKint64v;
                  t->uns64value = major * 1000 + minor;
                }
#if DMDV2
                else if (id == Id::EOFX)
                {
                  t->value = TOKeof;
                  // Advance scanner to end of file
                  while (!(*p == 0 || *p == 0x1A))
                      p++;
                }
#endif
            }
            //printf("t->value = %d\n",t->value);
            return;
          }

          case '/':
            p++;
            switch (*p)
            {
                case '=':
                  p++;
                  t->value = TOKdivass;
                  return;

                case '*':
                  // Block comment.  linnum records the line it starts on.
                  p++;
                  linnum = loc.linnum;
                  while (1)
                  {
                      // Inner loop: advance to the next '/', counting lines.
                      while (1)
                      { unsigned char c = *p;
                        switch (c)
                        {
                            case '/':
                              break;

                            case '\n':
                              loc.linnum++;
                              p++;
                              continue;

                            case '\r':
                              p++;
                              if (*p != '\n')
                                  loc.linnum++;
                              continue;

                            case 0:
                            case 0x1A:
                              error("unterminated /* */ comment");
                              p = end;
                              t->value = TOKeof;
                              return;

                            default:
                              if (c & 0x80)
                              {   unsigned u = decodeUTF();
                                  if (u == PS || u == LS)
                                    loc.linnum++;
                              }
                              p++;
                              continue;
                        }
                        break;
                      }
                      p++;
                      // The '/' just consumed closes the comment only when a
                      // '*' precedes it and that '*' is not the opening one
                      // (i.e. the text so far is not just "/*/").
                      if (p[-2] == '*' && p - 3 != t->ptr)
                        break;
                  }
                  if (commentToken)
                  {
                      t->value = TOKcomment;
                      return;
                  }
                  else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
                  {   // if /** but not /**/
                      // Second argument: the comment began on the line
                      // scan() was entered on.
                      getDocComment(t, lastLine == linnum);
                  }
                  continue;

                case '/':           // do // style comments
                  linnum = loc.linnum;
                  // The pre-increment below also steps over the second '/'.
                  while (1)
                  {   unsigned char c = *++p;
                      switch (c)
                      {
                        case '\n':
                            break;

                        case '\r':
                            if (p[1] == '\n')
                              p++;
                            break;

                        case 0:
                        case 0x1A:
                            // Comment runs to end of file.
                            if (commentToken)
                            {
                              p = end;
                              t->value = TOKcomment;
                              return;
                            }
                            if (doDocComment && t->ptr[2] == '/')
                              getDocComment(t, lastLine == linnum);
                            p = end;
                            t->value = TOKeof;
                            return;

                        default:
                            if (c & 0x80)
                            {   unsigned u = decodeUTF();
                              if (u == PS || u == LS)
                                  break;
                            }
                            continue;
                      }
                      break;
                  }

                  // Here p is on the last byte of the line terminator; the
                  // p++ in either path below steps past it.
                  if (commentToken)
                  {
                      p++;
                      loc.linnum++;
                      t->value = TOKcomment;
                      return;
                  }
                  if (doDocComment && t->ptr[2] == '/')
                      getDocComment(t, lastLine == linnum);

                  p++;
                  loc.linnum++;
                  continue;

                case '+':
                { int nest;

                  // Nesting comment: 'nest' counts the /+ openers that have
                  // not yet been closed by +/.
                  linnum = loc.linnum;
                  p++;
                  nest = 1;
                  while (1)
                  {   unsigned char c = *p;
                      switch (c)
                      {
                        case '/':
                            p++;
                            if (*p == '+')
                            {
                              p++;
                              nest++;
                            }
                            continue;

                        case '+':
                            p++;
                            if (*p == '/')
                            {
                              p++;
                              if (--nest == 0)
                                  break;
                            }
                            continue;

                        case '\r':
                            p++;
                            if (*p != '\n')
                              loc.linnum++;
                            continue;

                        case '\n':
                            loc.linnum++;
                            p++;
                            continue;

                        case 0:
                        case 0x1A:
                            error("unterminated /+ +/ comment");
                            p = end;
                            t->value = TOKeof;
                            return;

                        default:
                            if (c & 0x80)
                            {   unsigned u = decodeUTF();
                              if (u == PS || u == LS)
                                  loc.linnum++;
                            }
                            p++;
                            continue;
                      }
                      break;
                  }
                  if (commentToken)
                  {
                      t->value = TOKcomment;
                      return;
                  }
                  if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
                  {   // if /++ but not /++/
                      getDocComment(t, lastLine == linnum);
                  }
                  continue;
                }
            }
            // A '/' followed by anything else is the division operator.
            t->value = TOKdiv;
            return;

          case '.':
            p++;
            if (isdigit(*p))
            {   /* Note that we don't allow ._1 and ._ as being
                 * valid floating point numbers.
                 */
                // Back up so inreal() starts on the '.'.
                p--;
                t->value = inreal(t);
            }
            else if (p[0] == '.')
            {
                if (p[1] == '.')
                {   p += 2;
                  t->value = TOKdotdotdot;
                }
                else
                {   p++;
                  t->value = TOKslice;
                }
            }
            else
                t->value = TOKdot;
            return;

          case '&':
            p++;
            if (*p == '=')
            {   p++;
                t->value = TOKandass;
            }
            else if (*p == '&')
            {   p++;
                t->value = TOKandand;
            }
            else
                t->value = TOKand;
            return;

          case '|':
            p++;
            if (*p == '=')
            {   p++;
                t->value = TOKorass;
            }
            else if (*p == '|')
            {   p++;
                t->value = TOKoror;
            }
            else
                t->value = TOKor;
            return;

          case '-':
            p++;
            if (*p == '=')
            {   p++;
                t->value = TOKminass;
            }
#if 0
            else if (*p == '>')
            {   p++;
                t->value = TOKarrow;
            }
#endif
            else if (*p == '-')
            {   p++;
                t->value = TOKminusminus;
            }
            else
                t->value = TOKmin;
            return;

          case '+':
            p++;
            if (*p == '=')
            {   p++;
                t->value = TOKaddass;
            }
            else if (*p == '+')
            {   p++;
                t->value = TOKplusplus;
            }
            else
                t->value = TOKadd;
            return;

          case '<':
            p++;
            if (*p == '=')
            {   p++;
                t->value = TOKle;               // <=
            }
            else if (*p == '<')
            {   p++;
                if (*p == '=')
                {   p++;
                  t->value = TOKshlass;         // <<=
                }
                else
                  t->value = TOKshl;            // <<
            }
            else if (*p == '>')
            {   p++;
                if (*p == '=')
                {   p++;
                  t->value = TOKleg;            // <>=
                }
                else
                  t->value = TOKlg;       // <>
            }
            else
                t->value = TOKlt;               // <
            return;

          case '>':
            p++;
            if (*p == '=')
            {   p++;
                t->value = TOKge;               // >=
            }
            else if (*p == '>')
            {   p++;
                if (*p == '=')
                {   p++;
                  t->value = TOKshrass;         // >>=
                }
                else if (*p == '>')
                { p++;
                  if (*p == '=')
                  {   p++;
                      t->value = TOKushrass;    // >>>=
                  }
                  else
                      t->value = TOKushr;       // >>>
                }
                else
                  t->value = TOKshr;            // >>
            }
            else
                t->value = TOKgt;               // >
            return;

          case '!':
            p++;
            if (*p == '=')
            {   p++;
                // The three character form is only taken when Dversion is 1.
                if (*p == '=' && global.params.Dversion == 1)
                { p++;
                  t->value = TOKnotidentity;    // !==
                }
                else
                  t->value = TOKnotequal;       // !=
            }
            else if (*p == '<')
            {   p++;
                if (*p == '>')
                { p++;
                  if (*p == '=')
                  {   p++;
                      t->value = TOKunord; // !<>=
                  }
                  else
                      t->value = TOKue;   // !<>
                }
                else if (*p == '=')
                { p++;
                  t->value = TOKug; // !<=
                }
                else
                  t->value = TOKuge;      // !<
            }
            else if (*p == '>')
            {   p++;
                if (*p == '=')
                { p++;
                  t->value = TOKul; // !>=
                }
                else
                  t->value = TOKule;      // !>
            }
            else
                t->value = TOKnot;        // !
            return;

          case '=':
            p++;
            if (*p == '=')
            {   p++;
                // The three character form is only taken when Dversion is 1.
                if (*p == '=' && global.params.Dversion == 1)
                { p++;
                  t->value = TOKidentity;       // ===
                }
                else
                  t->value = TOKequal;          // ==
            }
            else
                t->value = TOKassign;           // =
            return;

          case '~':
            p++;
            if (*p == '=')
            {   p++;
                t->value = TOKcatass;           // ~=
            }
            else
                t->value = TOKtilde;            // ~
            return;

// SINGLE: a one character token that is never part of a longer operator.
#define SINGLE(c,tok) case c: p++; t->value = tok; return;

          SINGLE('(',   TOKlparen)
          SINGLE(')', TOKrparen)
          SINGLE('[', TOKlbracket)
          SINGLE(']', TOKrbracket)
          SINGLE('{', TOKlcurly)
          SINGLE('}', TOKrcurly)
          SINGLE('?', TOKquestion)
          SINGLE(',', TOKcomma)
          SINGLE(';', TOKsemicolon)
          SINGLE(':', TOKcolon)
          SINGLE('$', TOKdollar)
          SINGLE('@', TOKat)

#undef SINGLE

// DOUBLE: character c1 alone yields tok1; c1 followed by c2 yields tok2.
#define DOUBLE(c1,tok1,c2,tok2)           \
          case c1:                  \
            p++;              \
            if (*p == c2)           \
            {   p++;          \
                t->value = tok2;    \
            }                 \
            else              \
                t->value = tok1;    \
            return;

          DOUBLE('*', TOKmul, '=', TOKmulass)
          DOUBLE('%', TOKmod, '=', TOKmodass)
          DOUBLE('^', TOKxor, '=', TOKxorass)

#undef DOUBLE

          case '#':
            // Handed to pragma(); no token is produced for it here.
            p++;
            pragma();
            continue;

          default:
          { unsigned char c = *p;

            if (c & 0x80)
            {   unsigned u = decodeUTF();

                // Check for start of unicode identifier
                if (isUniAlpha(u))
                  goto case_ident;

                if (u == PS || u == LS)
                {
                  loc.linnum++;
                  p++;
                  continue;
                }
            }
            // Anything else is reported and skipped; c is still the first
            // byte read above.
            if (isprint(c))
                error("unsupported char '%c'", c);
            else
                error("unsupported char 0x%02x", c);
            p++;
            continue;
          }
      }
    }
}

/*******************************************
 * Parse escape sequence.
 */

unsigned Lexer::escapeSequence()
{   unsigned c = *p;

#ifdef TEXTUAL_ASSEMBLY_OUT
    return c;
#endif
    int n;
    int ndigits;

    switch (c)
    {
      case '\'':
      case '"':
      case '?':
      case '\\':
      Lconsume:
            p++;
            break;

      case 'a':   c = 7;            goto Lconsume;
      case 'b':   c = 8;            goto Lconsume;
      case 'f':   c = 12;           goto Lconsume;
      case 'n':   c = 10;           goto Lconsume;
      case 'r':   c = 13;           goto Lconsume;
      case 't':   c = 9;            goto Lconsume;
      case 'v':   c = 11;           goto Lconsume;

      case 'u':
            ndigits = 4;
            goto Lhex;
      case 'U':
            ndigits = 8;
            goto Lhex;
      case 'x':
            ndigits = 2;
      Lhex:
            p++;
            c = *p;
            if (ishex(c))
            {   unsigned v;

                n = 0;
                v = 0;
                while (1)
                {
                  if (isdigit(c))
                      c -= '0';
                  else if (islower(c))
                      c -= 'a' - 10;
                  else
                      c -= 'A' - 10;
                  v = v * 16 + c;
                  c = *++p;
                  if (++n == ndigits)
                      break;
                  if (!ishex(c))
                  {   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                      break;
                  }
                }
                if (ndigits != 2 && !utf_isValidDchar(v))
                { error("invalid UTF character \\U%08x", v);
                  v = '?';    // recover with valid UTF character
                }
                c = v;
            }
            else
                error("undefined escape hex sequence \\%c\n",c);
            break;

      case '&':               // named character entity
            for (unsigned char *idstart = ++p; 1; p++)
            {
                switch (*p)
                {
                  case ';':
                      c = HtmlNamedEntity(idstart, p - idstart);
                      if (c == ~0)
                      {   error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
                        c = ' ';
                      }
                      p++;
                      break;

                  default:
                      if (isalpha(*p) ||
                        (p != idstart + 1 && isdigit(*p)))
                        continue;
                      error("unterminated named entity");
                      break;
                }
                break;
            }
            break;

      case 0:
      case 0x1A:              // end of file
            c = '\\';
            break;

      default:
            if (isoctal(c))
            {   unsigned v;

                n = 0;
                v = 0;
                do
                {
                  v = v * 8 + (c - '0');
                  c = *++p;
                } while (++n < 3 && isoctal(c));
                c = v;
                if (c > 0xFF)
                  error("0%03o is larger than a byte", c);
            }
            else
                error("undefined escape sequence \\%c\n",c);
            break;
    }
    return c;
}

/**************************************
 * Lex a wysiwyg string, i.e. one in which a backslash is an
 * ordinary character.
 * Input:
 *    p     is on the character just before the string contents
 *          (it is skipped without being examined)
 *    tc    terminating character, '"' or '`'
 * Output:
 *    t->ustring, t->len and t->postfix are filled in
 * Returns:
 *    TOKstring, also when the string is unterminated
 */

TOK Lexer::wysiwygStringConstant(Token *t, int tc)
{   unsigned c;
    Loc start = loc;

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        switch (c)
        {
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;       // ignore
                c = '\n';           // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case 0:
            case 0x1A:
                // Step back onto the end-of-file character so that the
                // next scan sees it instead of running past it
                // (same as escapeStringConstant does).
                p--;
                error("unterminated string constant starting at %s", start.toChars());
                t->ustring = (unsigned char *)"";
                t->len = 0;
                t->postfix = 0;
                return TOKstring;

            case '"':
            case '`':
                if (c == tc)
                {
                    // len excludes the terminating 0 appended below
                    t->len = stringbuffer.offset;
                    stringbuffer.writeByte(0);
                    t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                    memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                    stringPostfix(t);
                    return TOKstring;
                }
                break;              // the other quote kind is plain content

            default:
                if (c & 0x80)
                {   // Multi-byte UTF-8: decode from its first byte
                    p--;
                    unsigned u = decodeUTF();
                    p++;
                    if (u == PS || u == LS)
                        loc.linnum++;
                    stringbuffer.writeUTF8(u);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(c);
    }
}

/**************************************
 * Lex hex strings:
 *    x"0A ae 34FE BD"
 * Every pair of hex digits yields one byte; white space and line
 * ends between digits are skipped.
 * Input:
 *    p is on the character just before the string contents
 * Output:
 *    t->ustring, t->len and t->postfix are filled in
 * Returns:
 *    TOKstring, also when the string is unterminated
 */

TOK Lexer::hexStringConstant(Token *t)
{   unsigned c;
    Loc start = loc;
    unsigned n = 0;     // number of hex digits seen so far
    unsigned v = 0;     // high nibble waiting for its partner

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        switch (c)
        {
            case ' ':
            case '\t':
            case '\v':
            case '\f':
                continue;           // skip white space

            case '\r':
                if (*p == '\n')
                    continue;       // ignore
                // Treat isolated '\r' as if it were a '\n'
            case '\n':
                loc.linnum++;
                continue;

            case 0:
            case 0x1A:
                // Step back onto the end-of-file character so that the
                // next scan sees it instead of running past it
                // (same as escapeStringConstant does).
                p--;
                error("unterminated string constant starting at %s", start.toChars());
                t->ustring = (unsigned char *)"";
                t->len = 0;
                t->postfix = 0;
                return TOKstring;

            case '"':
                if (n & 1)
                {   error("odd number (%d) of hex characters in hex string", n);
                    stringbuffer.writeByte(v);
                }
                // len excludes the terminating 0 appended below
                t->len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                stringPostfix(t);
                return TOKstring;

            default:
                if (c >= '0' && c <= '9')
                    c -= '0';
                else if (c >= 'a' && c <= 'f')
                    c -= 'a' - 10;
                else if (c >= 'A' && c <= 'F')
                    c -= 'A' - 10;
                else if (c & 0x80)
                {   p--;
                    unsigned u = decodeUTF();
                    p++;
                    if (u == PS || u == LS)
                    {   // A Unicode line break is a line end like '\n',
                        // not a hex digit: count the line and move on.
                        loc.linnum++;
                        continue;
                    }
                    error("non-hex character \\u%x", u);
                }
                else
                    error("non-hex character '%c'", c);
                if (n & 1)
                {   // second digit of a pair completes the byte
                    v = (v << 4) | c;
                    stringbuffer.writeByte(v);
                }
                else
                    v = c;
                n++;
                break;
        }
    }
}


#if DMDV2
/**************************************
 * Lex delimited strings:
 *    q"(foo(xxx))"   // "foo(xxx)"
 *    q"[foo(]"       // "foo("
 *    q"/foo]/"       // "foo]"
 *    q"HERE
 *    foo
 *    HERE"       // "foo\n"
 * Input:
 *    p is on the "
 * Output:
 *    t->ustring, t->len and t->postfix are filled in
 * Returns:
 *    TOKstring, also when the string is unterminated
 */

TOK Lexer::delimitedStringConstant(Token *t)
{   unsigned c;
    Loc start = loc;
    unsigned delimleft = 0;         // opening delimiter, 0 until it has been read
    unsigned delimright = 0;        // closing delimiter, stays 0 for a heredoc
    unsigned nest = 1;              // 1 when the delimiters are a nesting bracket pair
    unsigned nestcount = 0;         // current bracket depth, used when nest is 1
    Identifier *hereid = NULL;      // heredoc identifier, if any
    unsigned blankrol = 0;          // 1 while the rest of the heredoc's first line must be blank
    unsigned startline = 0;         // 1 when at the start of a line

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        //printf("c = '%c'\n", c);
        switch (c)
        {
            case '\n':
            Lnextline:
                loc.linnum++;
                startline = 1;
                if (blankrol)
                {   blankrol = 0;
                    continue;
                }
                if (hereid)
                {
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;

            case '\r':
                if (*p == '\n')
                    continue;       // ignore
                c = '\n';           // treat EndOfLine as \n character
                goto Lnextline;

            case 0:
            case 0x1A:
                goto Lerror;

            default:
                if (c & 0x80)
                {   p--;
                    c = decodeUTF();
                    p++;
                    if (c == PS || c == LS)
                        goto Lnextline;
                }
                break;
        }
        if (delimleft == 0)
        {   // First character after the quote selects the delimiter
            delimleft = c;
            nest = 1;
            nestcount = 1;
            if (c == '(')
                delimright = ')';
            else if (c == '{')
                delimright = '}';
            else if (c == '[')
                delimright = ']';
            else if (c == '<')
                delimright = '>';
            // c may be a decoded code point; the <ctype.h> functions are
            // only defined for unsigned char values, hence the range check.
            else if ((c < 0x80 && isalpha(c)) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
            {   // Start of identifier; must be a heredoc
                Token tok;
                p--;
                scan(&tok);         // read in heredoc identifier
                if (tok.value != TOKidentifier)
                {   error("identifier expected for heredoc, not %s", tok.toChars());
                    delimright = c;
                }
                else
                {   hereid = tok.ident;
                    //printf("hereid = '%s'\n", hereid->toChars());
                    blankrol = 1;
                }
                nest = 0;
            }
            else
            {   delimright = c;
                nest = 0;
                if (c < 0x80 && isspace(c))
                    error("delimiter cannot be whitespace");
            }
        }
        else
        {
            if (blankrol)
            {   error("heredoc rest of line should be blank");
                blankrol = 0;
                continue;
            }
            if (nest == 1)
            {
                if (c == delimleft)
                    nestcount++;
                else if (c == delimright)
                {   nestcount--;
                    if (nestcount == 0)
                        goto Ldone;
                }
            }
            else if (c == delimright)
                goto Ldone;
            if (startline && c < 0x80 && isalpha(c) && hereid)
            {   Token tok;
                unsigned char *psave = p;
                p--;
                scan(&tok);         // read in possible heredoc identifier
                //printf("endid = '%s'\n", tok.ident->toChars());
                if (tok.value == TOKidentifier && tok.ident->equals(hereid))
                {   /* should check that rest of line is blank
                     */
                    goto Ldone;
                }
                p = psave;          // not the terminator: resume after c
            }
            stringbuffer.writeUTF8(c);
            startline = 0;
        }
    }

Ldone:
    if (*p == '"')
        p++;
    else if (hereid)
        // delimright is 0 for a heredoc, so name the identifier instead
        error("delimited string must end in %s\"", hereid->toChars());
    else
        error("delimited string must end in %c\"", delimright);
    // len excludes the terminating 0 appended below
    t->len = stringbuffer.offset;
    stringbuffer.writeByte(0);
    t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
    memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
    stringPostfix(t);
    return TOKstring;

Lerror:
    // Step back onto the end-of-file character so that the next scan
    // sees it instead of running past it (same as escapeStringConstant).
    p--;
    error("unterminated string constant starting at %s", start.toChars());
    t->ustring = (unsigned char *)"";
    t->len = 0;
    t->postfix = 0;
    return TOKstring;
}

/**************************************
 * Lex token strings:
 *    q{ foo(xxx) } // " foo(xxx) "
 *    q{foo(}       // "foo("
 *    q{{foo}"}"}   // "{foo}"}""
 * The contents are scanned as tokens until the matching '}' is found;
 * the string is the source text in between.
 * Input:
 *    p is on the opening {
 * Output:
 *    t->ustring, t->len and t->postfix are filled in
 * Returns:
 *    TOKstring, also when the string is unterminated
 */

TOK Lexer::tokenStringConstant(Token *t)
{
    Loc start = loc;
    unsigned depth = 1;                 // the opening { is already counted
    unsigned char *body = ++p;          // first character after the {

    for (;;)
    {   Token tok;

        scan(&tok);
        if (tok.value == TOKeof)
        {
            error("unterminated token string constant starting at %s", start.toChars());
            t->ustring = (unsigned char *)"";
            t->len = 0;
            t->postfix = 0;
            return TOKstring;
        }
        if (tok.value == TOKlcurly)
            depth++;
        else if (tok.value == TOKrcurly)
        {
            if (--depth == 0)
                break;                  // found the closing }
        }
    }

    // p is just past the closing }, which is not part of the string
    t->len = p - 1 - body;
    t->ustring = (unsigned char *)mem.malloc(t->len + 1);
    memcpy(t->ustring, body, t->len);
    t->ustring[t->len] = 0;
    stringPostfix(t);
    return TOKstring;
}

#endif


/**************************************
 * Lex a double quoted string in which backslash escapes are
 * interpreted.
 * Input:
 *    p     is on the character just before the string contents
 *    wide  not used here
 * Output:
 *    t->ustring, t->len and t->postfix are filled in
 * Returns:
 *    TOKstring, also when the string is unterminated
 */

TOK Lexer::escapeStringConstant(Token *t, int wide)
{   unsigned ch;
    Loc start = loc;

    p++;
    stringbuffer.reset();
    for (;;)
    {
        ch = *p++;
        switch (ch)
        {
            case '"':
                // End of string; len excludes the terminating 0
                t->len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                stringPostfix(t);
                return TOKstring;

            case 0:
            case 0x1A:
                p--;                // stay on the end of file character
                error("unterminated string constant starting at %s", start.toChars());
                t->ustring = (unsigned char *)"";
                t->len = 0;
                t->postfix = 0;
                return TOKstring;

#if !( TEXTUAL_ASSEMBLY_OUT )
            case '\\':
            {   // \u, \U and \& give a code point that is stored as UTF-8;
                // every other escape is stored as a single byte.
                const unsigned char kind = *p;

                ch = escapeSequence();
                if (kind == 'u' || kind == 'U' || kind == '&')
                {   stringbuffer.writeUTF8(ch);
                    continue;
                }
                break;
            }
#endif
            case '\r':
                if (*p == '\n')
                    continue;       // ignore
                ch = '\n';          // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case '\n':
                loc.linnum++;
                break;

            default:
                if (ch & 0x80)
                {   // Multi-byte UTF-8: decode from its first byte
                    p--;
                    ch = decodeUTF();
                    if (ch == LS || ch == PS)
                    {   ch = '\n';
                        loc.linnum++;
                    }
                    p++;
                    stringbuffer.writeUTF8(ch);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(ch);
    }
}

/**************************************
 * Lex a character literal.
 * Input:
 *    p     is on the character just before the literal's contents
 *    wide  not used here
 * Output:
 *    t->uns64value is set to the character's value (it is left
 *    untouched when the literal is empty or cut short)
 * Returns:
 *    TOKcharv, TOKwcharv or TOKdcharv
 */

TOK Lexer::charConstant(Token *t, int wide)
{
    unsigned c;
    TOK tk = TOKcharv;

    //printf("Lexer::charConstant\n");
    p++;
    c = *p++;
    switch (c)
    {
#if ! TEXTUAL_ASSEMBLY_OUT
        case '\\':
            switch (*p)
            {
                case 'u':
                    t->uns64value = escapeSequence();
                    tk = TOKwcharv;
                    break;

                case 'U':
                case '&':
                    t->uns64value = escapeSequence();
                    tk = TOKdcharv;
                    break;

                default:
                    t->uns64value = escapeSequence();
                    break;
            }
            break;
#endif
        case 0:
        case 0x1A:
            // Step back onto the end-of-file character so that the next
            // scan sees it instead of running past it
            // (same as escapeStringConstant does).
            p--;
            error("unterminated character constant");
            return tk;

        case '\n':
        L1:
            loc.linnum++;
        case '\r':
        case '\'':
            error("unterminated character constant");
            return tk;

        default:
            if (c & 0x80)
            {   // Multi-byte UTF-8: decode from its first byte
                p--;
                c = decodeUTF();
                p++;
                if (c == LS || c == PS)
                    goto L1;
                // Outside the surrogate range and below 0xFFFE it is
                // typed as a wchar, otherwise as a dchar
                if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
                    tk = TOKwcharv;
                else
                    tk = TOKdcharv;
            }
            t->uns64value = c;
            break;
    }

    if (*p != '\'')
    {   error("unterminated character constant");
        return tk;
    }
    p++;
    return tk;
}

/***************************************
 * Get postfix of string literal.
 * If p is on 'c', 'w' or 'd', record that character in t->postfix
 * and step over it; otherwise t->postfix is set to 0 and p stays put.
 */

void Lexer::stringPostfix(Token *t)
{
    const unsigned char suffix = *p;

    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
    {
        t->postfix = suffix;
        p++;
    }
    else
        t->postfix = 0;
}

/***************************************
 * Read \u or \U unicode sequence
 * Input:
 *    u     'u' or 'U'
 * Returns:
 *    the value accumulated from the hex digits read
 *
 * NOTE: this function is compiled out by the #if 0 below.
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    unsigned value;
    unsigned n;
    unsigned char c;
    unsigned nchars;

    // \U takes 8 hex digits, \u takes 4
    nchars = (u == 'U') ? 8 : 4;
    value = 0;
    for (n = 0; 1; n++)
    {
      ++p;
      if (n == nchars)
          break;
      c = *p;
      if (!ishex(c))
      {   error("\\%c sequence must be followed by %d hex characters", u, nchars);
          break;
      }
      // Convert the hex digit to its numeric value
      if (isdigit(c))
          c -= '0';
      else if (islower(c))
          c -= 'a' - 10;
      else
          c -= 'A' - 10;
      value <<= 4;
      value |= c;
    }
    return value;
}
#endif

/**************************************
 * Read in a number.
 * If it's an integer, store it in t->uns64value.
 *    integers can be decimal, octal, hex or binary
 *    Handle the suffixes U, UL, LU, L, etc.
 * If it turns out to be a floating point literal, rewind to the
 * start and let inreal() lex it.
 * Returns:
 *    TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer,
 *    otherwise whatever inreal() returns
 */

TOK Lexer::number(Token *t)
{
    // We use a state machine to collect numbers
    enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
      STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
      STATE_hexh, STATE_error };
    enum STATE state;

    enum FLAGS
    { FLAGS_decimal  = 1,           // decimal
      FLAGS_unsigned = 2,           // u or U suffix
      FLAGS_long     = 4,           // l or L suffix
    };
    enum FLAGS flags = FLAGS_decimal;

    int i;
    int base;
    unsigned c;
    unsigned char *start;
    TOK result;

    //printf("Lexer::number()\n");
    state = STATE_initial;
    base = 0;
    stringbuffer.reset();
    start = p;                  // remembered so a real can be rescanned from here
    // Each pass looks at *p; unless a state does 'continue' or jumps
    // out, the character is appended to stringbuffer and p advances.
    while (1)
    {
      c = *p;
      switch (state)
      {
          case STATE_initial:       // opening state
            if (c == '0')
                state = STATE_0;
            else
                state = STATE_decimal;
            break;

          case STATE_0:
            // A leading 0 was seen, so this is not a decimal literal
            flags = (FLAGS) (flags & ~FLAGS_decimal);
            switch (c)
            {
#if ZEROH
                case 'H':                 // 0h
                case 'h':
                  goto hexh;
#endif
                case 'X':
                case 'x':
                  state = STATE_hex0;
                  break;

                case '.':
                  if (p[1] == '.')  // .. is a separate token
                      goto done;
                  // else fall through: it is a real
                case 'i':
                case 'f':
                case 'F':
                  goto real;
#if ZEROH
                case 'E':
                case 'e':
                  goto case_hex;
#endif
                case 'B':
                case 'b':
                  state = STATE_binary0;
                  break;

                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                  state = STATE_octal;
                  break;

#if ZEROH
                case '8': case '9': case 'A':
                case 'C': case 'D': case 'F':
                case 'a': case 'c': case 'd': case 'f':
                case_hex:
                  state = STATE_hexh;
                  break;
#endif
                case '_':
                  // The '_' is skipped, not stored
                  state = STATE_octal;
                  p++;
                  continue;

                case 'L':
                  if (p[1] == 'i')
                      goto real;
                  goto done;

                default:
                  goto done;
            }
            break;

          case STATE_decimal:       // reading decimal number
            if (!isdigit(c))
            {
#if ZEROH
                if (ishex(c)
                  || c == 'H' || c == 'h'
                   )
                  goto hexh;
#endif
                if (c == '_')       // ignore embedded _
                { p++;
                  continue;
                }
                if (c == '.' && p[1] != '.')
                  goto real;
                else if (c == 'i' || c == 'f' || c == 'F' ||
                       c == 'e' || c == 'E')
                {
          real:   // It's a real number. Back up and rescan as a real
                  p = start;
                  return inreal(t);
                }
                else if (c == 'L' && p[1] == 'i')
                  goto real;
                goto done;
            }
            break;

          case STATE_hex0:          // reading hex number
          case STATE_hex:
            if (!ishex(c))
            {
                if (c == '_')       // ignore embedded _
                { p++;
                  continue;
                }
                if (c == '.' && p[1] != '.')
                  goto real;
                if (c == 'P' || c == 'p' || c == 'i')
                  goto real;
                // Still in STATE_hex0 means no hex digit followed the 0x
                if (state == STATE_hex0)
                  error("Hex digit expected, not '%c'", c);
                goto done;
            }
            state = STATE_hex;
            break;

#if ZEROH
          hexh:
            state = STATE_hexh;
          case STATE_hexh:          // parse numbers like 0FFh
            if (!ishex(c))
            {
                if (c == 'H' || c == 'h')
                {
                  p++;
                  base = 16;
                  goto done;
                }
                else
                {
                  // Check for something like 1E3 or 0E24
                  if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
                      memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
                      goto real;
                  error("Hex digit expected, not '%c'", c);
                  goto done;
                }
            }
            break;
#endif

          case STATE_octal:         // reading octal number
          case STATE_octale:        // reading octal number with non-octal digits
            if (!isoctal(c))
            {
#if ZEROH
                if (ishex(c)
                  || c == 'H' || c == 'h'
                   )
                  goto hexh;
#endif
                if (c == '_')       // ignore embedded _
                { p++;
                  continue;
                }
                if (c == '.' && p[1] != '.')
                  goto real;
                if (c == 'i')
                  goto real;
                if (isdigit(c))
                {
                  // 8 or 9: keep collecting, the error is reported at done
                  state = STATE_octale;
                }
                else
                  goto done;
            }
            break;

          case STATE_binary0:       // starting binary number
          case STATE_binary:        // reading binary number
            if (c != '0' && c != '1')
            {
#if ZEROH
                if (ishex(c)
                  || c == 'H' || c == 'h'
                   )
                  goto hexh;
#endif
                if (c == '_')       // ignore embedded _
                { p++;
                  continue;
                }
                if (state == STATE_binary0)
                { error("binary digit expected");
                  state = STATE_error;
                  break;
                }
                else
                  goto done;
            }
            state = STATE_binary;
            break;

          case STATE_error:         // for error recovery
            if (!isdigit(c))  // scan until non-digit
                goto done;
            break;

          default:
            assert(0);
      }
      stringbuffer.writeByte(c);
      p++;
    }
done:
    stringbuffer.writeByte(0);            // terminate string
    if (state == STATE_octale)
      error("Octal digit expected");

    uinteger_t n;             // unsigned >=64 bit integer type

    // Shortcut for a single character (one digit plus the terminator)
    if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
      n = stringbuffer.data[0] - '0';
    else
    {
      // Convert string to integer
#if __DMC__
      errno = 0;
      n = strtoull((char *)stringbuffer.data,NULL,base);
      if (errno == ERANGE)
          error("integer overflow");
#else
      // Not everybody implements strtoull()
      // NOTE: this local p hides the member p until the end of this block
      char *p = (char *)stringbuffer.data;
      int r = 10, d;

      // Pick the radix from the prefix: 0x -> 16, 0b -> 2, 0<digit> -> 8
      if (*p == '0')
      {
          if (p[1] == 'x' || p[1] == 'X')
            p += 2, r = 16;
          else if (p[1] == 'b' || p[1] == 'B')
            p += 2, r = 2;
          else if (isdigit(p[1]))
            p += 1, r = 8;
      }

      n = 0;
      while (1)
      {
          if (*p >= '0' && *p <= '9')
            d = *p - '0';
          else if (*p >= 'a' && *p <= 'z')
            d = *p - 'a' + 10;
          else if (*p >= 'A' && *p <= 'Z')
            d = *p - 'A' + 10;
          else
            break;
          if (d >= r)
            break;
          uinteger_t n2 = n * r;
          //printf("n2 / r = %llx, n = %llx\n", n2/r, n);
          // Overflow if the multiply does not divide back or the add wraps
          if (n2 / r != n || n2 + d < n)
          {
            error ("integer overflow");
            break;
          }

          n = n2 + d;
          p++;
      }
#endif
      if (sizeof(n) > 8 &&
          n > 0xFFFFFFFFFFFFFFFFULL)      // if n needs more than 64 bits
          error("integer overflow");
    }

    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
    while (1)
    {   unsigned char f;

      switch (*p)
      {   case 'U':
          case 'u':
            f = FLAGS_unsigned;
            goto L1;

          case 'l':
            // The leading '1 ||' makes this error unconditional
            if (1 || !global.params.useDeprecated)
                error("'l' suffix is deprecated, use 'L' instead");
          case 'L':
            f = FLAGS_long;
          L1:
            p++;
            // The same kind of suffix given twice
            if (flags & f)
                error("unrecognized token");
            flags = (FLAGS) (flags | f);
            continue;
          default:
            break;
      }
      break;
    }

    // Choose the token type from the suffixes and the magnitude of n
    switch (flags)
    {
      case 0:
          /* Octal or Hexadecimal constant.
           * First that fits: int, uint, long, ulong
           */
          if (n & 0x8000000000000000LL)
                result = TOKuns64v;
          else if (n & 0xFFFFFFFF00000000LL)
                result = TOKint64v;
          else if (n & 0x80000000)
                result = TOKuns32v;
          else
                result = TOKint32v;
          break;

      case FLAGS_decimal:
          /* First that fits: int, long, long long
           */
          if (n & 0x8000000000000000LL)
          {     error("signed integer overflow");
                result = TOKuns64v;
          }
          else if (n & 0xFFFFFFFF80000000LL)
                result = TOKint64v;
          else
                result = TOKint32v;
          break;

      case FLAGS_unsigned:
      case FLAGS_decimal | FLAGS_unsigned:
          /* First that fits: uint, ulong
           */
          if (n & 0xFFFFFFFF00000000LL)
                result = TOKuns64v;
          else
                result = TOKuns32v;
          break;

      case FLAGS_decimal | FLAGS_long:
          if (n & 0x8000000000000000LL)
          {     error("signed integer overflow");
                result = TOKuns64v;
          }
          else
                result = TOKint64v;
          break;

      case FLAGS_long:
          if (n & 0x8000000000000000LL)
                result = TOKuns64v;
          else
                result = TOKint64v;
          break;

      case FLAGS_unsigned | FLAGS_long:
      case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
          result = TOKuns64v;
          break;

      default:
          #ifdef DEBUG
            printf("%x\n",flags);
          #endif
          assert(0);
    }
    t->uns64value = n;
    return result;
}

/**************************************
 * Read in characters, converting them to real.
 * The literal's text is collected in stringbuffer, converted and
 * stored in t->float80value; an optional f/F/l/L suffix and a
 * following i/I then select the token type.
 * Returns:
 *    TOKfloat32v, TOKfloat64v, TOKfloat80v or the matching
 *    TOKimaginary32v, TOKimaginary64v, TOKimaginary80v
 * Bugs:
 *    Exponent overflow not detected.
 *    Too much requested precision is not detected.
 */

TOK Lexer::inreal(Token *t)
#ifdef __DMC__
__in
{
    assert(*p == '.' || isdigit(*p));
}
__out (result)
{
    switch (result)
    {
      case TOKfloat32v:
      case TOKfloat64v:
      case TOKfloat80v:
      case TOKimaginary32v:
      case TOKimaginary64v:
      case TOKimaginary80v:
          break;

      default:
          assert(0);
    }
}
__body
#endif /* __DMC__ */
{   int dblstate;
    unsigned c;
    char hex;                 // is this a hexadecimal-floating-constant?
    TOK result;

    //printf("Lexer::inreal()\n");
    stringbuffer.reset();
    dblstate = 0;
    hex = 0;
    // Outer loop: fetch one character per pass and store it once it is
    // accepted. Inner loop: re-run the state switch on the same
    // character after a state change made with 'continue'.
Lnext:
    while (1)
    {
      // Get next char from input
      c = *p++;
      //printf("dblstate = %d, c = '%c'\n", dblstate, c);
      while (1)
      {
          switch (dblstate)
          {
            case 0:                 // opening state
                if (c == '0')
                  dblstate = 9;
                else if (c == '.')
                  dblstate = 3;
                else
                  dblstate = 1;
                break;

            case 9:
                // Character after a leading 0: x or X starts a hex float
                dblstate = 1;
                if (c == 'X' || c == 'x')
                { hex++;
                  break;
                }
            case 1:                 // digits to left of .
            case 3:                 // digits to right of .
            case 7:                 // continuing exponent digits
                if (!isdigit(c) && !(hex && isxdigit(c)))
                {
                  if (c == '_')
                      goto Lnext;   // ignore embedded '_'
                  // Not a digit: move to the following state and
                  // look at this character again
                  dblstate++;
                  continue;
                }
                break;

            case 2:                 // no more digits to left of .
                if (c == '.')
                {   dblstate++;
                  break;
                }
            case 4:                 // no more digits to right of .
                if ((c == 'E' || c == 'e') ||
                  hex && (c == 'P' || c == 'p'))
                {   dblstate = 5;
                  hex = 0;    // exponent is always decimal
                  break;
                }
                if (hex)
                  error("binary-exponent-part required");
                goto done;

            case 5:                 // looking immediately to right of E
                dblstate++;
                if (c == '-' || c == '+')
                  break;
            case 6:                 // 1st exponent digit expected
                if (!isdigit(c))
                  error("exponent expected");
                dblstate++;
                break;

            case 8:                 // past end of exponent digits
                goto done;
          }
          break;
      }
      stringbuffer.writeByte(c);
    }
done:
    // The character that ended the number was fetched with p++; put it back
    p--;

    stringbuffer.writeByte(0);

#if _WIN32 && __DMC__
    char *save = __locale_decpoint;
    __locale_decpoint = ".";
#endif
#ifdef IN_GCC
    t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
#else
    t->float80value = strtold((char *)stringbuffer.data, NULL);
#endif
    // errno is cleared here, after the conversion above, so the ERANGE
    // test at the end only reflects the float/double conversions below.
    errno = 0;
    float strtofres;
    double strtodres;
    switch (*p)
    {
      case 'F':
      case 'f':
#ifdef IN_GCC
          real_t::parse((char *)stringbuffer.data, real_t::Float);
#else
          strtofres = strtof((char *)stringbuffer.data, NULL);
          // LDC change: don't error on gradual underflow
          if (errno == ERANGE && 
                strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF)
            errno = 0;
#endif
          result = TOKfloat32v;
          p++;
          break;

      default:
          // No suffix: the literal is a double; *p is not consumed
#ifdef IN_GCC
          real_t::parse((char *)stringbuffer.data, real_t::Double);
#else     
          strtodres = strtod((char *)stringbuffer.data, NULL);
          // LDC change: don't error on gradual underflow
          if (errno == ERANGE && 
                strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL)
            errno = 0;
#endif
          result = TOKfloat64v;
          break;

      case 'l':
          if (!global.params.useDeprecated)
            error("'l' suffix is deprecated, use 'L' instead");
      case 'L':
          // No second conversion is done for this suffix
          result = TOKfloat80v;
          p++;
          break;
    }
    // A trailing i or I turns the literal into an imaginary one
    if (*p == 'i' || *p == 'I')
    {
      if (!global.params.useDeprecated && *p == 'I')
          error("'I' suffix is deprecated, use 'i' instead");
      p++;
      switch (result)
      {
          case TOKfloat32v:
            result = TOKimaginary32v;
            break;
          case TOKfloat64v:
            result = TOKimaginary64v;
            break;
          case TOKfloat80v:
            result = TOKimaginary80v;
            break;
      }
    }
#if _WIN32 && __DMC__
    __locale_decpoint = save;
#endif
    if (errno == ERANGE)
      error("number is not representable");
    return result;
}

/*********************************************
 * Do pragma.
 * Currently, the only pragma supported is:
 *    #line linnum [filespec]
 * where filespec is either a double-quoted string or the special
 * token __FILE__ (the latter only when a module is attached).
 *
 * On success, this->loc receives the new line number and, if one was
 * given, the new file name. On a malformed directive an error is
 * reported at the location where the directive started.
 */

void Lexer::pragma()
{
    Token tok;
    int linnum;
    char *filespec = NULL;
    Loc loc = this->loc;        // snapshot of the position, used for diagnostics

    scan(&tok);
    if (tok.value != TOKidentifier || tok.ident != Id::line)
        goto Lerr;

    scan(&tok);
    if (tok.value == TOKint32v || tok.value == TOKint64v)
        // NOTE(review): the -1 presumably compensates for the line
        // increment done when the terminating newline is lexed -- confirm.
        linnum = tok.uns64value - 1;
    else
        goto Lerr;

    while (1)
    {
        switch (*p)
        {
            case 0:
            case 0x1A:
            case '\n':
            Lnewline:
                // End of directive: commit the new location.
                this->loc.linnum = linnum;
                if (filespec)
                    this->loc.filename = filespec;
                return;

            case '\r':
                p++;
                if (*p != '\n')
                {   p--;
                    goto Lnewline;
                }
                continue;

            case ' ':
            case '\t':
            case '\v':
            case '\f':
                p++;
                continue;               // skip white space

            case '_':
                if (mod && memcmp(p, "__FILE__", 8) == 0)
                {
                    p += 8;
                    filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
                    continue;
                }
                // Anything else starting with '_' is malformed. Rejecting it
                // here is essential: p has not advanced, so looping again
                // would spin forever on the same character.
                goto Lerr;

            case '"':
                if (filespec)
                    goto Lerr;          // only one filespec allowed
                stringbuffer.reset();
                p++;
                while (1)
                {   unsigned c;

                    c = *p;
                    switch (c)
                    {
                        case '\n':
                        case '\r':
                        case 0:
                        case 0x1A:
                            goto Lerr;  // unterminated string

                        case '"':
                            stringbuffer.writeByte(0);
                            filespec = mem.strdup((char *)stringbuffer.data);
                            p++;
                            break;

                        default:
                        {   unsigned char *seq = p;     // first byte of this character

                            if (c & 0x80)
                            {   unsigned u = decodeUTF();
                                if (u == PS || u == LS)
                                    goto Lerr;
                            }
                            // decodeUTF() leaves p on the last byte of the
                            // sequence, so copy every byte from the lead byte
                            // through p; storing only the lead byte would
                            // leave a truncated, invalid UTF-8 file name.
                            stringbuffer.writeByte(c);
                            for (seq++; seq <= p; seq++)
                                stringbuffer.writeByte(*seq);
                            p++;
                            continue;
                        }
                    }
                    break;
                }
                continue;

            default:
                if (*p & 0x80)
                {   unsigned u = decodeUTF();
                    if (u == PS || u == LS)
                        goto Lnewline;  // Unicode line/paragraph separator ends the directive
                }
                goto Lerr;
        }
    }

Lerr:
    error(loc, "#line integer [\"filespec\"]\\n expected");
}


/********************************************
 * Decode UTF character.
 * Issue error messages for invalid sequences.
 * Return decoded character, advance p to last character in UTF sequence.
 */

unsigned Lexer::decodeUTF()
{
    dchar_t u;
    unsigned char c;
    unsigned char *s = p;
    size_t len;
    size_t idx;
    const char *msg;

    c = *s;
    assert(c & 0x80);

    // Check length of remaining string up to 6 UTF-8 characters
    for (len = 1; len < 6 && s[len]; len++)
      ;

    idx = 0;
    msg = utf_decodeChar(s, len, &idx, &u);
    p += idx - 1;
    if (msg)
    {
      error("%s", msg);
    }
    return u;
}


/***************************************************
 * Parse doc comment embedded between t->ptr and p.
 * Remove trailing blanks and tabs from lines.
 * Replace all newlines with \n.
 * Remove leading comment character from each line.
 * Decide if it's a lineComment or a blockComment.
 * Append to previous one for this token.
 */

void Lexer::getDocComment(Token *t, unsigned lineComment)
{
    /* ct tells us which kind of comment it is: '/', '*', or '+'
     */
    unsigned char ct = t->ptr[2];

    /* Start of comment text skips over / * *, / + +, or / / /
     */
    unsigned char *q = t->ptr + 3;  // start of comment text

    unsigned char *qend = p;
    if (ct == '*' || ct == '+')
      qend -= 2;

    /* Scan over initial row of ****'s or ++++'s or ////'s
     */
    for (; q < qend; q++)
    {
      if (*q != ct)
          break;
    }

    /* Remove trailing row of ****'s or ++++'s
     */
    if (ct != '/')
    {
      for (; q < qend; qend--)
      {
          if (qend[-1] != ct)
            break;
      }
    }

    /* Comment is now [q .. qend].
     * Canonicalize it into buf[].
     */
    OutBuffer buf;
    int linestart = 0;

    for (; q < qend; q++)
    {
      unsigned char c = *q;

      switch (c)
      {
          case '*':
          case '+':
            if (linestart && c == ct)
            {   linestart = 0;
                /* Trim preceding whitespace up to preceding \n
                 */
                while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
                  buf.offset--;
                continue;
            }
            break;

          case ' ':
          case '\t':
            break;

          case '\r':
            if (q[1] == '\n')
                continue;           // skip the \r
            goto Lnewline;

          default:
            if (c == 226)
            {
                // If LS or PS
                if (q[1] == 128 &&
                  (q[2] == 168 || q[2] == 169))
                {
                  q += 2;
                  goto Lnewline;
                }
            }
            linestart = 0;
            break;

          Lnewline:
            c = '\n';         // replace all newlines with \n
          case '\n':
            linestart = 1;

            /* Trim trailing whitespace
             */
            while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
                buf.offset--;

            break;
      }
      buf.writeByte(c);
    }

    // Always end with a newline
    if (!buf.offset || buf.data[buf.offset - 1] != '\n')
      buf.writeByte('\n');

    buf.writeByte(0);

    // It's a line comment if the start of the doc comment comes
    // after other non-whitespace on the same line.
    unsigned char** dc = (lineComment && anyToken)
                   ? &t->lineComment
                   : &t->blockComment;

    // Combine with previous doc comment, if any
    if (*dc)
      *dc = combineComments(*dc, (unsigned char *)buf.data);
    else
      *dc = (unsigned char *)buf.extractData();
}

/********************************************
 * Join two document comments.
 * If either one is NULL the other is returned unchanged. Otherwise a
 * newly allocated string is returned holding c1 followed by c2, with a
 * newline inserted between them when c1 is non-empty and does not
 * already end in one.
 */

unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
{
    //printf("Lexer::combineComments('%s', '%s')\n", c1, c2);

    if (!c1)
        return c2;
    if (!c2)
        return c1;

    const size_t n1 = strlen((char *)c1);
    const size_t n2 = strlen((char *)c2);

    // Room for both texts, an optional separator and the terminator
    unsigned char *merged = (unsigned char *)mem.malloc(n1 + 1 + n2 + 1);
    unsigned char *out = merged;

    memcpy(out, c1, n1);
    out += n1;
    if (n1 && c1[n1 - 1] != '\n')
        *out++ = '\n';

    memcpy(out, c2, n2);
    out[n2] = 0;

    return merged;
}

/********************************************
 * Look up s in the string table, adding it if necessary, and return
 * the Identifier attached to that entry. An Identifier of kind
 * TOKidentifier is created the first time a given string is seen.
 */

Identifier *Lexer::idPool(const char *s)
{
    StringValue *entry = stringtable.update(s, strlen(s));

    if (!entry->ptrvalue)
        entry->ptrvalue = new Identifier(entry->lstring.string, TOKidentifier);

    return (Identifier *) entry->ptrvalue;
}

/*********************************************
 * Build the identifier "<s><num>" and return its pooled Identifier.
 * The prefix must be short enough for the result to fit in the
 * fixed-size local buffer; this is checked with an assert.
 */

Identifier *Lexer::uniqueId(const char *s, int num)
{
    char name[32];

    // Prefix, plus a budget of 3 characters per byte of num, plus the NUL
    const size_t needed = strlen(s) + sizeof(num) * 3 + 1;
    assert(needed <= sizeof(name));

    sprintf(name, "%s%d", s, num);
    return idPool(name);
}

/*********************************************
 * Create an identifier from the prefix s and the next value of a
 * counter shared by all callers of this overload.
 */
Identifier *Lexer::uniqueId(const char *s)
{
    static int counter;

    counter++;
    return uniqueId(s, counter);
}

/****************************************
 * One entry of the keyword table: the spelling of a keyword
 * and the token value it lexes to.
 */

struct Keyword
{
    const char *name;   // spelling in source text
    enum TOK value;     // corresponding token
};

/* Table of all keywords.
 * The order of entries is significant: Lexer::initKeywords() leaves out
 * the last two entries when global.params.Dversion == 1.
 */
static Keyword keywords[] =
{
//  { "",                TOK                 },

    // Expressions and miscellaneous
    { "this",            TOKthis             },
    { "super",           TOKsuper            },
    { "assert",          TOKassert           },
    { "null",            TOKnull             },
    { "true",            TOKtrue             },
    { "false",           TOKfalse            },
    { "cast",            TOKcast             },
    { "new",             TOKnew              },
    { "delete",          TOKdelete           },
    { "throw",           TOKthrow            },
    { "module",          TOKmodule           },
    { "pragma",          TOKpragma           },
    { "typeof",          TOKtypeof           },
    { "typeid",          TOKtypeid           },

    { "template",        TOKtemplate         },

    // Basic types
    { "void",            TOKvoid             },
    { "byte",            TOKint8             },
    { "ubyte",           TOKuns8             },
    { "short",           TOKint16            },
    { "ushort",          TOKuns16            },
    { "int",             TOKint32            },
    { "uint",            TOKuns32            },
    { "long",            TOKint64            },
    { "ulong",           TOKuns64            },
    { "cent",            TOKcent             },
    { "ucent",           TOKucent            },
    { "float",           TOKfloat32          },
    { "double",          TOKfloat64          },
    { "real",            TOKfloat80          },

    { "bool",            TOKbool             },
    { "char",            TOKchar             },
    { "wchar",           TOKwchar            },
    { "dchar",           TOKdchar            },

    { "ifloat",          TOKimaginary32      },
    { "idouble",         TOKimaginary64      },
    { "ireal",           TOKimaginary80      },

    { "cfloat",          TOKcomplex32        },
    { "cdouble",         TOKcomplex64        },
    { "creal",           TOKcomplex80        },

    { "delegate",        TOKdelegate         },
    { "function",        TOKfunction         },

    // Statements
    { "is",              TOKis               },
    { "if",              TOKif               },
    { "else",            TOKelse             },
    { "while",           TOKwhile            },
    { "for",             TOKfor              },
    { "do",              TOKdo               },
    { "switch",          TOKswitch           },
    { "case",            TOKcase             },
    { "default",         TOKdefault          },
    { "break",           TOKbreak            },
    { "continue",        TOKcontinue         },
    { "synchronized",    TOKsynchronized     },
    { "return",          TOKreturn           },
    { "goto",            TOKgoto             },
    { "try",             TOKtry              },
    { "catch",           TOKcatch            },
    { "finally",         TOKfinally          },
    { "with",            TOKwith             },
    { "asm",             TOKasm              },
    { "foreach",         TOKforeach          },
    { "foreach_reverse", TOKforeach_reverse  },
    { "scope",           TOKscope            },

    // Declarations and attributes
    { "struct",          TOKstruct           },
    { "class",           TOKclass            },
    { "interface",       TOKinterface        },
    { "union",           TOKunion            },
    { "enum",            TOKenum             },
    { "import",          TOKimport           },
    { "mixin",           TOKmixin            },
    { "static",          TOKstatic           },
    { "final",           TOKfinal            },
    { "const",           TOKconst            },
    { "typedef",         TOKtypedef          },
    { "alias",           TOKalias            },
    { "override",        TOKoverride         },
    { "abstract",        TOKabstract         },
    { "volatile",        TOKvolatile         },
    { "debug",           TOKdebug            },
    { "deprecated",      TOKdeprecated       },
    { "in",              TOKin               },
    { "out",             TOKout              },
    { "inout",           TOKinout            },
    { "lazy",            TOKlazy             },
    { "auto",            TOKauto             },

    // Linkage and protection
    { "align",           TOKalign            },
    { "extern",          TOKextern           },
    { "private",         TOKprivate          },
    { "package",         TOKpackage          },
    { "protected",       TOKprotected        },
    { "public",          TOKpublic           },
    { "export",          TOKexport           },

    { "body",            TOKbody             },
    { "invariant",       TOKinvariant        },
    { "unittest",        TOKunittest         },
    { "version",         TOKversion          },
    //{ "manifest",      TOKmanifest         },

    // Added after 1.0
    { "ref",             TOKref              },
    { "macro",           TOKmacro            },
#if DMDV2
    { "pure",            TOKpure             },
    { "nothrow",         TOKnothrow          },
    { "__thread",        TOKtls              },
    { "__gshared",       TOKgshared          },
    { "__traits",        TOKtraits           },
    { "__overloadset",   TOKoverloadset      },
    { "__FILE__",        TOKfile             },
    { "__LINE__",        TOKline             },
    { "shared",          TOKshared           },
    { "immutable",       TOKimmutable        },
#endif
};

int Token::isKeyword()
{
    for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
    {
      if (keywords[u].value == value)
          return 1;
    }
    return 0;
}

void Lexer::initKeywords()
{   StringValue *sv;
    unsigned u;
    enum TOK v;
    unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);

    if (global.params.Dversion == 1)
      nkeywords -= 2;

    cmtable_init();

    for (u = 0; u < nkeywords; u++)
    { const char *s;

      //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
      s = keywords[u].name;
      v = keywords[u].value;
      sv = stringtable.insert(s, strlen(s));
      sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);

      //printf("tochars[%d] = '%s'\n",v, s);
      Token::tochars[v] = s;
    }

    Token::tochars[TOKeof]          = "EOF";
    Token::tochars[TOKlcurly]       = "{";
    Token::tochars[TOKrcurly]       = "}";
    Token::tochars[TOKlparen]       = "(";
    Token::tochars[TOKrparen]       = ")";
    Token::tochars[TOKlbracket]           = "[";
    Token::tochars[TOKrbracket]           = "]";
    Token::tochars[TOKsemicolon]    = ";";
    Token::tochars[TOKcolon]        = ":";
    Token::tochars[TOKcomma]        = ",";
    Token::tochars[TOKdot]          = ".";
    Token::tochars[TOKxor]          = "^";
    Token::tochars[TOKxorass]       = "^=";
    Token::tochars[TOKassign]       = "=";
    Token::tochars[TOKconstruct]    = "=";
#if DMDV2
    Token::tochars[TOKblit]         = "=";
#endif
    Token::tochars[TOKlt]           = "<";
    Token::tochars[TOKgt]           = ">";
    Token::tochars[TOKle]           = "<=";
    Token::tochars[TOKge]           = ">=";
    Token::tochars[TOKequal]        = "==";
    Token::tochars[TOKnotequal]           = "!=";
    Token::tochars[TOKnotidentity]  = "!is";
    Token::tochars[TOKtobool]       = "!!";

    Token::tochars[TOKunord]        = "!<>=";
    Token::tochars[TOKue]           = "!<>";
    Token::tochars[TOKlg]           = "<>";
    Token::tochars[TOKleg]          = "<>=";
    Token::tochars[TOKule]          = "!>";
    Token::tochars[TOKul]           = "!>=";
    Token::tochars[TOKuge]          = "!<";
    Token::tochars[TOKug]           = "!<=";

    Token::tochars[TOKnot]          = "!";
    Token::tochars[TOKtobool]       = "!!";
    Token::tochars[TOKshl]          = "<<";
    Token::tochars[TOKshr]          = ">>";
    Token::tochars[TOKushr]         = ">>>";
    Token::tochars[TOKadd]          = "+";
    Token::tochars[TOKmin]          = "-";
    Token::tochars[TOKmul]          = "*";
    Token::tochars[TOKdiv]          = "/";
    Token::tochars[TOKmod]          = "%";
    Token::tochars[TOKslice]        = "..";
    Token::tochars[TOKdotdotdot]    = "...";
    Token::tochars[TOKand]          = "&";
    Token::tochars[TOKandand]       = "&&";
    Token::tochars[TOKor]           = "|";
    Token::tochars[TOKoror]         = "||";
    Token::tochars[TOKarray]        = "[]";
    Token::tochars[TOKindex]        = "[i]";
    Token::tochars[TOKaddress]            = "&";
    Token::tochars[TOKstar]         = "*";
    Token::tochars[TOKtilde]        = "~";
    Token::tochars[TOKdollar]       = "$";
    Token::tochars[TOKcast]         = "cast";
    Token::tochars[TOKplusplus]           = "++";
    Token::tochars[TOKminusminus]   = "--";
    Token::tochars[TOKtype]         = "type";
    Token::tochars[TOKquestion]           = "?";
    Token::tochars[TOKneg]          = "-";
    Token::tochars[TOKuadd]         = "+";
    Token::tochars[TOKvar]          = "var";
    Token::tochars[TOKaddass]       = "+=";
    Token::tochars[TOKminass]       = "-=";
    Token::tochars[TOKmulass]       = "*=";
    Token::tochars[TOKdivass]       = "/=";
    Token::tochars[TOKmodass]       = "%=";
    Token::tochars[TOKshlass]       = "<<=";
    Token::tochars[TOKshrass]       = ">>=";
    Token::tochars[TOKushrass]            = ">>>=";
    Token::tochars[TOKandass]       = "&=";
    Token::tochars[TOKorass]        = "|=";
    Token::tochars[TOKcatass]       = "~=";
    Token::tochars[TOKcat]          = "~";
    Token::tochars[TOKcall]         = "call";
    Token::tochars[TOKidentity]           = "is";
    Token::tochars[TOKnotidentity]  = "!is";

    Token::tochars[TOKorass]        = "|=";
    Token::tochars[TOKidentifier]   = "identifier";
    Token::tochars[TOKat]           = "@";

     // For debugging
    Token::tochars[TOKdotexp]       = "dotexp";
    Token::tochars[TOKdotti]        = "dotti";
    Token::tochars[TOKdotvar]       = "dotvar";
    Token::tochars[TOKdottype]            = "dottype";
    Token::tochars[TOKsymoff]       = "symoff";
    Token::tochars[TOKarraylength]  = "arraylength";
    Token::tochars[TOKarrayliteral] = "arrayliteral";
    Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
    Token::tochars[TOKstructliteral]      = "structliteral";
    Token::tochars[TOKstring]       = "string";
    Token::tochars[TOKdsymbol]            = "symbol";
    Token::tochars[TOKtuple]        = "tuple";
    Token::tochars[TOKdeclaration]  = "declaration";
    Token::tochars[TOKdottd]        = "dottd";
    Token::tochars[TOKon_scope_exit]      = "scope(exit)";
    Token::tochars[TOKon_scope_success]   = "scope(success)";
    Token::tochars[TOKon_scope_failure]   = "scope(failure)";
}

Generated by  Doxygen 1.6.0   Back to index