1206 lines
27 KiB
Go
1206 lines
27 KiB
Go
package parser
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"math/big"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf16"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/unicode/rangetable"
|
|
|
|
"github.com/dop251/goja/file"
|
|
"github.com/dop251/goja/token"
|
|
"github.com/dop251/goja/unistring"
|
|
)
|
|
|
|
var (
|
|
unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space)
|
|
unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start)
|
|
unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue)
|
|
)
|
|
|
|
// isDecimalDigit reports whether chr is one of the ASCII digits '0'..'9'.
func isDecimalDigit(chr rune) bool {
	if chr < '0' {
		return false
	}
	return chr <= '9'
}
|
|
|
|
func IsIdentifier(s string) bool {
|
|
if s == "" {
|
|
return false
|
|
}
|
|
r, size := utf8.DecodeRuneInString(s)
|
|
if !isIdentifierStart(r) {
|
|
return false
|
|
}
|
|
for _, r := range s[size:] {
|
|
if !isIdentifierPart(r) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// digitValue returns the numeric value of chr interpreted as a hexadecimal
// digit (0-15, either letter case). Any other rune yields 16, which is
// larger than every legal digit value for the bases used by the lexer.
func digitValue(chr rune) int {
	if '0' <= chr && chr <= '9' {
		return int(chr - '0')
	}
	if 'a' <= chr && chr <= 'f' {
		return int(chr - 'a' + 10)
	}
	if 'A' <= chr && chr <= 'F' {
		return int(chr - 'A' + 10)
	}
	return 16 // Larger than any legal digit value
}
|
|
|
|
func isDigit(chr rune, base int) bool {
|
|
return digitValue(chr) < base
|
|
}
|
|
|
|
func isIdStartUnicode(r rune) bool {
|
|
return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r)
|
|
}
|
|
|
|
func isIdPartUnicode(r rune) bool {
|
|
return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D'
|
|
}
|
|
|
|
func isIdentifierStart(chr rune) bool {
|
|
return chr == '$' || chr == '_' || chr == '\\' ||
|
|
'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
|
|
chr >= utf8.RuneSelf && isIdStartUnicode(chr)
|
|
}
|
|
|
|
func isIdentifierPart(chr rune) bool {
|
|
return chr == '$' || chr == '_' || chr == '\\' ||
|
|
'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
|
|
'0' <= chr && chr <= '9' ||
|
|
chr >= utf8.RuneSelf && isIdPartUnicode(chr)
|
|
}
|
|
|
|
func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) {
|
|
offset := self.chrOffset
|
|
hasEscape := false
|
|
isUnicode := false
|
|
length := 0
|
|
for isIdentifierPart(self.chr) {
|
|
r := self.chr
|
|
length++
|
|
if r == '\\' {
|
|
hasEscape = true
|
|
distance := self.chrOffset - offset
|
|
self.read()
|
|
if self.chr != 'u' {
|
|
return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
|
|
}
|
|
var value rune
|
|
if self._peek() == '{' {
|
|
self.read()
|
|
value = -1
|
|
for value <= utf8.MaxRune {
|
|
self.read()
|
|
if self.chr == '}' {
|
|
break
|
|
}
|
|
decimal, ok := hex2decimal(byte(self.chr))
|
|
if !ok {
|
|
return "", "", false, "Invalid Unicode escape sequence"
|
|
}
|
|
if value == -1 {
|
|
value = decimal
|
|
} else {
|
|
value = value<<4 | decimal
|
|
}
|
|
}
|
|
if value == -1 {
|
|
return "", "", false, "Invalid Unicode escape sequence"
|
|
}
|
|
} else {
|
|
for j := 0; j < 4; j++ {
|
|
self.read()
|
|
decimal, ok := hex2decimal(byte(self.chr))
|
|
if !ok {
|
|
return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
|
|
}
|
|
value = value<<4 | decimal
|
|
}
|
|
}
|
|
if value == '\\' {
|
|
return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
|
|
} else if distance == 0 {
|
|
if !isIdentifierStart(value) {
|
|
return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
|
|
}
|
|
} else if distance > 0 {
|
|
if !isIdentifierPart(value) {
|
|
return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
|
|
}
|
|
}
|
|
r = value
|
|
}
|
|
if r >= utf8.RuneSelf {
|
|
isUnicode = true
|
|
if r > 0xFFFF {
|
|
length++
|
|
}
|
|
}
|
|
self.read()
|
|
}
|
|
|
|
literal := self.str[offset:self.chrOffset]
|
|
var parsed unistring.String
|
|
if hasEscape || isUnicode {
|
|
var err string
|
|
// TODO strict
|
|
parsed, err = parseStringLiteral(literal, length, isUnicode, false)
|
|
if err != "" {
|
|
return "", "", false, err
|
|
}
|
|
} else {
|
|
parsed = unistring.String(literal)
|
|
}
|
|
|
|
return literal, parsed, hasEscape, ""
|
|
}
|
|
|
|
// 7.2
//
// isLineWhiteSpace reports whether chr is white space that does not end a
// line. Line terminators and U+0085 are explicitly excluded; anything not
// listed falls back to unicode.IsSpace.
func isLineWhiteSpace(chr rune) bool {
	switch chr {
	case '\n', '\r', '\u2028', '\u2029', '\u0085':
		return false
	case '\t', '\v', '\f', ' ', '\u00a0', '\ufeff':
		return true
	}
	return unicode.IsSpace(chr)
}
|
|
|
|
// 7.3
//
// isLineTerminator reports whether chr is LF, CR, U+2028 or U+2029.
func isLineTerminator(chr rune) bool {
	return chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029'
}
|
|
|
|
type parserState struct {
|
|
idx file.Idx
|
|
tok token.Token
|
|
literal string
|
|
parsedLiteral unistring.String
|
|
implicitSemicolon, insertSemicolon bool
|
|
chr rune
|
|
chrOffset, offset int
|
|
errorCount int
|
|
}
|
|
|
|
func (self *_parser) mark(state *parserState) *parserState {
|
|
if state == nil {
|
|
state = &parserState{}
|
|
}
|
|
state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset =
|
|
self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
|
|
|
|
state.errorCount = len(self.errors)
|
|
return state
|
|
}
|
|
|
|
func (self *_parser) restore(state *parserState) {
|
|
self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset =
|
|
state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset
|
|
self.errors = self.errors[:state.errorCount]
|
|
}
|
|
|
|
func (self *_parser) peek() token.Token {
|
|
implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
|
|
tok, _, _, _ := self.scan()
|
|
self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset
|
|
return tok
|
|
}
|
|
|
|
func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {
|
|
|
|
self.implicitSemicolon = false
|
|
|
|
for {
|
|
self.skipWhiteSpace()
|
|
|
|
idx = self.idxOf(self.chrOffset)
|
|
insertSemicolon := false
|
|
|
|
switch chr := self.chr; {
|
|
case isIdentifierStart(chr):
|
|
var err string
|
|
var hasEscape bool
|
|
literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
|
|
if err != "" {
|
|
tkn = token.ILLEGAL
|
|
break
|
|
}
|
|
if len(parsedLiteral) > 1 {
|
|
// Keywords are longer than 1 character, avoid lookup otherwise
|
|
var strict bool
|
|
tkn, strict = token.IsKeyword(string(parsedLiteral))
|
|
if hasEscape {
|
|
self.insertSemicolon = true
|
|
if tkn == 0 || self.isBindingId(tkn) {
|
|
tkn = token.IDENTIFIER
|
|
} else {
|
|
tkn = token.ESCAPED_RESERVED_WORD
|
|
}
|
|
return
|
|
}
|
|
switch tkn {
|
|
case 0: // Not a keyword
|
|
// no-op
|
|
case token.KEYWORD:
|
|
if strict {
|
|
// TODO If strict and in strict mode, then this is not a break
|
|
break
|
|
}
|
|
return
|
|
|
|
case
|
|
token.BOOLEAN,
|
|
token.NULL,
|
|
token.THIS,
|
|
token.BREAK,
|
|
token.THROW, // A newline after a throw is not allowed, but we need to detect it
|
|
token.YIELD,
|
|
token.RETURN,
|
|
token.CONTINUE,
|
|
token.DEBUGGER:
|
|
self.insertSemicolon = true
|
|
return
|
|
|
|
case token.ASYNC:
|
|
// async only has special meaning if not followed by a LineTerminator
|
|
if self.skipWhiteSpaceCheckLineTerminator() {
|
|
self.insertSemicolon = true
|
|
tkn = token.IDENTIFIER
|
|
}
|
|
return
|
|
default:
|
|
return
|
|
|
|
}
|
|
}
|
|
self.insertSemicolon = true
|
|
tkn = token.IDENTIFIER
|
|
return
|
|
case '0' <= chr && chr <= '9':
|
|
self.insertSemicolon = true
|
|
tkn, literal = self.scanNumericLiteral(false)
|
|
return
|
|
default:
|
|
self.read()
|
|
switch chr {
|
|
case -1:
|
|
if self.insertSemicolon {
|
|
self.insertSemicolon = false
|
|
self.implicitSemicolon = true
|
|
}
|
|
tkn = token.EOF
|
|
case '\r', '\n', '\u2028', '\u2029':
|
|
self.insertSemicolon = false
|
|
self.implicitSemicolon = true
|
|
continue
|
|
case ':':
|
|
tkn = token.COLON
|
|
case '.':
|
|
if digitValue(self.chr) < 10 {
|
|
insertSemicolon = true
|
|
tkn, literal = self.scanNumericLiteral(true)
|
|
} else {
|
|
if self.chr == '.' {
|
|
self.read()
|
|
if self.chr == '.' {
|
|
self.read()
|
|
tkn = token.ELLIPSIS
|
|
} else {
|
|
tkn = token.ILLEGAL
|
|
}
|
|
} else {
|
|
tkn = token.PERIOD
|
|
}
|
|
}
|
|
case ',':
|
|
tkn = token.COMMA
|
|
case ';':
|
|
tkn = token.SEMICOLON
|
|
case '(':
|
|
tkn = token.LEFT_PARENTHESIS
|
|
case ')':
|
|
tkn = token.RIGHT_PARENTHESIS
|
|
insertSemicolon = true
|
|
case '[':
|
|
tkn = token.LEFT_BRACKET
|
|
case ']':
|
|
tkn = token.RIGHT_BRACKET
|
|
insertSemicolon = true
|
|
case '{':
|
|
tkn = token.LEFT_BRACE
|
|
case '}':
|
|
tkn = token.RIGHT_BRACE
|
|
insertSemicolon = true
|
|
case '+':
|
|
tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
|
|
if tkn == token.INCREMENT {
|
|
insertSemicolon = true
|
|
}
|
|
case '-':
|
|
tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
|
|
if tkn == token.DECREMENT {
|
|
insertSemicolon = true
|
|
}
|
|
case '*':
|
|
if self.chr == '*' {
|
|
self.read()
|
|
tkn = self.switch2(token.EXPONENT, token.EXPONENT_ASSIGN)
|
|
} else {
|
|
tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
|
|
}
|
|
case '/':
|
|
if self.chr == '/' {
|
|
self.skipSingleLineComment()
|
|
continue
|
|
} else if self.chr == '*' {
|
|
if self.skipMultiLineComment() {
|
|
self.insertSemicolon = false
|
|
self.implicitSemicolon = true
|
|
}
|
|
continue
|
|
} else {
|
|
// Could be division, could be RegExp literal
|
|
tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
|
|
insertSemicolon = true
|
|
}
|
|
case '%':
|
|
tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
|
|
case '^':
|
|
tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
|
|
case '<':
|
|
tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
|
|
case '>':
|
|
tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
|
|
case '=':
|
|
if self.chr == '>' {
|
|
self.read()
|
|
if self.implicitSemicolon {
|
|
tkn = token.ILLEGAL
|
|
} else {
|
|
tkn = token.ARROW
|
|
}
|
|
} else {
|
|
tkn = self.switch2(token.ASSIGN, token.EQUAL)
|
|
if tkn == token.EQUAL && self.chr == '=' {
|
|
self.read()
|
|
tkn = token.STRICT_EQUAL
|
|
}
|
|
}
|
|
case '!':
|
|
tkn = self.switch2(token.NOT, token.NOT_EQUAL)
|
|
if tkn == token.NOT_EQUAL && self.chr == '=' {
|
|
self.read()
|
|
tkn = token.STRICT_NOT_EQUAL
|
|
}
|
|
case '&':
|
|
tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
|
|
case '|':
|
|
tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
|
|
case '~':
|
|
tkn = token.BITWISE_NOT
|
|
case '?':
|
|
if self.chr == '.' && !isDecimalDigit(self._peek()) {
|
|
self.read()
|
|
tkn = token.QUESTION_DOT
|
|
} else if self.chr == '?' {
|
|
self.read()
|
|
tkn = token.COALESCE
|
|
} else {
|
|
tkn = token.QUESTION_MARK
|
|
}
|
|
case '"', '\'':
|
|
insertSemicolon = true
|
|
tkn = token.STRING
|
|
var err string
|
|
literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
|
|
if err != "" {
|
|
tkn = token.ILLEGAL
|
|
}
|
|
case '`':
|
|
tkn = token.BACKTICK
|
|
case '#':
|
|
if self.chrOffset == 1 && self.chr == '!' {
|
|
self.skipSingleLineComment()
|
|
continue
|
|
}
|
|
|
|
var err string
|
|
literal, parsedLiteral, _, err = self.scanIdentifier()
|
|
if err != "" || literal == "" {
|
|
tkn = token.ILLEGAL
|
|
break
|
|
}
|
|
self.insertSemicolon = true
|
|
tkn = token.PRIVATE_IDENTIFIER
|
|
return
|
|
default:
|
|
self.errorUnexpected(idx, chr)
|
|
tkn = token.ILLEGAL
|
|
}
|
|
}
|
|
self.insertSemicolon = insertSemicolon
|
|
return
|
|
}
|
|
}
|
|
|
|
func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
|
|
if self.chr == '=' {
|
|
self.read()
|
|
return tkn1
|
|
}
|
|
return tkn0
|
|
}
|
|
|
|
func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
|
|
if self.chr == '=' {
|
|
self.read()
|
|
return tkn1
|
|
}
|
|
if self.chr == chr2 {
|
|
self.read()
|
|
return tkn2
|
|
}
|
|
return tkn0
|
|
}
|
|
|
|
func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
|
|
if self.chr == '=' {
|
|
self.read()
|
|
return tkn1
|
|
}
|
|
if self.chr == chr2 {
|
|
self.read()
|
|
if self.chr == '=' {
|
|
self.read()
|
|
return tkn3
|
|
}
|
|
return tkn2
|
|
}
|
|
return tkn0
|
|
}
|
|
|
|
func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
|
|
if self.chr == '=' {
|
|
self.read()
|
|
return tkn1
|
|
}
|
|
if self.chr == chr2 {
|
|
self.read()
|
|
if self.chr == '=' {
|
|
self.read()
|
|
return tkn3
|
|
}
|
|
if self.chr == chr3 {
|
|
self.read()
|
|
if self.chr == '=' {
|
|
self.read()
|
|
return tkn5
|
|
}
|
|
return tkn4
|
|
}
|
|
return tkn2
|
|
}
|
|
return tkn0
|
|
}
|
|
|
|
func (self *_parser) _peek() rune {
|
|
if self.offset < self.length {
|
|
return rune(self.str[self.offset])
|
|
}
|
|
return -1
|
|
}
|
|
|
|
func (self *_parser) read() {
|
|
if self.offset < self.length {
|
|
self.chrOffset = self.offset
|
|
chr, width := rune(self.str[self.offset]), 1
|
|
if chr >= utf8.RuneSelf { // !ASCII
|
|
chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
|
|
if chr == utf8.RuneError && width == 1 {
|
|
self.error(self.chrOffset, "Invalid UTF-8 character")
|
|
}
|
|
}
|
|
self.offset += width
|
|
self.chr = chr
|
|
} else {
|
|
self.chrOffset = self.length
|
|
self.chr = -1 // EOF
|
|
}
|
|
}
|
|
|
|
func (self *_parser) skipSingleLineComment() {
|
|
for self.chr != -1 {
|
|
self.read()
|
|
if isLineTerminator(self.chr) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (self *_parser) skipMultiLineComment() (hasLineTerminator bool) {
|
|
self.read()
|
|
for self.chr >= 0 {
|
|
chr := self.chr
|
|
if chr == '\r' || chr == '\n' || chr == '\u2028' || chr == '\u2029' {
|
|
hasLineTerminator = true
|
|
break
|
|
}
|
|
self.read()
|
|
if chr == '*' && self.chr == '/' {
|
|
self.read()
|
|
return
|
|
}
|
|
}
|
|
for self.chr >= 0 {
|
|
chr := self.chr
|
|
self.read()
|
|
if chr == '*' && self.chr == '/' {
|
|
self.read()
|
|
return
|
|
}
|
|
}
|
|
|
|
self.errorUnexpected(0, self.chr)
|
|
return
|
|
}
|
|
|
|
func (self *_parser) skipWhiteSpaceCheckLineTerminator() bool {
|
|
for {
|
|
switch self.chr {
|
|
case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
|
|
self.read()
|
|
continue
|
|
case '\r':
|
|
if self._peek() == '\n' {
|
|
self.read()
|
|
}
|
|
fallthrough
|
|
case '\u2028', '\u2029', '\n':
|
|
return true
|
|
}
|
|
if self.chr >= utf8.RuneSelf {
|
|
if unicode.IsSpace(self.chr) {
|
|
self.read()
|
|
continue
|
|
}
|
|
}
|
|
break
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (self *_parser) skipWhiteSpace() {
|
|
for {
|
|
switch self.chr {
|
|
case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
|
|
self.read()
|
|
continue
|
|
case '\r':
|
|
if self._peek() == '\n' {
|
|
self.read()
|
|
}
|
|
fallthrough
|
|
case '\u2028', '\u2029', '\n':
|
|
if self.insertSemicolon {
|
|
return
|
|
}
|
|
self.read()
|
|
continue
|
|
}
|
|
if self.chr >= utf8.RuneSelf {
|
|
if unicode.IsSpace(self.chr) {
|
|
self.read()
|
|
continue
|
|
}
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
func (self *_parser) scanMantissa(base int, allowSeparator bool) {
|
|
for digitValue(self.chr) < base || (allowSeparator && self.chr == '_') {
|
|
afterUnderscore := self.chr == '_'
|
|
self.read()
|
|
if afterUnderscore && !isDigit(self.chr, base) {
|
|
self.error(self.chrOffset, "Only one underscore is allowed as numeric separator")
|
|
}
|
|
}
|
|
}
|
|
|
|
func (self *_parser) scanEscape(quote rune) (int, bool) {
|
|
|
|
var length, base uint32
|
|
chr := self.chr
|
|
switch chr {
|
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
|
// Octal:
|
|
length, base = 3, 8
|
|
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
|
|
self.read()
|
|
return 1, false
|
|
case '\r':
|
|
self.read()
|
|
if self.chr == '\n' {
|
|
self.read()
|
|
return 2, false
|
|
}
|
|
return 1, false
|
|
case '\n':
|
|
self.read()
|
|
return 1, false
|
|
case '\u2028', '\u2029':
|
|
self.read()
|
|
return 1, true
|
|
case 'x':
|
|
self.read()
|
|
length, base = 2, 16
|
|
case 'u':
|
|
self.read()
|
|
if self.chr == '{' {
|
|
self.read()
|
|
length, base = 0, 16
|
|
} else {
|
|
length, base = 4, 16
|
|
}
|
|
default:
|
|
self.read() // Always make progress
|
|
}
|
|
|
|
if base > 0 {
|
|
var value uint32
|
|
if length > 0 {
|
|
for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
|
|
digit := uint32(digitValue(self.chr))
|
|
if digit >= base {
|
|
break
|
|
}
|
|
value = value*base + digit
|
|
self.read()
|
|
}
|
|
} else {
|
|
for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
|
|
if self.chr == '}' {
|
|
self.read()
|
|
break
|
|
}
|
|
digit := uint32(digitValue(self.chr))
|
|
if digit >= base {
|
|
break
|
|
}
|
|
value = value*base + digit
|
|
self.read()
|
|
}
|
|
}
|
|
chr = rune(value)
|
|
}
|
|
if chr >= utf8.RuneSelf {
|
|
if chr > 0xFFFF {
|
|
return 2, true
|
|
}
|
|
return 1, true
|
|
}
|
|
return 1, false
|
|
}
|
|
|
|
func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) {
|
|
// " ' /
|
|
quote := rune(self.str[offset])
|
|
length := 0
|
|
isUnicode := false
|
|
for self.chr != quote {
|
|
chr := self.chr
|
|
if chr == '\n' || chr == '\r' || chr < 0 {
|
|
goto newline
|
|
}
|
|
if quote == '/' && (self.chr == '\u2028' || self.chr == '\u2029') {
|
|
goto newline
|
|
}
|
|
self.read()
|
|
if chr == '\\' {
|
|
if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
|
|
if quote == '/' {
|
|
goto newline
|
|
}
|
|
self.scanNewline()
|
|
} else {
|
|
l, u := self.scanEscape(quote)
|
|
length += l
|
|
if u {
|
|
isUnicode = true
|
|
}
|
|
}
|
|
continue
|
|
} else if chr == '[' && quote == '/' {
|
|
// Allow a slash (/) in a bracket character class ([...])
|
|
// TODO Fix this, this is hacky...
|
|
quote = -1
|
|
} else if chr == ']' && quote == -1 {
|
|
quote = '/'
|
|
}
|
|
if chr >= utf8.RuneSelf {
|
|
isUnicode = true
|
|
if chr > 0xFFFF {
|
|
length++
|
|
}
|
|
}
|
|
length++
|
|
}
|
|
|
|
// " ' /
|
|
self.read()
|
|
literal = self.str[offset:self.chrOffset]
|
|
if parse {
|
|
// TODO strict
|
|
parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
|
|
}
|
|
return
|
|
|
|
newline:
|
|
self.scanNewline()
|
|
errStr := "String not terminated"
|
|
if quote == '/' {
|
|
errStr = "Invalid regular expression: missing /"
|
|
self.error(self.idxOf(offset), errStr)
|
|
}
|
|
return "", "", errStr
|
|
}
|
|
|
|
func (self *_parser) scanNewline() {
|
|
if self.chr == '\u2028' || self.chr == '\u2029' {
|
|
self.read()
|
|
return
|
|
}
|
|
if self.chr == '\r' {
|
|
self.read()
|
|
if self.chr != '\n' {
|
|
return
|
|
}
|
|
}
|
|
self.read()
|
|
}
|
|
|
|
func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) {
|
|
offset := self.chrOffset
|
|
var end int
|
|
length := 0
|
|
isUnicode := false
|
|
hasCR := false
|
|
for {
|
|
chr := self.chr
|
|
if chr < 0 {
|
|
goto unterminated
|
|
}
|
|
self.read()
|
|
if chr == '`' {
|
|
finished = true
|
|
end = self.chrOffset - 1
|
|
break
|
|
}
|
|
if chr == '\\' {
|
|
if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
|
|
if self.chr == '\r' {
|
|
hasCR = true
|
|
}
|
|
self.scanNewline()
|
|
} else {
|
|
if self.chr == '8' || self.chr == '9' {
|
|
if parseErr == "" {
|
|
parseErr = "\\8 and \\9 are not allowed in template strings."
|
|
}
|
|
}
|
|
l, u := self.scanEscape('`')
|
|
length += l
|
|
if u {
|
|
isUnicode = true
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
if chr == '$' && self.chr == '{' {
|
|
self.read()
|
|
end = self.chrOffset - 2
|
|
break
|
|
}
|
|
if chr >= utf8.RuneSelf {
|
|
isUnicode = true
|
|
if chr > 0xFFFF {
|
|
length++
|
|
}
|
|
} else if chr == '\r' {
|
|
hasCR = true
|
|
if self.chr == '\n' {
|
|
length--
|
|
}
|
|
}
|
|
length++
|
|
}
|
|
literal = self.str[offset:end]
|
|
if hasCR {
|
|
literal = normaliseCRLF(literal)
|
|
}
|
|
if parseErr == "" {
|
|
parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
|
|
}
|
|
self.insertSemicolon = true
|
|
return
|
|
unterminated:
|
|
err = err_UnexpectedEndOfInput
|
|
finished = true
|
|
return
|
|
}
|
|
|
|
// normaliseCRLF returns s with every CR LF pair and every lone CR replaced by
// a single LF.
func normaliseCRLF(s string) string {
	var out strings.Builder
	out.Grow(len(s))
	for i, n := 0, len(s); i < n; i++ {
		c := s[i]
		if c != '\r' {
			out.WriteByte(c)
			continue
		}
		out.WriteByte('\n')
		// Swallow the LF of a CR LF pair.
		if i+1 < n && s[i+1] == '\n' {
			i++
		}
	}
	return out.String()
}
|
|
|
|
// hex2decimal converts a single ASCII hexadecimal digit to its value. ok is
// false, and value 0, when chr is not a hexadecimal digit.
func hex2decimal(chr byte) (value rune, ok bool) {
	switch c := rune(chr); {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}
|
|
|
|
// parseNumberLiteral converts the source text of a numeric literal to a
// value. The dynamic type of the result is:
//
//   - int64 when the literal is an integer that fits (strconv.ParseInt with
//     base 0, so 0x/0o/0b prefixes, a legacy leading 0 and '_' separators are
//     understood);
//   - float64 for everything else that strconv.ParseFloat accepts, including
//     ±Inf for values beyond the float64 range, and for 0x/0o/0b integers too
//     large for int64;
//   - *big.Int for literals with an 'n' suffix.
//
// Anything else yields the error "Illegal numeric literal".
//
// Prefixed integers that overflow int64 are converted through math/big so
// that hexadecimal, octal and binary literals, with or without numeric
// separators, are all handled and rounded to the nearest float64 once.
func parseNumberLiteral(literal string) (value interface{}, err error) {
	// TODO Is Uint okay? What about -MAX_UINT
	intValue, intErr := strconv.ParseInt(literal, 0, 64)
	if intErr == nil {
		return intValue, nil
	}

	floatValue, floatErr := strconv.ParseFloat(literal, 64)
	if floatErr == nil || errors.Is(floatErr, strconv.ErrRange) {
		// A range error still carries the right value: Infinity, etc.
		return floatValue, nil
	}

	hasBigIntSuffix := len(literal) > 1 && literal[len(literal)-1] == 'n'

	// ParseInt reports a range error as soon as the digits overflow, before
	// it has looked at the rest of the text, so the suffix must be checked
	// separately.
	if errors.Is(intErr, strconv.ErrRange) && !hasBigIntSuffix && len(literal) > 2 && literal[0] == '0' {
		switch literal[1] {
		case 'x', 'X', 'o', 'O', 'b', 'B':
			// Could just be a very large number (e.g. 0x8000000000000000)
			wide, ok := new(big.Int).SetString(literal, 0)
			if !ok {
				return nil, errors.New("Illegal numeric literal")
			}
			// Float64 rounds to nearest and yields +Inf on overflow.
			f, _ := new(big.Float).SetInt(wide).Float64()
			return f, nil
		}
	}

	if hasBigIntSuffix {
		// A leading zero followed by a decimal digit (e.g. 01n) is rejected.
		if literal[0] == '0' && len(literal) > 2 && '0' <= literal[1] && literal[1] <= '9' {
			return nil, errors.New("Illegal numeric literal")
		}
		// Parse as big.Int
		bigInt, ok := new(big.Int).SetString(literal[:len(literal)-1], 0)
		if !ok {
			return nil, errors.New("Illegal numeric literal")
		}
		return bigInt, nil
	}

	return nil, errors.New("Illegal numeric literal")
}
|
|
|
|
func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) {
|
|
var sb strings.Builder
|
|
var chars []uint16
|
|
if unicode {
|
|
chars = make([]uint16, 1, length+1)
|
|
chars[0] = unistring.BOM
|
|
} else {
|
|
sb.Grow(length)
|
|
}
|
|
str := literal
|
|
for len(str) > 0 {
|
|
switch chr := str[0]; {
|
|
// We do not explicitly handle the case of the quote
|
|
// value, which can be: " ' /
|
|
// This assumes we're already passed a partially well-formed literal
|
|
case chr >= utf8.RuneSelf:
|
|
chr, size := utf8.DecodeRuneInString(str)
|
|
if chr <= 0xFFFF {
|
|
chars = append(chars, uint16(chr))
|
|
} else {
|
|
first, second := utf16.EncodeRune(chr)
|
|
chars = append(chars, uint16(first), uint16(second))
|
|
}
|
|
str = str[size:]
|
|
continue
|
|
case chr != '\\':
|
|
if unicode {
|
|
chars = append(chars, uint16(chr))
|
|
} else {
|
|
sb.WriteByte(chr)
|
|
}
|
|
str = str[1:]
|
|
continue
|
|
}
|
|
|
|
if len(str) <= 1 {
|
|
panic("len(str) <= 1")
|
|
}
|
|
chr := str[1]
|
|
var value rune
|
|
if chr >= utf8.RuneSelf {
|
|
str = str[1:]
|
|
var size int
|
|
value, size = utf8.DecodeRuneInString(str)
|
|
str = str[size:] // \ + <character>
|
|
if value == '\u2028' || value == '\u2029' {
|
|
continue
|
|
}
|
|
} else {
|
|
str = str[2:] // \<character>
|
|
switch chr {
|
|
case 'b':
|
|
value = '\b'
|
|
case 'f':
|
|
value = '\f'
|
|
case 'n':
|
|
value = '\n'
|
|
case 'r':
|
|
value = '\r'
|
|
case 't':
|
|
value = '\t'
|
|
case 'v':
|
|
value = '\v'
|
|
case 'x', 'u':
|
|
size := 0
|
|
switch chr {
|
|
case 'x':
|
|
size = 2
|
|
case 'u':
|
|
if str == "" || str[0] != '{' {
|
|
size = 4
|
|
}
|
|
}
|
|
if size > 0 {
|
|
if len(str) < size {
|
|
return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
|
|
}
|
|
for j := 0; j < size; j++ {
|
|
decimal, ok := hex2decimal(str[j])
|
|
if !ok {
|
|
return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size])
|
|
}
|
|
value = value<<4 | decimal
|
|
}
|
|
} else {
|
|
str = str[1:]
|
|
var val rune
|
|
value = -1
|
|
for ; size < len(str); size++ {
|
|
if str[size] == '}' {
|
|
if size == 0 {
|
|
return "", fmt.Sprintf("invalid escape: \\%s", string(chr))
|
|
}
|
|
size++
|
|
value = val
|
|
break
|
|
}
|
|
decimal, ok := hex2decimal(str[size])
|
|
if !ok {
|
|
return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1])
|
|
}
|
|
val = val<<4 | decimal
|
|
if val > utf8.MaxRune {
|
|
return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1])
|
|
}
|
|
}
|
|
if value == -1 {
|
|
return "", fmt.Sprintf("unterminated \\u{: %q", str)
|
|
}
|
|
}
|
|
str = str[size:]
|
|
if chr == 'x' {
|
|
break
|
|
}
|
|
if value > utf8.MaxRune {
|
|
panic("value > utf8.MaxRune")
|
|
}
|
|
case '0':
|
|
if len(str) == 0 || '0' > str[0] || str[0] > '7' {
|
|
value = 0
|
|
break
|
|
}
|
|
fallthrough
|
|
case '1', '2', '3', '4', '5', '6', '7':
|
|
if strict {
|
|
return "", "Octal escape sequences are not allowed in this context"
|
|
}
|
|
value = rune(chr) - '0'
|
|
j := 0
|
|
for ; j < 2; j++ {
|
|
if len(str) < j+1 {
|
|
break
|
|
}
|
|
chr := str[j]
|
|
if '0' > chr || chr > '7' {
|
|
break
|
|
}
|
|
decimal := rune(str[j]) - '0'
|
|
value = (value << 3) | decimal
|
|
}
|
|
str = str[j:]
|
|
case '\\':
|
|
value = '\\'
|
|
case '\'', '"':
|
|
value = rune(chr)
|
|
case '\r':
|
|
if len(str) > 0 {
|
|
if str[0] == '\n' {
|
|
str = str[1:]
|
|
}
|
|
}
|
|
fallthrough
|
|
case '\n':
|
|
continue
|
|
default:
|
|
value = rune(chr)
|
|
}
|
|
}
|
|
if unicode {
|
|
if value <= 0xFFFF {
|
|
chars = append(chars, uint16(value))
|
|
} else {
|
|
first, second := utf16.EncodeRune(value)
|
|
chars = append(chars, uint16(first), uint16(second))
|
|
}
|
|
} else {
|
|
if value >= utf8.RuneSelf {
|
|
return "", "Unexpected unicode character"
|
|
}
|
|
sb.WriteByte(byte(value))
|
|
}
|
|
}
|
|
|
|
if unicode {
|
|
if len(chars) != length+1 {
|
|
panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
|
|
}
|
|
return unistring.FromUtf16(chars), ""
|
|
}
|
|
if sb.Len() != length {
|
|
panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
|
|
}
|
|
return unistring.String(sb.String()), ""
|
|
}
|
|
|
|
func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
|
|
|
|
offset := self.chrOffset
|
|
tkn := token.NUMBER
|
|
|
|
if decimalPoint {
|
|
offset--
|
|
self.scanMantissa(10, true)
|
|
} else {
|
|
if self.chr == '0' {
|
|
self.read()
|
|
base := 0
|
|
switch self.chr {
|
|
case 'x', 'X':
|
|
base = 16
|
|
case 'o', 'O':
|
|
base = 8
|
|
case 'b', 'B':
|
|
base = 2
|
|
case '.', 'e', 'E':
|
|
// no-op
|
|
default:
|
|
// legacy octal
|
|
self.scanMantissa(8, false)
|
|
goto end
|
|
}
|
|
if base > 0 {
|
|
self.read()
|
|
if !isDigit(self.chr, base) {
|
|
return token.ILLEGAL, self.str[offset:self.chrOffset]
|
|
}
|
|
self.scanMantissa(base, true)
|
|
goto end
|
|
}
|
|
} else {
|
|
self.scanMantissa(10, true)
|
|
}
|
|
if self.chr == '.' {
|
|
self.read()
|
|
self.scanMantissa(10, true)
|
|
}
|
|
}
|
|
|
|
if self.chr == 'e' || self.chr == 'E' {
|
|
self.read()
|
|
if self.chr == '-' || self.chr == '+' {
|
|
self.read()
|
|
}
|
|
if isDecimalDigit(self.chr) {
|
|
self.read()
|
|
self.scanMantissa(10, true)
|
|
} else {
|
|
return token.ILLEGAL, self.str[offset:self.chrOffset]
|
|
}
|
|
}
|
|
end:
|
|
if self.chr == 'n' || self.chr == 'N' {
|
|
self.read()
|
|
return tkn, self.str[offset:self.chrOffset]
|
|
}
|
|
if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
|
|
return token.ILLEGAL, self.str[offset:self.chrOffset]
|
|
}
|
|
|
|
return tkn, self.str[offset:self.chrOffset]
|
|
}
|