// Source listing: ai_old/goja/parser/lexer.go
// Last commit: Star 767d87ac3e, 2024-09-24 13:34:32 +08:00
//   update throw exception
//   add util.load and util.save
// Listing size: 1206 lines, 27 KiB, Go

package parser
import (
"errors"
"fmt"
"math/big"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
"golang.org/x/text/unicode/rangetable"
"apigo.cc/ai/ai/goja/file"
"apigo.cc/ai/ai/goja/token"
"apigo.cc/ai/ai/goja/unistring"
)
// Rune tables used to classify non-ASCII identifier characters.
var (
// unicodeRangeIdNeg lists runes that are rejected by isIdStartUnicode and
// isIdPartUnicode even when one of the positive tables below contains them.
unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space)
// unicodeRangeIdStartPos lists runes accepted as the first character of an identifier.
unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start)
// unicodeRangeIdContPos extends the start table with the marks, digits and
// connector punctuation accepted after the first character.
unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue)
)
// isDecimalDigit reports whether chr is one of the ASCII digits '0' through '9'.
func isDecimalDigit(chr rune) bool {
	if chr < '0' {
		return false
	}
	return chr <= '9'
}
func IsIdentifier(s string) bool {
if s == "" {
return false
}
r, size := utf8.DecodeRuneInString(s)
if !isIdentifierStart(r) {
return false
}
for _, r := range s[size:] {
if !isIdentifierPart(r) {
return false
}
}
return true
}
// digitValue returns the numeric value of chr read as a hexadecimal digit
// (either letter case). Any other rune yields 16, which is larger than every
// legal digit value, so callers can test "digitValue(c) < base".
func digitValue(chr rune) int {
	if chr >= '0' && chr <= '9' {
		return int(chr - '0')
	}
	if chr >= 'a' && chr <= 'f' {
		return 10 + int(chr-'a')
	}
	if chr >= 'A' && chr <= 'F' {
		return 10 + int(chr-'A')
	}
	return 16
}
// isDigit reports whether chr is a valid digit in the given base, i.e. its
// digitValue is strictly below base.
func isDigit(chr rune, base int) bool {
	value := digitValue(chr)
	return base > value
}
// isIdStartUnicode reports whether r is in the identifier-start table and is
// not in the exclusion table.
func isIdStartUnicode(r rune) bool {
	if !unicode.Is(unicodeRangeIdStartPos, r) {
		return false
	}
	return !unicode.Is(unicodeRangeIdNeg, r)
}
// isIdPartUnicode reports whether r may continue an identifier: it is either
// U+200C / U+200D, or it is in the identifier-continue table and not in the
// exclusion table.
func isIdPartUnicode(r rune) bool {
	switch r {
	case '\u200C', '\u200D':
		return true
	}
	if !unicode.Is(unicodeRangeIdContPos, r) {
		return false
	}
	return !unicode.Is(unicodeRangeIdNeg, r)
}
// isIdentifierStart reports whether chr may begin an identifier token: '$',
// '_', a backslash (the start of a \u escape), an ASCII letter, or a non-ASCII
// rune accepted by isIdStartUnicode.
func isIdentifierStart(chr rune) bool {
	switch {
	case chr == '$', chr == '_', chr == '\\':
		return true
	case 'a' <= chr && chr <= 'z', 'A' <= chr && chr <= 'Z':
		return true
	case chr < utf8.RuneSelf:
		// Every remaining ASCII rune is rejected.
		return false
	}
	return isIdStartUnicode(chr)
}
// isIdentifierPart reports whether chr may appear inside an identifier token:
// '$', '_', a backslash (the start of a \u escape), an ASCII letter or digit,
// or a non-ASCII rune accepted by isIdPartUnicode.
func isIdentifierPart(chr rune) bool {
	switch {
	case chr == '$', chr == '_', chr == '\\':
		return true
	case 'a' <= chr && chr <= 'z', 'A' <= chr && chr <= 'Z':
		return true
	case '0' <= chr && chr <= '9':
		return true
	case chr < utf8.RuneSelf:
		// Every remaining ASCII rune is rejected.
		return false
	}
	return isIdPartUnicode(chr)
}
// scanIdentifier consumes identifier characters beginning at the current
// character. It returns, in order: the raw source text of the identifier, its
// decoded form (with \u escapes resolved), whether any escape was present,
// and an error message that is "" on success.
func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) {
offset := self.chrOffset
hasEscape := false
isUnicode := false
// length counts the decoded identifier in UTF-16 code units (runes above
// 0xFFFF count twice); it is handed to parseStringLiteral below.
length := 0
for isIdentifierPart(self.chr) {
r := self.chr
length++
if r == '\\' {
hasEscape = true
// distance == 0 means the escape is the first character of the identifier.
distance := self.chrOffset - offset
self.read()
if self.chr != 'u' {
return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
}
var value rune
if self._peek() == '{' {
// \u{X...}: hex digits up to the closing brace.
self.read()
// -1 marks "no digit seen yet" so that \u{} is rejected below.
value = -1
for value <= utf8.MaxRune {
self.read()
if self.chr == '}' {
break
}
decimal, ok := hex2decimal(byte(self.chr))
if !ok {
return "", "", false, "Invalid Unicode escape sequence"
}
if value == -1 {
value = decimal
} else {
value = value<<4 | decimal
}
}
if value == -1 {
return "", "", false, "Invalid Unicode escape sequence"
}
} else {
// \uXXXX: exactly four hex digits.
for j := 0; j < 4; j++ {
self.read()
decimal, ok := hex2decimal(byte(self.chr))
if !ok {
return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
}
value = value<<4 | decimal
}
}
// The decoded value must itself be legal at this position, and may not be a backslash.
if value == '\\' {
return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
} else if distance == 0 {
if !isIdentifierStart(value) {
return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
}
} else if distance > 0 {
if !isIdentifierPart(value) {
return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
}
}
r = value
}
if r >= utf8.RuneSelf {
isUnicode = true
if r > 0xFFFF {
// Needs a surrogate pair, i.e. one extra UTF-16 code unit.
length++
}
}
self.read()
}
literal := self.str[offset:self.chrOffset]
var parsed unistring.String
if hasEscape || isUnicode {
// Escapes and non-ASCII runes are decoded by parseStringLiteral.
var err string
// TODO strict
parsed, err = parseStringLiteral(literal, length, isUnicode, false)
if err != "" {
return "", "", false, err
}
} else {
// Plain ASCII: the source text is already the decoded name.
parsed = unistring.String(literal)
}
return literal, parsed, hasEscape, ""
}
// 7.2
// isLineWhiteSpace reports whether chr is white space that does not end a
// line. The four line terminators and U+0085 are always rejected; tab,
// vertical tab, form feed, space, no-break space and the byte order mark are
// always accepted; everything else is decided by unicode.IsSpace.
func isLineWhiteSpace(chr rune) bool {
	if chr == '\u000a' || chr == '\u000d' || chr == '\u2028' || chr == '\u2029' || chr == '\u0085' {
		return false
	}
	if chr == '\u0009' || chr == '\u000b' || chr == '\u000c' || chr == '\u0020' || chr == '\u00a0' || chr == '\ufeff' {
		return true
	}
	return unicode.IsSpace(chr)
}
// 7.3
// isLineTerminator reports whether chr is LF, CR, U+2028 or U+2029.
func isLineTerminator(chr rune) bool {
	return chr == '\u000a' || chr == '\u000d' || chr == '\u2028' || chr == '\u2029'
}
// parserState is a snapshot of the parser fields that mark saves and restore
// writes back, allowing the parser to rewind after speculative scanning.
type parserState struct {
// Current token: its position, kind, raw text and decoded text.
idx file.Idx
tok token.Token
literal string
parsedLiteral unistring.String
// Semicolon-insertion flags.
implicitSemicolon, insertSemicolon bool
// Lexer cursor: the current character and the read offsets.
chr rune
chrOffset, offset int
// Number of recorded errors at the time of the snapshot; restore truncates
// the error list back to this length.
errorCount int
}
// mark copies the parser's current token, semicolon flags, lexer cursor and
// error count into state and returns it. A nil state is replaced by a freshly
// allocated one, so callers may pass an existing snapshot to reuse it.
func (self *_parser) mark(state *parserState) *parserState {
	snapshot := state
	if snapshot == nil {
		snapshot = new(parserState)
	}

	// Current token.
	snapshot.idx = self.idx
	snapshot.tok = self.token
	snapshot.literal = self.literal
	snapshot.parsedLiteral = self.parsedLiteral

	// Semicolon-insertion flags.
	snapshot.implicitSemicolon = self.implicitSemicolon
	snapshot.insertSemicolon = self.insertSemicolon

	// Lexer cursor.
	snapshot.chr = self.chr
	snapshot.chrOffset = self.chrOffset
	snapshot.offset = self.offset

	snapshot.errorCount = len(self.errors)
	return snapshot
}
// restore rewinds the parser to a snapshot taken by mark: the token, the
// semicolon flags and the lexer cursor are written back, and errors recorded
// after the snapshot are dropped.
func (self *_parser) restore(state *parserState) {
	// Current token.
	self.idx = state.idx
	self.token = state.tok
	self.literal = state.literal
	self.parsedLiteral = state.parsedLiteral

	// Semicolon-insertion flags.
	self.implicitSemicolon = state.implicitSemicolon
	self.insertSemicolon = state.insertSemicolon

	// Lexer cursor.
	self.chr = state.chr
	self.chrOffset = state.chrOffset
	self.offset = state.offset

	self.errors = self.errors[:state.errorCount]
}
// peek returns the kind of the next token without consuming it: it runs scan
// and then puts the semicolon flags and the lexer cursor back the way they
// were before the call.
func (self *_parser) peek() token.Token {
	savedImplicit := self.implicitSemicolon
	savedInsert := self.insertSemicolon
	savedChr := self.chr
	savedChrOffset := self.chrOffset
	savedOffset := self.offset

	next, _, _, _ := self.scan()

	self.implicitSemicolon = savedImplicit
	self.insertSemicolon = savedInsert
	self.chr = savedChr
	self.chrOffset = savedChrOffset
	self.offset = savedOffset
	return next
}
// scan reads the next token from the input. It returns the token kind, its
// raw source text (filled in for identifiers, numbers and strings), its
// decoded text (identifiers and strings), and the position where it starts.
//
// As a side effect it maintains the two semicolon flags: insertSemicolon is
// set after tokens that can end a statement, and implicitSemicolon is set
// when a line terminator (or end of input) follows such a token.
func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {
self.implicitSemicolon = false
for {
self.skipWhiteSpace()
idx = self.idxOf(self.chrOffset)
// Local flag; it is copied to self.insertSemicolon at the bottom of the
// loop for tokens that do not return early.
insertSemicolon := false
switch chr := self.chr; {
case isIdentifierStart(chr):
var err string
var hasEscape bool
literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
if err != "" {
tkn = token.ILLEGAL
break
}
if len(parsedLiteral) > 1 {
// Keywords are longer than 1 character, avoid lookup otherwise
var strict bool
tkn, strict = token.IsKeyword(string(parsedLiteral))
if hasEscape {
// A name written with \u escapes is never returned as a keyword token:
// it is either a plain identifier or ESCAPED_RESERVED_WORD.
self.insertSemicolon = true
if tkn == 0 || self.isBindingId(tkn) {
tkn = token.IDENTIFIER
} else {
tkn = token.ESCAPED_RESERVED_WORD
}
return
}
switch tkn {
case 0: // Not a keyword
// no-op
case token.KEYWORD:
if strict {
// TODO If strict and in strict mode, then this is not a break
break
}
return
case
token.BOOLEAN,
token.NULL,
token.THIS,
token.BREAK,
token.THROW, // A newline after a throw is not allowed, but we need to detect it
token.YIELD,
token.RETURN,
token.CONTINUE,
token.DEBUGGER:
// These keywords may be directly followed by an inserted semicolon.
self.insertSemicolon = true
return
case token.ASYNC:
// async only has special meaning if not followed by a LineTerminator
if self.skipWhiteSpaceCheckLineTerminator() {
self.insertSemicolon = true
tkn = token.IDENTIFIER
}
return
default:
return
}
}
// Not a keyword (or a strict-only keyword): an ordinary identifier.
self.insertSemicolon = true
tkn = token.IDENTIFIER
return
case '0' <= chr && chr <= '9':
self.insertSemicolon = true
tkn, literal = self.scanNumericLiteral(false)
return
default:
// Punctuation and everything else: consume chr, then dispatch on it.
// From here on self.chr is the character AFTER chr.
self.read()
switch chr {
case -1:
// End of input.
if self.insertSemicolon {
self.insertSemicolon = false
self.implicitSemicolon = true
}
tkn = token.EOF
case '\r', '\n', '\u2028', '\u2029':
// skipWhiteSpace stops at a line terminator only while insertSemicolon is
// set; record the implicit semicolon and keep scanning.
self.insertSemicolon = false
self.implicitSemicolon = true
continue
case ':':
tkn = token.COLON
case '.':
if digitValue(self.chr) < 10 {
// A number that starts with its decimal point, e.g. ".5".
insertSemicolon = true
tkn, literal = self.scanNumericLiteral(true)
} else {
if self.chr == '.' {
self.read()
if self.chr == '.' {
self.read()
tkn = token.ELLIPSIS
} else {
// Exactly two dots is not a token.
tkn = token.ILLEGAL
}
} else {
tkn = token.PERIOD
}
}
case ',':
tkn = token.COMMA
case ';':
tkn = token.SEMICOLON
case '(':
tkn = token.LEFT_PARENTHESIS
case ')':
tkn = token.RIGHT_PARENTHESIS
insertSemicolon = true
case '[':
tkn = token.LEFT_BRACKET
case ']':
tkn = token.RIGHT_BRACKET
insertSemicolon = true
case '{':
tkn = token.LEFT_BRACE
case '}':
tkn = token.RIGHT_BRACE
insertSemicolon = true
case '+':
tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
if tkn == token.INCREMENT {
insertSemicolon = true
}
case '-':
tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
if tkn == token.DECREMENT {
insertSemicolon = true
}
case '*':
if self.chr == '*' {
self.read()
tkn = self.switch2(token.EXPONENT, token.EXPONENT_ASSIGN)
} else {
tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
}
case '/':
if self.chr == '/' {
self.skipSingleLineComment()
continue
} else if self.chr == '*' {
// A block comment containing a line terminator counts as a line
// terminator for semicolon insertion.
if self.skipMultiLineComment() {
self.insertSemicolon = false
self.implicitSemicolon = true
}
continue
} else {
// Could be division, could be RegExp literal
tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
insertSemicolon = true
}
case '%':
tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
case '^':
tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
case '<':
tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
case '>':
tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
case '=':
if self.chr == '>' {
self.read()
// "=>" is reported as ILLEGAL when a line terminator was seen before it
// during this scan call.
if self.implicitSemicolon {
tkn = token.ILLEGAL
} else {
tkn = token.ARROW
}
} else {
tkn = self.switch2(token.ASSIGN, token.EQUAL)
if tkn == token.EQUAL && self.chr == '=' {
self.read()
tkn = token.STRICT_EQUAL
}
}
case '!':
tkn = self.switch2(token.NOT, token.NOT_EQUAL)
if tkn == token.NOT_EQUAL && self.chr == '=' {
self.read()
tkn = token.STRICT_NOT_EQUAL
}
case '&':
tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
case '|':
tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
case '~':
tkn = token.BITWISE_NOT
case '?':
// "?." is only taken as one token when no digit follows, so that
// "a?.5:b" still scans as '?' followed by the number ".5".
if self.chr == '.' && !isDecimalDigit(self._peek()) {
self.read()
tkn = token.QUESTION_DOT
} else if self.chr == '?' {
self.read()
tkn = token.COALESCE
} else {
tkn = token.QUESTION_MARK
}
case '"', '\'':
insertSemicolon = true
tkn = token.STRING
var err string
// chrOffset-1 is the position of the opening quote consumed above.
literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
if err != "" {
tkn = token.ILLEGAL
}
case '`':
tkn = token.BACKTICK
case '#':
// "#!" at the very start of the input is skipped like a line comment.
if self.chrOffset == 1 && self.chr == '!' {
self.skipSingleLineComment()
continue
}
// Otherwise '#' must introduce a private name.
var err string
literal, parsedLiteral, _, err = self.scanIdentifier()
if err != "" || literal == "" {
tkn = token.ILLEGAL
break
}
self.insertSemicolon = true
tkn = token.PRIVATE_IDENTIFIER
return
default:
self.errorUnexpected(idx, chr)
tkn = token.ILLEGAL
}
}
self.insertSemicolon = insertSemicolon
return
}
}
// switch2 picks between an operator and its "=" form: when the current
// character is '=', it is consumed and tkn1 is returned; otherwise nothing is
// consumed and tkn0 is returned.
func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
	if self.chr != '=' {
		return tkn0
	}
	self.read()
	return tkn1
}
// switch3 picks between three operator forms based on the current character:
// '=' yields tkn1, chr2 yields tkn2 (the character is consumed in both cases),
// and anything else yields tkn0 without consuming input.
func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
	switch {
	case self.chr == '=':
		self.read()
		return tkn1
	case self.chr == chr2:
		self.read()
		return tkn2
	default:
		return tkn0
	}
}
// switch4 picks between four operator forms. With X the operator already
// consumed by the caller:
//
//	X        -> tkn0
//	X=       -> tkn1
//	X chr2   -> tkn2
//	X chr2 = -> tkn3
//
// Only the characters that belong to the chosen token are consumed.
func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
	switch {
	case self.chr == '=':
		self.read()
		return tkn1
	case self.chr == chr2:
		self.read()
		if self.chr != '=' {
			return tkn2
		}
		self.read()
		return tkn3
	default:
		return tkn0
	}
}
// switch6 picks between six operator forms. With X the operator already
// consumed by the caller:
//
//	X             -> tkn0
//	X=            -> tkn1
//	X chr2        -> tkn2
//	X chr2 =      -> tkn3
//	X chr2 chr3   -> tkn4
//	X chr2 chr3 = -> tkn5
//
// Only the characters that belong to the chosen token are consumed.
func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
	switch {
	case self.chr == '=':
		self.read()
		return tkn1
	case self.chr != chr2:
		return tkn0
	}

	// Second character matched chr2.
	self.read()
	switch {
	case self.chr == '=':
		self.read()
		return tkn3
	case self.chr != chr3:
		return tkn2
	}

	// Third character matched chr3.
	self.read()
	if self.chr != '=' {
		return tkn4
	}
	self.read()
	return tkn5
}
// _peek returns the byte at the next read offset as a rune without advancing,
// or -1 when the input is exhausted. It looks at a single byte only and does
// not decode multi-byte UTF-8 sequences.
func (self *_parser) _peek() rune {
	if self.offset >= self.length {
		return -1
	}
	return rune(self.str[self.offset])
}
// read advances the lexer by one character: chr becomes the next rune of the
// input, chrOffset its byte position, and offset the position just after it.
// At end of input chr is set to -1 and chrOffset to the input length. An
// undecodable byte is reported as an error and scanned as utf8.RuneError.
func (self *_parser) read() {
	if self.offset >= self.length {
		self.chrOffset = self.length
		self.chr = -1 // EOF
		return
	}

	self.chrOffset = self.offset
	next, size := rune(self.str[self.offset]), 1
	if next >= utf8.RuneSelf {
		// Not ASCII: decode the full UTF-8 sequence.
		next, size = utf8.DecodeRuneInString(self.str[self.offset:])
		if size == 1 && next == utf8.RuneError {
			self.error(self.chrOffset, "Invalid UTF-8 character")
		}
	}
	self.offset += size
	self.chr = next
}
// skipSingleLineComment advances until the current character is a line
// terminator or the input ends. The terminator itself is not consumed.
func (self *_parser) skipSingleLineComment() {
	for {
		if self.chr == -1 {
			return
		}
		self.read()
		switch self.chr {
		case '\u000a', '\u000d', '\u2028', '\u2029':
			return
		}
	}
}
// skipMultiLineComment consumes a block comment through its closing "*/" and
// reports whether a line terminator occurred inside it. It first reads one
// character unconditionally (the caller invokes it while the current character
// is the '*' of the opening "/*"). If the input ends before "*/" is found, an
// unexpected-character error is recorded.
func (self *_parser) skipMultiLineComment() (hasLineTerminator bool) {
	self.read()
	for self.chr >= 0 {
		prev := self.chr
		switch prev {
		case '\r', '\n', '\u2028', '\u2029':
			hasLineTerminator = true
		}
		self.read()
		if prev == '*' && self.chr == '/' {
			self.read()
			return hasLineTerminator
		}
	}
	self.errorUnexpected(0, self.chr)
	return hasLineTerminator
}
// skipWhiteSpaceCheckLineTerminator skips white space and reports whether the
// first non-space character is a line terminator. The terminator is left
// unconsumed, except that for "\r\n" the '\r' is consumed and the current
// character becomes '\n'.
func (self *_parser) skipWhiteSpaceCheckLineTerminator() bool {
	for {
		c := self.chr
		switch {
		case c == ' ', c == '\t', c == '\f', c == '\v', c == '\u00a0', c == '\ufeff':
			self.read()
		case c == '\r':
			if self._peek() == '\n' {
				self.read()
			}
			return true
		case c == '\u2028', c == '\u2029', c == '\n':
			return true
		case c >= utf8.RuneSelf && unicode.IsSpace(c):
			self.read()
		default:
			return false
		}
	}
}
// skipWhiteSpace advances past white space. Line terminators are skipped too,
// unless insertSemicolon is set: then the function stops at the terminator so
// the caller can turn it into an implicit semicolon (for "\r\n" it stops with
// the '\r' consumed and the current character on '\n').
func (self *_parser) skipWhiteSpace() {
	for {
		c := self.chr
		switch {
		case c == ' ', c == '\t', c == '\f', c == '\v', c == '\u00a0', c == '\ufeff':
			self.read()
		case c == '\r', c == '\u2028', c == '\u2029', c == '\n':
			if c == '\r' && self._peek() == '\n' {
				self.read()
			}
			if self.insertSemicolon {
				return
			}
			self.read()
		case c >= utf8.RuneSelf && unicode.IsSpace(c):
			self.read()
		default:
			return
		}
	}
}
// scanMantissa consumes a run of digits valid in base. When allowSeparator is
// set, '_' is accepted inside the run; an underscore that is not directly
// followed by a digit is reported as an error.
func (self *_parser) scanMantissa(base int, allowSeparator bool) {
	for {
		separator := allowSeparator && self.chr == '_'
		if !separator && !isDigit(self.chr, base) {
			return
		}
		self.read()
		if separator && !isDigit(self.chr, base) {
			self.error(self.chrOffset, "Only one underscore is allowed as numeric separator")
		}
	}
}
// scanEscape consumes the body of an escape sequence; the current character
// is the one right after the backslash. quote is the closing delimiter of the
// enclosing literal and stops digit scanning early.
//
// It returns how many units the escape adds to the caller's length count (2
// for a code point above 0xFFFF, 2 for an escaped "\r\n", otherwise 1) and
// whether the caller must treat the literal as unicode.
func (self *_parser) scanEscape(quote rune) (int, bool) {
// base == 0 means "no numeric payload"; length is the number of digits to
// read, with 0 meaning "until the closing brace" for \u{...}.
var length, base uint32
chr := self.chr
switch chr {
case '0', '1', '2', '3', '4', '5', '6', '7':
// Octal:
// The digit is not consumed here: it is the first digit of the value.
length, base = 3, 8
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
self.read()
return 1, false
case '\r':
self.read()
if self.chr == '\n' {
self.read()
return 2, false
}
return 1, false
case '\n':
self.read()
return 1, false
case '\u2028', '\u2029':
self.read()
return 1, true
case 'x':
self.read()
length, base = 2, 16
case 'u':
self.read()
if self.chr == '{' {
self.read()
length, base = 0, 16
} else {
length, base = 4, 16
}
default:
self.read() // Always make progress
}
if base > 0 {
var value uint32
if length > 0 {
// Fixed-width form: read at most length digits, stopping early at the
// quote, at end of input, or at the first non-digit.
for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
digit := uint32(digitValue(self.chr))
if digit >= base {
break
}
value = value*base + digit
self.read()
}
} else {
// Braced form: read digits up to and including the closing brace.
for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
if self.chr == '}' {
self.read()
break
}
digit := uint32(digitValue(self.chr))
if digit >= base {
break
}
value = value*base + digit
self.read()
}
}
chr = rune(value)
}
// chr is now either the decoded numeric value or the escaped character itself.
if chr >= utf8.RuneSelf {
if chr > 0xFFFF {
return 2, true
}
return 1, true
}
return 1, false
}
// scanString scans a delimited literal whose opening delimiter is at offset
// and whose body starts at the current character. The delimiter is read from
// the input and may be '"', '\'' or '/'.
//
// It returns the raw text including both delimiters and, when parse is true,
// the decoded string. On an unterminated literal it returns empty strings and
// an error message; for '/' the error is also recorded on the parser.
func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) {
// " ' /
quote := rune(self.str[offset])
// length counts the decoded string in UTF-16 code units for parseStringLiteral.
length := 0
isUnicode := false
for self.chr != quote {
chr := self.chr
if chr == '\n' || chr == '\r' || chr < 0 {
goto newline
}
if quote == '/' && (self.chr == '\u2028' || self.chr == '\u2029') {
goto newline
}
self.read()
if chr == '\\' {
if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
// Backslash followed by a line terminator: a line continuation, which
// is only accepted when the delimiter is a quote character.
if quote == '/' {
goto newline
}
self.scanNewline()
} else {
l, u := self.scanEscape(quote)
length += l
if u {
isUnicode = true
}
}
continue
} else if chr == '[' && quote == '/' {
// Allow a slash (/) in a bracket character class ([...])
// TODO Fix this, this is hacky...
// Setting quote to -1 keeps the loop from stopping at '/' until ']'.
quote = -1
} else if chr == ']' && quote == -1 {
quote = '/'
}
if chr >= utf8.RuneSelf {
isUnicode = true
if chr > 0xFFFF {
length++
}
}
length++
}
// " ' /
// Consume the closing delimiter.
self.read()
literal = self.str[offset:self.chrOffset]
if parse {
// TODO strict
parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
}
return
newline:
// Unterminated literal: skip the line break and report.
self.scanNewline()
errStr := "String not terminated"
if quote == '/' {
errStr = "Invalid regular expression: missing /"
self.error(self.idxOf(offset), errStr)
}
return "", "", errStr
}
// scanNewline consumes one line break at the current position. U+2028 and
// U+2029 are one character; "\r\n" is consumed as a pair; a '\r' not followed
// by '\n' is consumed alone. Any other current character is consumed as a
// single character.
func (self *_parser) scanNewline() {
	switch self.chr {
	case '\u2028', '\u2029':
		self.read()
		return
	case '\r':
		self.read()
		if self.chr != '\n' {
			return
		}
	}
	self.read()
}
// parseTemplateCharacters scans one run of template-literal text starting at
// the current character and ending at either a closing backtick or "${".
//
// Results:
//   - literal: the raw text of the run, with CR and CRLF replaced by LF;
//   - parsed: the decoded text, left empty when parseErr is set;
//   - finished: true when the run ended at a backtick (or at end of input);
//   - parseErr: a problem with an escape sequence inside the run;
//   - err: set when the input ends before the template is closed.
func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) {
offset := self.chrOffset
// end is the byte offset just past the last character of the run.
var end int
// length counts the decoded text in UTF-16 code units for parseStringLiteral.
length := 0
isUnicode := false
hasCR := false
for {
chr := self.chr
if chr < 0 {
goto unterminated
}
self.read()
if chr == '`' {
finished = true
end = self.chrOffset - 1
break
}
if chr == '\\' {
if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
// Line continuation.
if self.chr == '\r' {
hasCR = true
}
self.scanNewline()
} else {
if self.chr == '8' || self.chr == '9' {
// Only the first escape error is kept.
if parseErr == "" {
parseErr = "\\8 and \\9 are not allowed in template strings."
}
}
l, u := self.scanEscape('`')
length += l
if u {
isUnicode = true
}
}
continue
}
if chr == '$' && self.chr == '{' {
// Start of a substitution: consume '{' and end the run before "${".
self.read()
end = self.chrOffset - 2
break
}
if chr >= utf8.RuneSelf {
isUnicode = true
if chr > 0xFFFF {
length++
}
} else if chr == '\r' {
hasCR = true
if self.chr == '\n' {
// "\r\n" becomes a single '\n' after normalisation; cancel the
// increment below so the pair is counted once, when '\n' is seen.
length--
}
}
length++
}
literal = self.str[offset:end]
if hasCR {
literal = normaliseCRLF(literal)
}
if parseErr == "" {
// Decoded with strict set to true, so octal escapes are rejected.
parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
}
self.insertSemicolon = true
return
unterminated:
err = err_UnexpectedEndOfInput
finished = true
return
}
// normaliseCRLF returns s with every "\r\n" pair and every lone '\r' replaced
// by a single '\n'.
func normaliseCRLF(s string) string {
	// Collapse the pairs first so that the remaining '\r' are all lone ones.
	withoutPairs := strings.ReplaceAll(s, "\r\n", "\n")
	return strings.ReplaceAll(withoutPairs, "\r", "\n")
}
// hex2decimal converts one ASCII hexadecimal digit (either letter case) to its
// numeric value. ok is false, and value 0, for any other byte.
func hex2decimal(chr byte) (value rune, ok bool) {
	digit := rune(chr)
	if digit >= '0' && digit <= '9' {
		return digit - '0', true
	}
	if digit >= 'a' && digit <= 'f' {
		return digit - 'a' + 10, true
	}
	if digit >= 'A' && digit <= 'F' {
		return digit - 'A' + 10, true
	}
	return 0, false
}
// parseNumberLiteral converts the source text of a numeric literal into a Go
// value.
//
// The result is one of:
//   - int64, when the literal is an integer that fits in 64 bits (decimal,
//     0x/0o/0b prefixed, or legacy octal, with optional '_' separators);
//   - float64, for literals with a fraction or exponent, for integers too
//     large for int64 (rounded once to the nearest float64), and ±Inf for
//     literals beyond the float64 range;
//   - *big.Int, for BigInt literals (a trailing 'n').
//
// Any other input yields a nil value and the error "Illegal numeric literal".
func parseNumberLiteral(literal string) (value interface{}, err error) {
	const illegal = "Illegal numeric literal"

	// TODO Is Uint okay? What about -MAX_UINT
	intValue, intErr := strconv.ParseInt(literal, 0, 64)
	if intErr == nil {
		return intValue, nil
	}

	floatValue, floatErr := strconv.ParseFloat(literal, 64)
	if floatErr == nil || errors.Is(floatErr, strconv.ErrRange) {
		// On ErrRange ParseFloat still returns a usable value (Infinity, etc.).
		return floatValue, nil
	}

	// ParseInt reports ErrRange as soon as the value overflows, before it has
	// looked at the rest of the text, so a BigInt literal can end up here too;
	// the 'n' check leaves those to the BigInt branch below.
	if errors.Is(intErr, strconv.ErrRange) &&
		len(literal) > 2 && literal[0] == '0' &&
		literal[len(literal)-1] != 'n' {
		switch literal[1] {
		case 'x', 'X', 'o', 'O', 'b', 'B':
			// Could just be a very large number (e.g. 0x8000000000000000).
			// ParseFloat rejects these prefixes, so parse the integer exactly
			// (separators included) and round it to float64 a single time.
			wide, ok := new(big.Int).SetString(literal, 0)
			if !ok {
				return nil, errors.New(illegal)
			}
			rounded, _ := new(big.Float).SetInt(wide).Float64()
			return rounded, nil
		}
	}

	if n := len(literal); n > 1 && literal[n-1] == 'n' {
		// A BigInt literal may not use the legacy octal form (leading zero
		// followed by another digit).
		if literal[0] == '0' && n > 2 && '0' <= literal[1] && literal[1] <= '9' {
			return nil, errors.New(illegal)
		}
		// Parse as big.Int
		bigInt, ok := new(big.Int).SetString(literal[:n-1], 0)
		if !ok {
			return nil, errors.New(illegal)
		}
		return bigInt, nil
	}

	return nil, errors.New(illegal)
}
// parseStringLiteral decodes the body of a string-like literal (without its
// delimiters), resolving escape sequences and line continuations.
//
// length is the expected size of the result: UTF-16 code units when unicode is
// true, bytes otherwise. The function panics if the decoded size differs, so
// callers must count exactly. unicode selects a UTF-16 result; strict rejects
// octal escapes.
//
// It returns the decoded string and an error message that is "" on success.
// NOTE(review): the parameter named unicode shadows the unicode package inside
// this function.
func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) {
var sb strings.Builder
var chars []uint16
if unicode {
// The UTF-16 buffer starts with a BOM marker, hence the +1 here and in the
// final length check.
chars = make([]uint16, 1, length+1)
chars[0] = unistring.BOM
} else {
sb.Grow(length)
}
str := literal
for len(str) > 0 {
switch chr := str[0]; {
// We do not explicitly handle the case of the quote
// value, which can be: " ' /
// This assumes we're already passed a partially well-formed literal
case chr >= utf8.RuneSelf:
// Non-ASCII source character: always appended to chars, so callers are
// expected to pass unicode == true for such input.
chr, size := utf8.DecodeRuneInString(str)
if chr <= 0xFFFF {
chars = append(chars, uint16(chr))
} else {
first, second := utf16.EncodeRune(chr)
chars = append(chars, uint16(first), uint16(second))
}
str = str[size:]
continue
case chr != '\\':
// Ordinary ASCII character.
if unicode {
chars = append(chars, uint16(chr))
} else {
sb.WriteByte(chr)
}
str = str[1:]
continue
}
// From here on str starts with a backslash.
if len(str) <= 1 {
panic("len(str) <= 1")
}
chr := str[1]
var value rune
if chr >= utf8.RuneSelf {
// Backslash followed by a non-ASCII character: the character stands for
// itself, except U+2028/U+2029 which form a line continuation.
str = str[1:]
var size int
value, size = utf8.DecodeRuneInString(str)
str = str[size:] // \ + <character>
if value == '\u2028' || value == '\u2029' {
continue
}
} else {
str = str[2:] // \<character>
switch chr {
case 'b':
value = '\b'
case 'f':
value = '\f'
case 'n':
value = '\n'
case 'r':
value = '\r'
case 't':
value = '\t'
case 'v':
value = '\v'
case 'x', 'u':
// size is the number of hex digits to read; it stays 0 for the braced
// \u{...} form.
size := 0
switch chr {
case 'x':
size = 2
case 'u':
if str == "" || str[0] != '{' {
size = 4
}
}
if size > 0 {
if len(str) < size {
return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
}
for j := 0; j < size; j++ {
decimal, ok := hex2decimal(str[j])
if !ok {
return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size])
}
value = value<<4 | decimal
}
} else {
// Braced form: skip '{' and read hex digits up to '}'. value stays -1
// until the closing brace is found.
str = str[1:]
var val rune
value = -1
for ; size < len(str); size++ {
if str[size] == '}' {
if size == 0 {
return "", fmt.Sprintf("invalid escape: \\%s", string(chr))
}
size++
value = val
break
}
decimal, ok := hex2decimal(str[size])
if !ok {
return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1])
}
val = val<<4 | decimal
if val > utf8.MaxRune {
return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1])
}
}
if value == -1 {
return "", fmt.Sprintf("unterminated \\u{: %q", str)
}
}
// Drop the digits (and the closing brace, if any) that were just read.
str = str[size:]
if chr == 'x' {
break
}
if value > utf8.MaxRune {
panic("value > utf8.MaxRune")
}
case '0':
// "\0" not followed by an octal digit is the NUL character; otherwise it
// is the start of an octal escape.
if len(str) == 0 || '0' > str[0] || str[0] > '7' {
value = 0
break
}
fallthrough
case '1', '2', '3', '4', '5', '6', '7':
if strict {
return "", "Octal escape sequences are not allowed in this context"
}
value = rune(chr) - '0'
// Up to two more octal digits.
j := 0
for ; j < 2; j++ {
if len(str) < j+1 {
break
}
chr := str[j]
if '0' > chr || chr > '7' {
break
}
decimal := rune(str[j]) - '0'
value = (value << 3) | decimal
}
str = str[j:]
case '\\':
value = '\\'
case '\'', '"':
value = rune(chr)
case '\r':
// Line continuation; "\r\n" is consumed as a pair.
if len(str) > 0 {
if str[0] == '\n' {
str = str[1:]
}
}
fallthrough
case '\n':
continue
default:
// Any other escaped character stands for itself.
value = rune(chr)
}
}
// Append the decoded value.
if unicode {
if value <= 0xFFFF {
chars = append(chars, uint16(value))
} else {
first, second := utf16.EncodeRune(value)
chars = append(chars, uint16(first), uint16(second))
}
} else {
if value >= utf8.RuneSelf {
return "", "Unexpected unicode character"
}
sb.WriteByte(byte(value))
}
}
// The decoded size must match what the scanner counted.
if unicode {
if len(chars) != length+1 {
panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
}
return unistring.FromUtf16(chars), ""
}
if sb.Len() != length {
panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
}
return unistring.String(sb.String()), ""
}
// scanNumericLiteral scans a numeric literal and returns its token kind and
// raw source text. decimalPoint is true when the caller has already consumed
// a leading '.', so the literal starts one character before the current
// position and only fraction digits and an exponent remain.
//
// token.ILLEGAL is returned, with the text scanned so far, for a radix prefix
// without digits, an exponent without digits, or a literal directly followed
// by an identifier start or a digit.
func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
offset := self.chrOffset
tkn := token.NUMBER
if decimalPoint {
// Include the '.' that the caller consumed.
offset--
self.scanMantissa(10, true)
} else {
if self.chr == '0' {
self.read()
base := 0
switch self.chr {
case 'x', 'X':
base = 16
case 'o', 'O':
base = 8
case 'b', 'B':
base = 2
case '.', 'e', 'E':
// no-op
default:
// legacy octal
// Separators are not accepted here; a lone "0" also takes this path
// and scans no further digits.
self.scanMantissa(8, false)
goto end
}
if base > 0 {
// Consume the radix letter; at least one digit must follow.
self.read()
if !isDigit(self.chr, base) {
return token.ILLEGAL, self.str[offset:self.chrOffset]
}
self.scanMantissa(base, true)
goto end
}
} else {
self.scanMantissa(10, true)
}
if self.chr == '.' {
self.read()
self.scanMantissa(10, true)
}
}
if self.chr == 'e' || self.chr == 'E' {
// Exponent: optional sign, then at least one decimal digit.
self.read()
if self.chr == '-' || self.chr == '+' {
self.read()
}
if isDecimalDigit(self.chr) {
self.read()
self.scanMantissa(10, true)
} else {
return token.ILLEGAL, self.str[offset:self.chrOffset]
}
}
end:
// A trailing 'n' or 'N' is consumed as part of the literal.
// NOTE(review): 'N' is accepted here, but parseNumberLiteral only treats a
// lowercase 'n' as the BigInt suffix; confirm whether 'N' is intended.
if self.chr == 'n' || self.chr == 'N' {
self.read()
return tkn, self.str[offset:self.chrOffset]
}
// A number may not run straight into an identifier or another digit.
if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
return token.ILLEGAL, self.str[offset:self.chrOffset]
}
return tkn, self.str[offset:self.chrOffset]
}