1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved. 2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style 3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file. 4*1c12ee1eSDan Willemsen 5*1c12ee1eSDan Willemsenpackage text 6*1c12ee1eSDan Willemsen 7*1c12ee1eSDan Willemsen// parseNumberValue parses a number from the input and returns a Token object. 8*1c12ee1eSDan Willemsenfunc (d *Decoder) parseNumberValue() (Token, bool) { 9*1c12ee1eSDan Willemsen in := d.in 10*1c12ee1eSDan Willemsen num := parseNumber(in) 11*1c12ee1eSDan Willemsen if num.size == 0 { 12*1c12ee1eSDan Willemsen return Token{}, false 13*1c12ee1eSDan Willemsen } 14*1c12ee1eSDan Willemsen numAttrs := num.kind 15*1c12ee1eSDan Willemsen if num.neg { 16*1c12ee1eSDan Willemsen numAttrs |= isNegative 17*1c12ee1eSDan Willemsen } 18*1c12ee1eSDan Willemsen tok := Token{ 19*1c12ee1eSDan Willemsen kind: Scalar, 20*1c12ee1eSDan Willemsen attrs: numberValue, 21*1c12ee1eSDan Willemsen pos: len(d.orig) - len(d.in), 22*1c12ee1eSDan Willemsen raw: d.in[:num.size], 23*1c12ee1eSDan Willemsen str: num.string(d.in), 24*1c12ee1eSDan Willemsen numAttrs: numAttrs, 25*1c12ee1eSDan Willemsen } 26*1c12ee1eSDan Willemsen d.consume(num.size) 27*1c12ee1eSDan Willemsen return tok, true 28*1c12ee1eSDan Willemsen} 29*1c12ee1eSDan Willemsen 30*1c12ee1eSDan Willemsenconst ( 31*1c12ee1eSDan Willemsen numDec uint8 = (1 << iota) / 2 32*1c12ee1eSDan Willemsen numHex 33*1c12ee1eSDan Willemsen numOct 34*1c12ee1eSDan Willemsen numFloat 35*1c12ee1eSDan Willemsen) 36*1c12ee1eSDan Willemsen 37*1c12ee1eSDan Willemsen// number is the result of parsing out a valid number from parseNumber. It 38*1c12ee1eSDan Willemsen// contains data for doing float or integer conversion via the strconv package 39*1c12ee1eSDan Willemsen// in conjunction with the input bytes. 40*1c12ee1eSDan Willemsentype number struct { 41*1c12ee1eSDan Willemsen kind uint8 42*1c12ee1eSDan Willemsen neg bool 43*1c12ee1eSDan Willemsen size int 44*1c12ee1eSDan Willemsen // if neg, this is the length of whitespace and comments between 45*1c12ee1eSDan Willemsen // the minus sign and the rest fo the number literal 46*1c12ee1eSDan Willemsen sep int 47*1c12ee1eSDan Willemsen} 48*1c12ee1eSDan Willemsen 49*1c12ee1eSDan Willemsenfunc (num number) string(data []byte) string { 50*1c12ee1eSDan Willemsen strSize := num.size 51*1c12ee1eSDan Willemsen last := num.size - 1 52*1c12ee1eSDan Willemsen if num.kind == numFloat && (data[last] == 'f' || data[last] == 'F') { 53*1c12ee1eSDan Willemsen strSize = last 54*1c12ee1eSDan Willemsen } 55*1c12ee1eSDan Willemsen if num.neg && num.sep > 0 { 56*1c12ee1eSDan Willemsen // strip whitespace/comments between negative sign and the rest 57*1c12ee1eSDan Willemsen strLen := strSize - num.sep 58*1c12ee1eSDan Willemsen str := make([]byte, strLen) 59*1c12ee1eSDan Willemsen str[0] = data[0] 60*1c12ee1eSDan Willemsen copy(str[1:], data[num.sep+1:strSize]) 61*1c12ee1eSDan Willemsen return string(str) 62*1c12ee1eSDan Willemsen } 63*1c12ee1eSDan Willemsen return string(data[:strSize]) 64*1c12ee1eSDan Willemsen 65*1c12ee1eSDan Willemsen} 66*1c12ee1eSDan Willemsen 67*1c12ee1eSDan Willemsen// parseNumber constructs a number object from given input. It allows for the 68*1c12ee1eSDan Willemsen// following patterns: 69*1c12ee1eSDan Willemsen// 70*1c12ee1eSDan Willemsen// integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*) 71*1c12ee1eSDan Willemsen// float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?) 72*1c12ee1eSDan Willemsen// 73*1c12ee1eSDan Willemsen// It also returns the number of parsed bytes for the given number, 0 if it is 74*1c12ee1eSDan Willemsen// not a number. 75*1c12ee1eSDan Willemsenfunc parseNumber(input []byte) number { 76*1c12ee1eSDan Willemsen kind := numDec 77*1c12ee1eSDan Willemsen var size int 78*1c12ee1eSDan Willemsen var neg bool 79*1c12ee1eSDan Willemsen 80*1c12ee1eSDan Willemsen s := input 81*1c12ee1eSDan Willemsen if len(s) == 0 { 82*1c12ee1eSDan Willemsen return number{} 83*1c12ee1eSDan Willemsen } 84*1c12ee1eSDan Willemsen 85*1c12ee1eSDan Willemsen // Optional - 86*1c12ee1eSDan Willemsen var sep int 87*1c12ee1eSDan Willemsen if s[0] == '-' { 88*1c12ee1eSDan Willemsen neg = true 89*1c12ee1eSDan Willemsen s = s[1:] 90*1c12ee1eSDan Willemsen size++ 91*1c12ee1eSDan Willemsen // Consume any whitespace or comments between the 92*1c12ee1eSDan Willemsen // negative sign and the rest of the number 93*1c12ee1eSDan Willemsen lenBefore := len(s) 94*1c12ee1eSDan Willemsen s = consume(s, 0) 95*1c12ee1eSDan Willemsen sep = lenBefore - len(s) 96*1c12ee1eSDan Willemsen size += sep 97*1c12ee1eSDan Willemsen if len(s) == 0 { 98*1c12ee1eSDan Willemsen return number{} 99*1c12ee1eSDan Willemsen } 100*1c12ee1eSDan Willemsen } 101*1c12ee1eSDan Willemsen 102*1c12ee1eSDan Willemsen switch { 103*1c12ee1eSDan Willemsen case s[0] == '0': 104*1c12ee1eSDan Willemsen if len(s) > 1 { 105*1c12ee1eSDan Willemsen switch { 106*1c12ee1eSDan Willemsen case s[1] == 'x' || s[1] == 'X': 107*1c12ee1eSDan Willemsen // Parse as hex number. 108*1c12ee1eSDan Willemsen kind = numHex 109*1c12ee1eSDan Willemsen n := 2 110*1c12ee1eSDan Willemsen s = s[2:] 111*1c12ee1eSDan Willemsen for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') || 112*1c12ee1eSDan Willemsen ('a' <= s[0] && s[0] <= 'f') || 113*1c12ee1eSDan Willemsen ('A' <= s[0] && s[0] <= 'F')) { 114*1c12ee1eSDan Willemsen s = s[1:] 115*1c12ee1eSDan Willemsen n++ 116*1c12ee1eSDan Willemsen } 117*1c12ee1eSDan Willemsen if n == 2 { 118*1c12ee1eSDan Willemsen return number{} 119*1c12ee1eSDan Willemsen } 120*1c12ee1eSDan Willemsen size += n 121*1c12ee1eSDan Willemsen 122*1c12ee1eSDan Willemsen case '0' <= s[1] && s[1] <= '7': 123*1c12ee1eSDan Willemsen // Parse as octal number. 124*1c12ee1eSDan Willemsen kind = numOct 125*1c12ee1eSDan Willemsen n := 2 126*1c12ee1eSDan Willemsen s = s[2:] 127*1c12ee1eSDan Willemsen for len(s) > 0 && '0' <= s[0] && s[0] <= '7' { 128*1c12ee1eSDan Willemsen s = s[1:] 129*1c12ee1eSDan Willemsen n++ 130*1c12ee1eSDan Willemsen } 131*1c12ee1eSDan Willemsen size += n 132*1c12ee1eSDan Willemsen } 133*1c12ee1eSDan Willemsen 134*1c12ee1eSDan Willemsen if kind&(numHex|numOct) > 0 { 135*1c12ee1eSDan Willemsen if len(s) > 0 && !isDelim(s[0]) { 136*1c12ee1eSDan Willemsen return number{} 137*1c12ee1eSDan Willemsen } 138*1c12ee1eSDan Willemsen return number{kind: kind, neg: neg, size: size, sep: sep} 139*1c12ee1eSDan Willemsen } 140*1c12ee1eSDan Willemsen } 141*1c12ee1eSDan Willemsen s = s[1:] 142*1c12ee1eSDan Willemsen size++ 143*1c12ee1eSDan Willemsen 144*1c12ee1eSDan Willemsen case '1' <= s[0] && s[0] <= '9': 145*1c12ee1eSDan Willemsen n := 1 146*1c12ee1eSDan Willemsen s = s[1:] 147*1c12ee1eSDan Willemsen for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { 148*1c12ee1eSDan Willemsen s = s[1:] 149*1c12ee1eSDan Willemsen n++ 150*1c12ee1eSDan Willemsen } 151*1c12ee1eSDan Willemsen size += n 152*1c12ee1eSDan Willemsen 153*1c12ee1eSDan Willemsen case s[0] == '.': 154*1c12ee1eSDan Willemsen // Set kind to numFloat to signify the intent to parse as float. And 155*1c12ee1eSDan Willemsen // that it needs to have other digits after '.'. 156*1c12ee1eSDan Willemsen kind = numFloat 157*1c12ee1eSDan Willemsen 158*1c12ee1eSDan Willemsen default: 159*1c12ee1eSDan Willemsen return number{} 160*1c12ee1eSDan Willemsen } 161*1c12ee1eSDan Willemsen 162*1c12ee1eSDan Willemsen // . followed by 0 or more digits. 163*1c12ee1eSDan Willemsen if len(s) > 0 && s[0] == '.' { 164*1c12ee1eSDan Willemsen n := 1 165*1c12ee1eSDan Willemsen s = s[1:] 166*1c12ee1eSDan Willemsen // If decimal point was before any digits, it should be followed by 167*1c12ee1eSDan Willemsen // other digits. 168*1c12ee1eSDan Willemsen if len(s) == 0 && kind == numFloat { 169*1c12ee1eSDan Willemsen return number{} 170*1c12ee1eSDan Willemsen } 171*1c12ee1eSDan Willemsen for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { 172*1c12ee1eSDan Willemsen s = s[1:] 173*1c12ee1eSDan Willemsen n++ 174*1c12ee1eSDan Willemsen } 175*1c12ee1eSDan Willemsen size += n 176*1c12ee1eSDan Willemsen kind = numFloat 177*1c12ee1eSDan Willemsen } 178*1c12ee1eSDan Willemsen 179*1c12ee1eSDan Willemsen // e or E followed by an optional - or + and 1 or more digits. 180*1c12ee1eSDan Willemsen if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { 181*1c12ee1eSDan Willemsen kind = numFloat 182*1c12ee1eSDan Willemsen s = s[1:] 183*1c12ee1eSDan Willemsen n := 1 184*1c12ee1eSDan Willemsen if s[0] == '+' || s[0] == '-' { 185*1c12ee1eSDan Willemsen s = s[1:] 186*1c12ee1eSDan Willemsen n++ 187*1c12ee1eSDan Willemsen if len(s) == 0 { 188*1c12ee1eSDan Willemsen return number{} 189*1c12ee1eSDan Willemsen } 190*1c12ee1eSDan Willemsen } 191*1c12ee1eSDan Willemsen for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { 192*1c12ee1eSDan Willemsen s = s[1:] 193*1c12ee1eSDan Willemsen n++ 194*1c12ee1eSDan Willemsen } 195*1c12ee1eSDan Willemsen size += n 196*1c12ee1eSDan Willemsen } 197*1c12ee1eSDan Willemsen 198*1c12ee1eSDan Willemsen // Optional suffix f or F for floats. 199*1c12ee1eSDan Willemsen if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') { 200*1c12ee1eSDan Willemsen kind = numFloat 201*1c12ee1eSDan Willemsen s = s[1:] 202*1c12ee1eSDan Willemsen size++ 203*1c12ee1eSDan Willemsen } 204*1c12ee1eSDan Willemsen 205*1c12ee1eSDan Willemsen // Check that next byte is a delimiter or it is at the end. 206*1c12ee1eSDan Willemsen if len(s) > 0 && !isDelim(s[0]) { 207*1c12ee1eSDan Willemsen return number{} 208*1c12ee1eSDan Willemsen } 209*1c12ee1eSDan Willemsen 210*1c12ee1eSDan Willemsen return number{kind: kind, neg: neg, size: size, sep: sep} 211*1c12ee1eSDan Willemsen} 212