1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved. 2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style 3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file. 4*1c12ee1eSDan Willemsen 5*1c12ee1eSDan Willemsenpackage json 6*1c12ee1eSDan Willemsen 7*1c12ee1eSDan Willemsenimport ( 8*1c12ee1eSDan Willemsen "bytes" 9*1c12ee1eSDan Willemsen "strconv" 10*1c12ee1eSDan Willemsen) 11*1c12ee1eSDan Willemsen 12*1c12ee1eSDan Willemsen// parseNumber reads the given []byte for a valid JSON number. If it is valid, 13*1c12ee1eSDan Willemsen// it returns the number of bytes. Parsing logic follows the definition in 14*1c12ee1eSDan Willemsen// https://tools.ietf.org/html/rfc7159#section-6, and is based off 15*1c12ee1eSDan Willemsen// encoding/json.isValidNumber function. 16*1c12ee1eSDan Willemsenfunc parseNumber(input []byte) (int, bool) { 17*1c12ee1eSDan Willemsen var n int 18*1c12ee1eSDan Willemsen 19*1c12ee1eSDan Willemsen s := input 20*1c12ee1eSDan Willemsen if len(s) == 0 { 21*1c12ee1eSDan Willemsen return 0, false 22*1c12ee1eSDan Willemsen } 23*1c12ee1eSDan Willemsen 24*1c12ee1eSDan Willemsen // Optional - 25*1c12ee1eSDan Willemsen if s[0] == '-' { 26*1c12ee1eSDan Willemsen s = s[1:] 27*1c12ee1eSDan Willemsen n++ 28*1c12ee1eSDan Willemsen if len(s) == 0 { 29*1c12ee1eSDan Willemsen return 0, false 30*1c12ee1eSDan Willemsen } 31*1c12ee1eSDan Willemsen } 32*1c12ee1eSDan Willemsen 33*1c12ee1eSDan Willemsen // Digits 34*1c12ee1eSDan Willemsen switch { 35*1c12ee1eSDan Willemsen case s[0] == '0': 36*1c12ee1eSDan Willemsen s = s[1:] 37*1c12ee1eSDan Willemsen n++ 38*1c12ee1eSDan Willemsen 39*1c12ee1eSDan Willemsen case '1' <= s[0] && s[0] <= '9': 40*1c12ee1eSDan Willemsen s = s[1:] 41*1c12ee1eSDan Willemsen n++ 42*1c12ee1eSDan Willemsen for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { 43*1c12ee1eSDan Willemsen s = s[1:] 44*1c12ee1eSDan Willemsen n++ 45*1c12ee1eSDan Willemsen } 46*1c12ee1eSDan Willemsen 47*1c12ee1eSDan 
Willemsen default: 48*1c12ee1eSDan Willemsen return 0, false 49*1c12ee1eSDan Willemsen } 50*1c12ee1eSDan Willemsen 51*1c12ee1eSDan Willemsen // . followed by 1 or more digits. 52*1c12ee1eSDan Willemsen if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { 53*1c12ee1eSDan Willemsen s = s[2:] 54*1c12ee1eSDan Willemsen n += 2 55*1c12ee1eSDan Willemsen for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { 56*1c12ee1eSDan Willemsen s = s[1:] 57*1c12ee1eSDan Willemsen n++ 58*1c12ee1eSDan Willemsen } 59*1c12ee1eSDan Willemsen } 60*1c12ee1eSDan Willemsen 61*1c12ee1eSDan Willemsen // e or E followed by an optional - or + and 62*1c12ee1eSDan Willemsen // 1 or more digits. 63*1c12ee1eSDan Willemsen if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { 64*1c12ee1eSDan Willemsen s = s[1:] 65*1c12ee1eSDan Willemsen n++ 66*1c12ee1eSDan Willemsen if s[0] == '+' || s[0] == '-' { 67*1c12ee1eSDan Willemsen s = s[1:] 68*1c12ee1eSDan Willemsen n++ 69*1c12ee1eSDan Willemsen if len(s) == 0 { 70*1c12ee1eSDan Willemsen return 0, false 71*1c12ee1eSDan Willemsen } 72*1c12ee1eSDan Willemsen } 73*1c12ee1eSDan Willemsen for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { 74*1c12ee1eSDan Willemsen s = s[1:] 75*1c12ee1eSDan Willemsen n++ 76*1c12ee1eSDan Willemsen } 77*1c12ee1eSDan Willemsen } 78*1c12ee1eSDan Willemsen 79*1c12ee1eSDan Willemsen // Check that next byte is a delimiter or it is at the end. 80*1c12ee1eSDan Willemsen if n < len(input) && isNotDelim(input[n]) { 81*1c12ee1eSDan Willemsen return 0, false 82*1c12ee1eSDan Willemsen } 83*1c12ee1eSDan Willemsen 84*1c12ee1eSDan Willemsen return n, true 85*1c12ee1eSDan Willemsen} 86*1c12ee1eSDan Willemsen 87*1c12ee1eSDan Willemsen// numberParts is the result of parsing out a valid JSON number. It contains 88*1c12ee1eSDan Willemsen// the parts of a number. The parts are used for integer conversion. 
// numberParts is the result of parsing out a valid JSON number. It contains
// the parts of a number. The parts are used for integer conversion.
type numberParts struct {
	neg  bool   // true if the number has a leading '-'
	intp []byte // integer digits; empty when the integer part is a single 0
	frac []byte // fraction digits with trailing zeros removed
	exp  []byte // exponent digits including the optional sign; empty if absent
}

// parseNumberParts constructs numberParts from given []byte. The logic here is
// similar to parseNumber with the difference of having to construct
// numberParts. The slice fields in numberParts are subslices of the input.
//
// It returns false if input does not start with a valid JSON number. Bytes
// following the number are not examined, except that an 'e' or 'E' right after
// the number must introduce a well-formed exponent.
func parseNumberParts(input []byte) (numberParts, bool) {
	var parts numberParts
	pos := 0

	// Optional -
	if pos < len(input) && input[pos] == '-' {
		parts.neg = true
		pos++
	}
	// Rejects both empty input and a lone "-".
	if pos == len(input) {
		return numberParts{}, false
	}

	// Integer part.
	switch c := input[pos]; {
	case c == '0':
		// Skip the single 0; there is no need to store it.
		pos++

	case '1' <= c && c <= '9':
		start := pos
		pos++
		for pos < len(input) && '0' <= input[pos] && input[pos] <= '9' {
			pos++
		}
		parts.intp = input[start:pos]

	default:
		return numberParts{}, false
	}

	// . followed by 1 or more digits.
	if pos+1 < len(input) && input[pos] == '.' && '0' <= input[pos+1] && input[pos+1] <= '9' {
		start := pos + 1
		pos += 2
		for pos < len(input) && '0' <= input[pos] && input[pos] <= '9' {
			pos++
		}
		// Remove unnecessary 0s to the right.
		parts.frac = bytes.TrimRight(input[start:pos], "0")
	}

	// e or E followed by an optional - or + and 1 or more digits.
	if pos < len(input) && (input[pos] == 'e' || input[pos] == 'E') {
		pos++
		start := pos // the stored exponent includes the sign
		if pos < len(input) && (input[pos] == '+' || input[pos] == '-') {
			pos++
		}
		firstDigit := pos
		for pos < len(input) && '0' <= input[pos] && input[pos] <= '9' {
			pos++
		}
		// An exponent marker without digits is malformed; never hand back
		// an empty or sign-only exponent.
		if pos == firstDigit {
			return numberParts{}, false
		}
		parts.exp = input[start:pos]
	}

	return parts, true
}
182*1c12ee1eSDan Willemsenfunc normalizeToIntString(n numberParts) (string, bool) { 183*1c12ee1eSDan Willemsen intpSize := len(n.intp) 184*1c12ee1eSDan Willemsen fracSize := len(n.frac) 185*1c12ee1eSDan Willemsen 186*1c12ee1eSDan Willemsen if intpSize == 0 && fracSize == 0 { 187*1c12ee1eSDan Willemsen return "0", true 188*1c12ee1eSDan Willemsen } 189*1c12ee1eSDan Willemsen 190*1c12ee1eSDan Willemsen var exp int 191*1c12ee1eSDan Willemsen if len(n.exp) > 0 { 192*1c12ee1eSDan Willemsen i, err := strconv.ParseInt(string(n.exp), 10, 32) 193*1c12ee1eSDan Willemsen if err != nil { 194*1c12ee1eSDan Willemsen return "", false 195*1c12ee1eSDan Willemsen } 196*1c12ee1eSDan Willemsen exp = int(i) 197*1c12ee1eSDan Willemsen } 198*1c12ee1eSDan Willemsen 199*1c12ee1eSDan Willemsen var num []byte 200*1c12ee1eSDan Willemsen if exp >= 0 { 201*1c12ee1eSDan Willemsen // For positive E, shift fraction digits into integer part and also pad 202*1c12ee1eSDan Willemsen // with zeroes as needed. 203*1c12ee1eSDan Willemsen 204*1c12ee1eSDan Willemsen // If there are more digits in fraction than the E value, then the 205*1c12ee1eSDan Willemsen // number is not an integer. 206*1c12ee1eSDan Willemsen if fracSize > exp { 207*1c12ee1eSDan Willemsen return "", false 208*1c12ee1eSDan Willemsen } 209*1c12ee1eSDan Willemsen 210*1c12ee1eSDan Willemsen // Make sure resulting digits are within max value limit to avoid 211*1c12ee1eSDan Willemsen // unnecessarily constructing a large byte slice that may simply fail 212*1c12ee1eSDan Willemsen // later on. 213*1c12ee1eSDan Willemsen const maxDigits = 20 // Max uint64 value has 20 decimal digits. 214*1c12ee1eSDan Willemsen if intpSize+exp > maxDigits { 215*1c12ee1eSDan Willemsen return "", false 216*1c12ee1eSDan Willemsen } 217*1c12ee1eSDan Willemsen 218*1c12ee1eSDan Willemsen // Set cap to make a copy of integer part when appended. 219*1c12ee1eSDan Willemsen num = n.intp[:len(n.intp):len(n.intp)] 220*1c12ee1eSDan Willemsen num = append(num, n.frac...) 
221*1c12ee1eSDan Willemsen for i := 0; i < exp-fracSize; i++ { 222*1c12ee1eSDan Willemsen num = append(num, '0') 223*1c12ee1eSDan Willemsen } 224*1c12ee1eSDan Willemsen } else { 225*1c12ee1eSDan Willemsen // For negative E, shift digits in integer part out. 226*1c12ee1eSDan Willemsen 227*1c12ee1eSDan Willemsen // If there are fractions, then the number is not an integer. 228*1c12ee1eSDan Willemsen if fracSize > 0 { 229*1c12ee1eSDan Willemsen return "", false 230*1c12ee1eSDan Willemsen } 231*1c12ee1eSDan Willemsen 232*1c12ee1eSDan Willemsen // index is where the decimal point will be after adjusting for negative 233*1c12ee1eSDan Willemsen // exponent. 234*1c12ee1eSDan Willemsen index := intpSize + exp 235*1c12ee1eSDan Willemsen if index < 0 { 236*1c12ee1eSDan Willemsen return "", false 237*1c12ee1eSDan Willemsen } 238*1c12ee1eSDan Willemsen 239*1c12ee1eSDan Willemsen num = n.intp 240*1c12ee1eSDan Willemsen // If any of the digits being shifted to the right of the decimal point 241*1c12ee1eSDan Willemsen // is non-zero, then the number is not an integer. 242*1c12ee1eSDan Willemsen for i := index; i < intpSize; i++ { 243*1c12ee1eSDan Willemsen if num[i] != '0' { 244*1c12ee1eSDan Willemsen return "", false 245*1c12ee1eSDan Willemsen } 246*1c12ee1eSDan Willemsen } 247*1c12ee1eSDan Willemsen num = num[:index] 248*1c12ee1eSDan Willemsen } 249*1c12ee1eSDan Willemsen 250*1c12ee1eSDan Willemsen if n.neg { 251*1c12ee1eSDan Willemsen return "-" + string(num), true 252*1c12ee1eSDan Willemsen } 253*1c12ee1eSDan Willemsen return string(num), true 254*1c12ee1eSDan Willemsen} 255