decimal.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a MIT license found in the LICENSE file.
  3. package codec
  4. import (
  5. "math"
  6. "strconv"
  7. )
  8. // Per go spec, floats are represented in memory as
  9. // IEEE single or double precision floating point values.
  10. //
  11. // We also looked at the source for stdlib math/modf.go,
  12. // reviewed https://github.com/chewxy/math32
  13. // and read wikipedia documents describing the formats.
  14. //
  15. // It became clear that we could easily look at the bits to determine
  16. // whether any fraction exists.
  17. func parseFloat32(b []byte) (f float32, err error) {
  18. return parseFloat32_custom(b)
  19. }
  20. func parseFloat64(b []byte) (f float64, err error) {
  21. return parseFloat64_custom(b)
  22. }
  23. func parseFloat32_strconv(b []byte) (f float32, err error) {
  24. f64, err := strconv.ParseFloat(stringView(b), 32)
  25. f = float32(f64)
  26. return
  27. }
  28. func parseFloat64_strconv(b []byte) (f float64, err error) {
  29. return strconv.ParseFloat(stringView(b), 64)
  30. }
  31. // ------ parseFloat custom below --------
  32. // JSON really supports decimal numbers in base 10 notation, with exponent support.
  33. //
  34. // We assume the following:
  35. // - a lot of floating point numbers in json files will have defined precision
  36. // (in terms of number of digits after decimal point), etc.
  37. // - these (referenced above) can be written in exact format.
  38. //
  39. // strconv.ParseFloat has some unnecessary overhead which we can do without
  40. // for the common case:
  41. //
  42. // - expensive char-by-char check to see if underscores are in right place
  43. // - testing for and skipping underscores
  44. // - check if the string matches ignorecase +/- inf, +/- infinity, nan
  45. // - support for base 16 (0xFFFF...)
  46. //
  47. // The functions below will try a fast-path for floats which can be decoded
  48. // without any loss of precision, meaning they:
  49. //
  50. // - fits within the significand bits of the 32-bits or 64-bits
  51. // - exponent fits within the exponent value
  52. // - there is no truncation (any extra numbers are all trailing zeros)
  53. //
  54. // To figure out what the values are for maxMantDigits, use this idea below:
  55. //
  56. // 2^23 = 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32)
  57. // 2^32 = 42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
  58. // 2^52 = 4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64)
  59. // 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
  60. //
  61. // Note: we only allow for up to what can comfortably fit into the significand
  62. // ignoring the exponent, and we only try to parse iff significand fits.
  63. const (
  64. fMaxMultiplierForExactPow10_64 = 1e15
  65. fMaxMultiplierForExactPow10_32 = 1e7
  66. fUint64Cutoff = (1<<64-1)/10 + 1
  67. // fUint32Cutoff = (1<<32-1)/10 + 1
  68. fBase = 10
  69. )
  70. const (
  71. thousand = 1000
  72. million = thousand * thousand
  73. billion = thousand * million
  74. trillion = thousand * billion
  75. quadrillion = thousand * trillion
  76. quintillion = thousand * quadrillion
  77. )
  78. // Exact powers of 10.
  79. var uint64pow10 = [...]uint64{
  80. 1, 10, 100,
  81. 1 * thousand, 10 * thousand, 100 * thousand,
  82. 1 * million, 10 * million, 100 * million,
  83. 1 * billion, 10 * billion, 100 * billion,
  84. 1 * trillion, 10 * trillion, 100 * trillion,
  85. 1 * quadrillion, 10 * quadrillion, 100 * quadrillion,
  86. 1 * quintillion, 10 * quintillion,
  87. }
  88. var float64pow10 = [...]float64{
  89. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
  90. 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
  91. 1e20, 1e21, 1e22,
  92. }
  93. var float32pow10 = [...]float32{
  94. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
  95. }
  96. type floatinfo struct {
  97. mantbits uint8
  98. // expbits uint8 // (unused)
  99. // bias int16 // (unused)
  100. // is32bit bool // (unused)
  101. exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)
  102. exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0)
  103. // maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32
  104. mantCutoffIsUint64Cutoff bool
  105. mantCutoff uint64
  106. }
  107. var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1}
  108. var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1}
  109. var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff}
  110. func noFrac64(fbits uint64) bool {
  111. if fbits == 0 {
  112. return true
  113. }
  114. exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias
  115. // clear top 12+e bits, the integer part; if the rest is 0, then no fraction.
  116. return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part
  117. }
  118. func noFrac32(fbits uint32) bool {
  119. if fbits == 0 {
  120. return true
  121. }
  122. exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias
  123. // clear top 9+e bits, the integer part; if the rest is 0, then no fraction.
  124. return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part
  125. }
  126. func strconvParseErr(b []byte, fn string) error {
  127. return &strconv.NumError{
  128. Func: fn,
  129. Err: strconv.ErrSyntax,
  130. Num: string(b),
  131. }
  132. }
  133. func parseFloat32_reader(r readFloatResult) (f float32, fail bool) {
  134. f = float32(r.mantissa)
  135. if r.exp == 0 {
  136. } else if r.exp < 0 { // int / 10^k
  137. f /= float32pow10[uint8(-r.exp)]
  138. } else { // exp > 0
  139. if r.exp > fi32.exactPow10 {
  140. f *= float32pow10[r.exp-fi32.exactPow10]
  141. if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range
  142. fail = true
  143. return // ok = false
  144. }
  145. f *= float32pow10[fi32.exactPow10]
  146. } else {
  147. f *= float32pow10[uint8(r.exp)]
  148. }
  149. }
  150. if r.neg {
  151. f = -f
  152. }
  153. return
  154. }
  155. func parseFloat32_custom(b []byte) (f float32, err error) {
  156. r := readFloat(b, fi32)
  157. if r.bad {
  158. return 0, strconvParseErr(b, "ParseFloat")
  159. }
  160. if r.ok {
  161. f, r.bad = parseFloat32_reader(r)
  162. if !r.bad {
  163. return
  164. }
  165. }
  166. return parseFloat32_strconv(b)
  167. }
  168. func parseFloat64_reader(r readFloatResult) (f float64, fail bool) {
  169. f = float64(r.mantissa)
  170. if r.exp == 0 {
  171. } else if r.exp < 0 { // int / 10^k
  172. f /= float64pow10[-uint8(r.exp)]
  173. } else { // exp > 0
  174. if r.exp > fi64.exactPow10 {
  175. f *= float64pow10[r.exp-fi64.exactPow10]
  176. if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range
  177. fail = true
  178. return
  179. }
  180. f *= float64pow10[fi64.exactPow10]
  181. } else {
  182. f *= float64pow10[uint8(r.exp)]
  183. }
  184. }
  185. if r.neg {
  186. f = -f
  187. }
  188. return
  189. }
  190. func parseFloat64_custom(b []byte) (f float64, err error) {
  191. r := readFloat(b, fi64)
  192. if r.bad {
  193. return 0, strconvParseErr(b, "ParseFloat")
  194. }
  195. if r.ok {
  196. f, r.bad = parseFloat64_reader(r)
  197. if !r.bad {
  198. return
  199. }
  200. }
  201. return parseFloat64_strconv(b)
  202. }
  203. func parseUint64_simple(b []byte) (n uint64, ok bool) {
  204. var i int
  205. var n1 uint64
  206. var c uint8
  207. LOOP:
  208. if i < len(b) {
  209. c = b[i]
  210. // unsigned integers don't overflow well on multiplication, so check cutoff here
  211. // e.g. (maxUint64-5)*10 doesn't overflow well ...
  212. // if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' {
  213. if n >= fUint64Cutoff || c < '0' || c > '9' {
  214. return
  215. } else if c == '0' {
  216. n *= fBase
  217. } else {
  218. n1 = n
  219. n = n*fBase + uint64(c-'0')
  220. if n < n1 {
  221. return
  222. }
  223. }
  224. i++
  225. goto LOOP
  226. }
  227. ok = true
  228. return
  229. }
  230. func parseUint64_reader(r readFloatResult) (f uint64, fail bool) {
  231. f = r.mantissa
  232. if r.exp == 0 {
  233. } else if r.exp < 0 { // int / 10^k
  234. if f%uint64pow10[uint8(-r.exp)] != 0 {
  235. fail = true
  236. } else {
  237. f /= uint64pow10[uint8(-r.exp)]
  238. }
  239. } else { // exp > 0
  240. f *= uint64pow10[uint8(r.exp)]
  241. }
  242. return
  243. }
  244. func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) {
  245. if len(b) == 0 {
  246. ok = true
  247. return
  248. }
  249. if b[0] == '-' {
  250. if len(b) == 1 {
  251. return
  252. }
  253. neg = true
  254. b = b[1:]
  255. }
  256. u, ok = parseUint64_simple(b)
  257. if ok {
  258. return
  259. }
  260. r := readFloat(b, fi64u)
  261. if r.ok {
  262. var fail bool
  263. u, fail = parseUint64_reader(r)
  264. if fail {
  265. f, err := parseFloat64(b)
  266. if err != nil {
  267. return
  268. }
  269. if !noFrac64(math.Float64bits(f)) {
  270. return
  271. }
  272. u = uint64(f)
  273. }
  274. ok = true
  275. return
  276. }
  277. return
  278. }
  279. // parseNumber will return an integer if only composed of [-]?[0-9]+
  280. // Else it will return a float.
  281. func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) {
  282. var ok, neg bool
  283. var f uint64
  284. if len(b) == 0 {
  285. return
  286. }
  287. if b[0] == '-' {
  288. neg = true
  289. f, ok = parseUint64_simple(b[1:])
  290. } else {
  291. f, ok = parseUint64_simple(b)
  292. }
  293. if ok {
  294. if neg {
  295. z.v = valueTypeInt
  296. if chkOvf.Uint2Int(f, neg) {
  297. return strconvParseErr(b, "ParseInt")
  298. }
  299. z.i = -int64(f)
  300. } else if preferSignedInt {
  301. z.v = valueTypeInt
  302. if chkOvf.Uint2Int(f, neg) {
  303. return strconvParseErr(b, "ParseInt")
  304. }
  305. z.i = int64(f)
  306. } else {
  307. z.v = valueTypeUint
  308. z.u = f
  309. }
  310. return
  311. }
  312. z.v = valueTypeFloat
  313. z.f, err = parseFloat64_custom(b)
  314. return
  315. }
  316. type readFloatResult struct {
  317. mantissa uint64
  318. exp int8
  319. neg bool
  320. trunc bool
  321. bad bool // bad decimal string
  322. hardexp bool // exponent is hard to handle (> 2 digits, etc)
  323. ok bool
  324. // sawdot bool
  325. // sawexp bool
  326. //_ [2]bool // padding
  327. }
  328. func readFloat(s []byte, y floatinfo) (r readFloatResult) {
  329. var i uint // uint, so that we eliminate bounds checking
  330. var slen = uint(len(s))
  331. if slen == 0 {
  332. // read an empty string as the zero value
  333. // r.bad = true
  334. r.ok = true
  335. return
  336. }
  337. if s[0] == '-' {
  338. r.neg = true
  339. i++
  340. }
  341. // we considered punting early if string has length > maxMantDigits, but this doesn't account
  342. // for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
  343. var nd, ndMant, dp int8
  344. var sawdot, sawexp bool
  345. var xu uint64
  346. LOOP:
  347. for ; i < slen; i++ {
  348. switch s[i] {
  349. case '.':
  350. if sawdot {
  351. r.bad = true
  352. return
  353. }
  354. sawdot = true
  355. dp = nd
  356. case 'e', 'E':
  357. sawexp = true
  358. break LOOP
  359. case '0':
  360. if nd == 0 {
  361. dp--
  362. continue LOOP
  363. }
  364. nd++
  365. if r.mantissa < y.mantCutoff {
  366. r.mantissa *= fBase
  367. ndMant++
  368. }
  369. case '1', '2', '3', '4', '5', '6', '7', '8', '9':
  370. nd++
  371. if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff {
  372. r.mantissa *= fBase
  373. xu = r.mantissa + uint64(s[i]-'0')
  374. if xu < r.mantissa {
  375. r.trunc = true
  376. return
  377. }
  378. r.mantissa = xu
  379. } else if r.mantissa < y.mantCutoff {
  380. // mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0')
  381. r.mantissa = r.mantissa*fBase + uint64(s[i]-'0')
  382. } else {
  383. r.trunc = true
  384. return
  385. }
  386. ndMant++
  387. default:
  388. r.bad = true
  389. return
  390. }
  391. }
  392. if !sawdot {
  393. dp = nd
  394. }
  395. if sawexp {
  396. i++
  397. if i < slen {
  398. var eneg bool
  399. if s[i] == '+' {
  400. i++
  401. } else if s[i] == '-' {
  402. i++
  403. eneg = true
  404. }
  405. if i < slen {
  406. // for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
  407. // exit quick if exponent is more than 2 digits.
  408. if i+2 < slen {
  409. r.hardexp = true
  410. return
  411. }
  412. var e int8
  413. if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
  414. r.bad = true
  415. return
  416. }
  417. e = int8(s[i] - '0')
  418. i++
  419. if i < slen {
  420. if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
  421. r.bad = true
  422. return
  423. }
  424. e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
  425. i++
  426. }
  427. if eneg {
  428. dp -= e
  429. } else {
  430. dp += e
  431. }
  432. }
  433. }
  434. }
  435. if r.mantissa != 0 {
  436. r.exp = dp - ndMant
  437. // do not set ok=true for cases we cannot handle
  438. if r.exp < -y.exactPow10 ||
  439. r.exp > y.exactInts+y.exactPow10 ||
  440. (y.mantbits != 0 && r.mantissa>>y.mantbits != 0) {
  441. r.hardexp = true
  442. return
  443. }
  444. }
  445. r.ok = true
  446. return
  447. }