parser.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086
  1. package toml
  2. import (
  3. "bytes"
  4. "unicode"
  5. "github.com/pelletier/go-toml/v2/internal/ast"
  6. "github.com/pelletier/go-toml/v2/internal/danger"
  7. )
  8. type parser struct {
  9. builder ast.Builder
  10. ref ast.Reference
  11. data []byte
  12. left []byte
  13. err error
  14. first bool
  15. }
  16. func (p *parser) Range(b []byte) ast.Range {
  17. return ast.Range{
  18. Offset: uint32(danger.SubsliceOffset(p.data, b)),
  19. Length: uint32(len(b)),
  20. }
  21. }
  22. func (p *parser) Raw(raw ast.Range) []byte {
  23. return p.data[raw.Offset : raw.Offset+raw.Length]
  24. }
  25. func (p *parser) Reset(b []byte) {
  26. p.builder.Reset()
  27. p.ref = ast.InvalidReference
  28. p.data = b
  29. p.left = b
  30. p.err = nil
  31. p.first = true
  32. }
  33. //nolint:cyclop
  34. func (p *parser) NextExpression() bool {
  35. if len(p.left) == 0 || p.err != nil {
  36. return false
  37. }
  38. p.builder.Reset()
  39. p.ref = ast.InvalidReference
  40. for {
  41. if len(p.left) == 0 || p.err != nil {
  42. return false
  43. }
  44. if !p.first {
  45. p.left, p.err = p.parseNewline(p.left)
  46. }
  47. if len(p.left) == 0 || p.err != nil {
  48. return false
  49. }
  50. p.ref, p.left, p.err = p.parseExpression(p.left)
  51. if p.err != nil {
  52. return false
  53. }
  54. p.first = false
  55. if p.ref.Valid() {
  56. return true
  57. }
  58. }
  59. }
  60. func (p *parser) Expression() *ast.Node {
  61. return p.builder.NodeAt(p.ref)
  62. }
  63. func (p *parser) Error() error {
  64. return p.err
  65. }
  66. func (p *parser) parseNewline(b []byte) ([]byte, error) {
  67. if b[0] == '\n' {
  68. return b[1:], nil
  69. }
  70. if b[0] == '\r' {
  71. _, rest, err := scanWindowsNewline(b)
  72. return rest, err
  73. }
  74. return nil, newDecodeError(b[0:1], "expected newline but got %#U", b[0])
  75. }
  76. func (p *parser) parseExpression(b []byte) (ast.Reference, []byte, error) {
  77. // expression = ws [ comment ]
  78. // expression =/ ws keyval ws [ comment ]
  79. // expression =/ ws table ws [ comment ]
  80. ref := ast.InvalidReference
  81. b = p.parseWhitespace(b)
  82. if len(b) == 0 {
  83. return ref, b, nil
  84. }
  85. if b[0] == '#' {
  86. _, rest, err := scanComment(b)
  87. return ref, rest, err
  88. }
  89. if b[0] == '\n' || b[0] == '\r' {
  90. return ref, b, nil
  91. }
  92. var err error
  93. if b[0] == '[' {
  94. ref, b, err = p.parseTable(b)
  95. } else {
  96. ref, b, err = p.parseKeyval(b)
  97. }
  98. if err != nil {
  99. return ref, nil, err
  100. }
  101. b = p.parseWhitespace(b)
  102. if len(b) > 0 && b[0] == '#' {
  103. _, rest, err := scanComment(b)
  104. return ref, rest, err
  105. }
  106. return ref, b, nil
  107. }
  108. func (p *parser) parseTable(b []byte) (ast.Reference, []byte, error) {
  109. // table = std-table / array-table
  110. if len(b) > 1 && b[1] == '[' {
  111. return p.parseArrayTable(b)
  112. }
  113. return p.parseStdTable(b)
  114. }
  115. func (p *parser) parseArrayTable(b []byte) (ast.Reference, []byte, error) {
  116. // array-table = array-table-open key array-table-close
  117. // array-table-open = %x5B.5B ws ; [[ Double left square bracket
  118. // array-table-close = ws %x5D.5D ; ]] Double right square bracket
  119. ref := p.builder.Push(ast.Node{
  120. Kind: ast.ArrayTable,
  121. })
  122. b = b[2:]
  123. b = p.parseWhitespace(b)
  124. k, b, err := p.parseKey(b)
  125. if err != nil {
  126. return ref, nil, err
  127. }
  128. p.builder.AttachChild(ref, k)
  129. b = p.parseWhitespace(b)
  130. b, err = expect(']', b)
  131. if err != nil {
  132. return ref, nil, err
  133. }
  134. b, err = expect(']', b)
  135. return ref, b, err
  136. }
  137. func (p *parser) parseStdTable(b []byte) (ast.Reference, []byte, error) {
  138. // std-table = std-table-open key std-table-close
  139. // std-table-open = %x5B ws ; [ Left square bracket
  140. // std-table-close = ws %x5D ; ] Right square bracket
  141. ref := p.builder.Push(ast.Node{
  142. Kind: ast.Table,
  143. })
  144. b = b[1:]
  145. b = p.parseWhitespace(b)
  146. key, b, err := p.parseKey(b)
  147. if err != nil {
  148. return ref, nil, err
  149. }
  150. p.builder.AttachChild(ref, key)
  151. b = p.parseWhitespace(b)
  152. b, err = expect(']', b)
  153. return ref, b, err
  154. }
  155. func (p *parser) parseKeyval(b []byte) (ast.Reference, []byte, error) {
  156. // keyval = key keyval-sep val
  157. ref := p.builder.Push(ast.Node{
  158. Kind: ast.KeyValue,
  159. })
  160. key, b, err := p.parseKey(b)
  161. if err != nil {
  162. return ast.InvalidReference, nil, err
  163. }
  164. // keyval-sep = ws %x3D ws ; =
  165. b = p.parseWhitespace(b)
  166. if len(b) == 0 {
  167. return ast.InvalidReference, nil, newDecodeError(b, "expected = after a key, but the document ends there")
  168. }
  169. b, err = expect('=', b)
  170. if err != nil {
  171. return ast.InvalidReference, nil, err
  172. }
  173. b = p.parseWhitespace(b)
  174. valRef, b, err := p.parseVal(b)
  175. if err != nil {
  176. return ref, b, err
  177. }
  178. p.builder.Chain(valRef, key)
  179. p.builder.AttachChild(ref, valRef)
  180. return ref, b, err
  181. }
  182. //nolint:cyclop,funlen
  183. func (p *parser) parseVal(b []byte) (ast.Reference, []byte, error) {
  184. // val = string / boolean / array / inline-table / date-time / float / integer
  185. ref := ast.InvalidReference
  186. if len(b) == 0 {
  187. return ref, nil, newDecodeError(b, "expected value, not eof")
  188. }
  189. var err error
  190. c := b[0]
  191. switch c {
  192. case '"':
  193. var raw []byte
  194. var v []byte
  195. if scanFollowsMultilineBasicStringDelimiter(b) {
  196. raw, v, b, err = p.parseMultilineBasicString(b)
  197. } else {
  198. raw, v, b, err = p.parseBasicString(b)
  199. }
  200. if err == nil {
  201. ref = p.builder.Push(ast.Node{
  202. Kind: ast.String,
  203. Raw: p.Range(raw),
  204. Data: v,
  205. })
  206. }
  207. return ref, b, err
  208. case '\'':
  209. var raw []byte
  210. var v []byte
  211. if scanFollowsMultilineLiteralStringDelimiter(b) {
  212. raw, v, b, err = p.parseMultilineLiteralString(b)
  213. } else {
  214. raw, v, b, err = p.parseLiteralString(b)
  215. }
  216. if err == nil {
  217. ref = p.builder.Push(ast.Node{
  218. Kind: ast.String,
  219. Raw: p.Range(raw),
  220. Data: v,
  221. })
  222. }
  223. return ref, b, err
  224. case 't':
  225. if !scanFollowsTrue(b) {
  226. return ref, nil, newDecodeError(atmost(b, 4), "expected 'true'")
  227. }
  228. ref = p.builder.Push(ast.Node{
  229. Kind: ast.Bool,
  230. Data: b[:4],
  231. })
  232. return ref, b[4:], nil
  233. case 'f':
  234. if !scanFollowsFalse(b) {
  235. return ref, nil, newDecodeError(atmost(b, 5), "expected 'false'")
  236. }
  237. ref = p.builder.Push(ast.Node{
  238. Kind: ast.Bool,
  239. Data: b[:5],
  240. })
  241. return ref, b[5:], nil
  242. case '[':
  243. return p.parseValArray(b)
  244. case '{':
  245. return p.parseInlineTable(b)
  246. default:
  247. return p.parseIntOrFloatOrDateTime(b)
  248. }
  249. }
  // atmost returns the first n bytes of b, or all of b when it holds n bytes
  // or fewer. The result shares its backing array with b.
  func atmost(b []byte, n int) []byte {
  	if n >= len(b) {
  		return b
  	}

  	return b[:n]
  }
  256. func (p *parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
  257. v, rest, err := scanLiteralString(b)
  258. if err != nil {
  259. return nil, nil, nil, err
  260. }
  261. return v, v[1 : len(v)-1], rest, nil
  262. }
  263. func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) {
  264. // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
  265. // inline-table-open = %x7B ws ; {
  266. // inline-table-close = ws %x7D ; }
  267. // inline-table-sep = ws %x2C ws ; , Comma
  268. // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
  269. parent := p.builder.Push(ast.Node{
  270. Kind: ast.InlineTable,
  271. })
  272. first := true
  273. var child ast.Reference
  274. b = b[1:]
  275. var err error
  276. for len(b) > 0 {
  277. previousB := b
  278. b = p.parseWhitespace(b)
  279. if len(b) == 0 {
  280. return parent, nil, newDecodeError(previousB[:1], "inline table is incomplete")
  281. }
  282. if b[0] == '}' {
  283. break
  284. }
  285. if !first {
  286. b, err = expect(',', b)
  287. if err != nil {
  288. return parent, nil, err
  289. }
  290. b = p.parseWhitespace(b)
  291. }
  292. var kv ast.Reference
  293. kv, b, err = p.parseKeyval(b)
  294. if err != nil {
  295. return parent, nil, err
  296. }
  297. if first {
  298. p.builder.AttachChild(parent, kv)
  299. } else {
  300. p.builder.Chain(child, kv)
  301. }
  302. child = kv
  303. first = false
  304. }
  305. rest, err := expect('}', b)
  306. return parent, rest, err
  307. }
  308. //nolint:funlen,cyclop
  309. func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) {
  310. // array = array-open [ array-values ] ws-comment-newline array-close
  311. // array-open = %x5B ; [
  312. // array-close = %x5D ; ]
  313. // array-values = ws-comment-newline val ws-comment-newline array-sep array-values
  314. // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
  315. // array-sep = %x2C ; , Comma
  316. // ws-comment-newline = *( wschar / [ comment ] newline )
  317. arrayStart := b
  318. b = b[1:]
  319. parent := p.builder.Push(ast.Node{
  320. Kind: ast.Array,
  321. })
  322. first := true
  323. var lastChild ast.Reference
  324. var err error
  325. for len(b) > 0 {
  326. b, err = p.parseOptionalWhitespaceCommentNewline(b)
  327. if err != nil {
  328. return parent, nil, err
  329. }
  330. if len(b) == 0 {
  331. return parent, nil, newDecodeError(arrayStart[:1], "array is incomplete")
  332. }
  333. if b[0] == ']' {
  334. break
  335. }
  336. if b[0] == ',' {
  337. if first {
  338. return parent, nil, newDecodeError(b[0:1], "array cannot start with comma")
  339. }
  340. b = b[1:]
  341. b, err = p.parseOptionalWhitespaceCommentNewline(b)
  342. if err != nil {
  343. return parent, nil, err
  344. }
  345. } else if !first {
  346. return parent, nil, newDecodeError(b[0:1], "array elements must be separated by commas")
  347. }
  348. // TOML allows trailing commas in arrays.
  349. if len(b) > 0 && b[0] == ']' {
  350. break
  351. }
  352. var valueRef ast.Reference
  353. valueRef, b, err = p.parseVal(b)
  354. if err != nil {
  355. return parent, nil, err
  356. }
  357. if first {
  358. p.builder.AttachChild(parent, valueRef)
  359. } else {
  360. p.builder.Chain(lastChild, valueRef)
  361. }
  362. lastChild = valueRef
  363. b, err = p.parseOptionalWhitespaceCommentNewline(b)
  364. if err != nil {
  365. return parent, nil, err
  366. }
  367. first = false
  368. }
  369. rest, err := expect(']', b)
  370. return parent, rest, err
  371. }
  372. func (p *parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
  373. for len(b) > 0 {
  374. var err error
  375. b = p.parseWhitespace(b)
  376. if len(b) > 0 && b[0] == '#' {
  377. _, b, err = scanComment(b)
  378. if err != nil {
  379. return nil, err
  380. }
  381. }
  382. if len(b) == 0 {
  383. break
  384. }
  385. if b[0] == '\n' || b[0] == '\r' {
  386. b, err = p.parseNewline(b)
  387. if err != nil {
  388. return nil, err
  389. }
  390. } else {
  391. break
  392. }
  393. }
  394. return b, nil
  395. }
  396. func (p *parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
  397. token, rest, err := scanMultilineLiteralString(b)
  398. if err != nil {
  399. return nil, nil, nil, err
  400. }
  401. i := 3
  402. // skip the immediate new line
  403. if token[i] == '\n' {
  404. i++
  405. } else if token[i] == '\r' && token[i+1] == '\n' {
  406. i += 2
  407. }
  408. return token, token[i : len(token)-3], rest, err
  409. }
  410. //nolint:funlen,gocognit,cyclop
  411. func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
  412. // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
  413. // ml-basic-string-delim
  414. // ml-basic-string-delim = 3quotation-mark
  415. // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
  416. //
  417. // mlb-content = mlb-char / newline / mlb-escaped-nl
  418. // mlb-char = mlb-unescaped / escaped
  419. // mlb-quotes = 1*2quotation-mark
  420. // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
  421. // mlb-escaped-nl = escape ws newline *( wschar / newline )
  422. token, escaped, rest, err := scanMultilineBasicString(b)
  423. if err != nil {
  424. return nil, nil, nil, err
  425. }
  426. i := 3
  427. // skip the immediate new line
  428. if token[i] == '\n' {
  429. i++
  430. } else if token[i] == '\r' && token[i+1] == '\n' {
  431. i += 2
  432. }
  433. // fast path
  434. startIdx := i
  435. endIdx := len(token) - len(`"""`)
  436. if !escaped {
  437. str := token[startIdx:endIdx]
  438. verr := utf8TomlValidAlreadyEscaped(str)
  439. if verr.Zero() {
  440. return token, str, rest, nil
  441. }
  442. return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
  443. }
  444. var builder bytes.Buffer
  445. // The scanner ensures that the token starts and ends with quotes and that
  446. // escapes are balanced.
  447. for i < len(token)-3 {
  448. c := token[i]
  449. //nolint:nestif
  450. if c == '\\' {
  451. // When the last non-whitespace character on a line is an unescaped \,
  452. // it will be trimmed along with all whitespace (including newlines) up
  453. // to the next non-whitespace character or closing delimiter.
  454. isLastNonWhitespaceOnLine := false
  455. j := 1
  456. findEOLLoop:
  457. for ; j < len(token)-3-i; j++ {
  458. switch token[i+j] {
  459. case ' ', '\t':
  460. continue
  461. case '\r':
  462. if token[i+j+1] == '\n' {
  463. continue
  464. }
  465. case '\n':
  466. isLastNonWhitespaceOnLine = true
  467. }
  468. break findEOLLoop
  469. }
  470. if isLastNonWhitespaceOnLine {
  471. i += j
  472. for ; i < len(token)-3; i++ {
  473. c := token[i]
  474. if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
  475. i--
  476. break
  477. }
  478. }
  479. i++
  480. continue
  481. }
  482. // handle escaping
  483. i++
  484. c = token[i]
  485. switch c {
  486. case '"', '\\':
  487. builder.WriteByte(c)
  488. case 'b':
  489. builder.WriteByte('\b')
  490. case 'f':
  491. builder.WriteByte('\f')
  492. case 'n':
  493. builder.WriteByte('\n')
  494. case 'r':
  495. builder.WriteByte('\r')
  496. case 't':
  497. builder.WriteByte('\t')
  498. case 'e':
  499. builder.WriteByte(0x1B)
  500. case 'u':
  501. x, err := hexToRune(atmost(token[i+1:], 4), 4)
  502. if err != nil {
  503. return nil, nil, nil, err
  504. }
  505. builder.WriteRune(x)
  506. i += 4
  507. case 'U':
  508. x, err := hexToRune(atmost(token[i+1:], 8), 8)
  509. if err != nil {
  510. return nil, nil, nil, err
  511. }
  512. builder.WriteRune(x)
  513. i += 8
  514. default:
  515. return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
  516. }
  517. i++
  518. } else {
  519. size := utf8ValidNext(token[i:])
  520. if size == 0 {
  521. return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c)
  522. }
  523. builder.Write(token[i : i+size])
  524. i += size
  525. }
  526. }
  527. return token, builder.Bytes(), rest, nil
  528. }
  529. func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
  530. // key = simple-key / dotted-key
  531. // simple-key = quoted-key / unquoted-key
  532. //
  533. // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
  534. // quoted-key = basic-string / literal-string
  535. // dotted-key = simple-key 1*( dot-sep simple-key )
  536. //
  537. // dot-sep = ws %x2E ws ; . Period
  538. raw, key, b, err := p.parseSimpleKey(b)
  539. if err != nil {
  540. return ast.InvalidReference, nil, err
  541. }
  542. ref := p.builder.Push(ast.Node{
  543. Kind: ast.Key,
  544. Raw: p.Range(raw),
  545. Data: key,
  546. })
  547. for {
  548. b = p.parseWhitespace(b)
  549. if len(b) > 0 && b[0] == '.' {
  550. b = p.parseWhitespace(b[1:])
  551. raw, key, b, err = p.parseSimpleKey(b)
  552. if err != nil {
  553. return ref, nil, err
  554. }
  555. p.builder.PushAndChain(ast.Node{
  556. Kind: ast.Key,
  557. Raw: p.Range(raw),
  558. Data: key,
  559. })
  560. } else {
  561. break
  562. }
  563. }
  564. return ref, b, nil
  565. }
  566. func (p *parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
  567. if len(b) == 0 {
  568. return nil, nil, nil, newDecodeError(b, "expected key but found none")
  569. }
  570. // simple-key = quoted-key / unquoted-key
  571. // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
  572. // quoted-key = basic-string / literal-string
  573. switch {
  574. case b[0] == '\'':
  575. return p.parseLiteralString(b)
  576. case b[0] == '"':
  577. return p.parseBasicString(b)
  578. case isUnquotedKeyChar(b[0]):
  579. key, rest = scanUnquotedKey(b)
  580. return key, key, rest, nil
  581. default:
  582. return nil, nil, nil, newDecodeError(b[0:1], "invalid character at start of key: %c", b[0])
  583. }
  584. }
  585. //nolint:funlen,cyclop
  586. func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
  587. // basic-string = quotation-mark *basic-char quotation-mark
  588. // quotation-mark = %x22 ; "
  589. // basic-char = basic-unescaped / escaped
  590. // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
  591. // escaped = escape escape-seq-char
  592. // escape-seq-char = %x22 ; " quotation mark U+0022
  593. // escape-seq-char =/ %x5C ; \ reverse solidus U+005C
  594. // escape-seq-char =/ %x62 ; b backspace U+0008
  595. // escape-seq-char =/ %x66 ; f form feed U+000C
  596. // escape-seq-char =/ %x6E ; n line feed U+000A
  597. // escape-seq-char =/ %x72 ; r carriage return U+000D
  598. // escape-seq-char =/ %x74 ; t tab U+0009
  599. // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
  600. // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
  601. token, escaped, rest, err := scanBasicString(b)
  602. if err != nil {
  603. return nil, nil, nil, err
  604. }
  605. startIdx := len(`"`)
  606. endIdx := len(token) - len(`"`)
  607. // Fast path. If there is no escape sequence, the string should just be
  608. // an UTF-8 encoded string, which is the same as Go. In that case,
  609. // validate the string and return a direct reference to the buffer.
  610. if !escaped {
  611. str := token[startIdx:endIdx]
  612. verr := utf8TomlValidAlreadyEscaped(str)
  613. if verr.Zero() {
  614. return token, str, rest, nil
  615. }
  616. return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
  617. }
  618. i := startIdx
  619. var builder bytes.Buffer
  620. // The scanner ensures that the token starts and ends with quotes and that
  621. // escapes are balanced.
  622. for i < len(token)-1 {
  623. c := token[i]
  624. if c == '\\' {
  625. i++
  626. c = token[i]
  627. switch c {
  628. case '"', '\\':
  629. builder.WriteByte(c)
  630. case 'b':
  631. builder.WriteByte('\b')
  632. case 'f':
  633. builder.WriteByte('\f')
  634. case 'n':
  635. builder.WriteByte('\n')
  636. case 'r':
  637. builder.WriteByte('\r')
  638. case 't':
  639. builder.WriteByte('\t')
  640. case 'e':
  641. builder.WriteByte(0x1B)
  642. case 'u':
  643. x, err := hexToRune(token[i+1:len(token)-1], 4)
  644. if err != nil {
  645. return nil, nil, nil, err
  646. }
  647. builder.WriteRune(x)
  648. i += 4
  649. case 'U':
  650. x, err := hexToRune(token[i+1:len(token)-1], 8)
  651. if err != nil {
  652. return nil, nil, nil, err
  653. }
  654. builder.WriteRune(x)
  655. i += 8
  656. default:
  657. return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
  658. }
  659. i++
  660. } else {
  661. size := utf8ValidNext(token[i:])
  662. if size == 0 {
  663. return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c)
  664. }
  665. builder.Write(token[i : i+size])
  666. i += size
  667. }
  668. }
  669. return token, builder.Bytes(), rest, nil
  670. }
  671. func hexToRune(b []byte, length int) (rune, error) {
  672. if len(b) < length {
  673. return -1, newDecodeError(b, "unicode point needs %d character, not %d", length, len(b))
  674. }
  675. b = b[:length]
  676. var r uint32
  677. for i, c := range b {
  678. d := uint32(0)
  679. switch {
  680. case '0' <= c && c <= '9':
  681. d = uint32(c - '0')
  682. case 'a' <= c && c <= 'f':
  683. d = uint32(c - 'a' + 10)
  684. case 'A' <= c && c <= 'F':
  685. d = uint32(c - 'A' + 10)
  686. default:
  687. return -1, newDecodeError(b[i:i+1], "non-hex character")
  688. }
  689. r = r*16 + d
  690. }
  691. if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
  692. return -1, newDecodeError(b, "escape sequence is invalid Unicode code point")
  693. }
  694. return rune(r), nil
  695. }
  696. func (p *parser) parseWhitespace(b []byte) []byte {
  697. // ws = *wschar
  698. // wschar = %x20 ; Space
  699. // wschar =/ %x09 ; Horizontal tab
  700. _, rest := scanWhitespace(b)
  701. return rest
  702. }
  703. //nolint:cyclop
  704. func (p *parser) parseIntOrFloatOrDateTime(b []byte) (ast.Reference, []byte, error) {
  705. switch b[0] {
  706. case 'i':
  707. if !scanFollowsInf(b) {
  708. return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'inf'")
  709. }
  710. return p.builder.Push(ast.Node{
  711. Kind: ast.Float,
  712. Data: b[:3],
  713. }), b[3:], nil
  714. case 'n':
  715. if !scanFollowsNan(b) {
  716. return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'nan'")
  717. }
  718. return p.builder.Push(ast.Node{
  719. Kind: ast.Float,
  720. Data: b[:3],
  721. }), b[3:], nil
  722. case '+', '-':
  723. return p.scanIntOrFloat(b)
  724. }
  725. if len(b) < 3 {
  726. return p.scanIntOrFloat(b)
  727. }
  728. s := 5
  729. if len(b) < s {
  730. s = len(b)
  731. }
  732. for idx, c := range b[:s] {
  733. if isDigit(c) {
  734. continue
  735. }
  736. if idx == 2 && c == ':' || (idx == 4 && c == '-') {
  737. return p.scanDateTime(b)
  738. }
  739. break
  740. }
  741. return p.scanIntOrFloat(b)
  742. }
  743. func (p *parser) scanDateTime(b []byte) (ast.Reference, []byte, error) {
  744. // scans for contiguous characters in [0-9T:Z.+-], and up to one space if
  745. // followed by a digit.
  746. hasDate := false
  747. hasTime := false
  748. hasTz := false
  749. seenSpace := false
  750. i := 0
  751. byteLoop:
  752. for ; i < len(b); i++ {
  753. c := b[i]
  754. switch {
  755. case isDigit(c):
  756. case c == '-':
  757. hasDate = true
  758. const minOffsetOfTz = 8
  759. if i >= minOffsetOfTz {
  760. hasTz = true
  761. }
  762. case c == 'T' || c == 't' || c == ':' || c == '.':
  763. hasTime = true
  764. case c == '+' || c == '-' || c == 'Z' || c == 'z':
  765. hasTz = true
  766. case c == ' ':
  767. if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
  768. i += 2
  769. // Avoid reaching past the end of the document in case the time
  770. // is malformed. See TestIssue585.
  771. if i >= len(b) {
  772. i--
  773. }
  774. seenSpace = true
  775. hasTime = true
  776. } else {
  777. break byteLoop
  778. }
  779. default:
  780. break byteLoop
  781. }
  782. }
  783. var kind ast.Kind
  784. if hasTime {
  785. if hasDate {
  786. if hasTz {
  787. kind = ast.DateTime
  788. } else {
  789. kind = ast.LocalDateTime
  790. }
  791. } else {
  792. kind = ast.LocalTime
  793. }
  794. } else {
  795. kind = ast.LocalDate
  796. }
  797. return p.builder.Push(ast.Node{
  798. Kind: kind,
  799. Data: b[:i],
  800. }), b[i:], nil
  801. }
  802. //nolint:funlen,gocognit,cyclop
  803. func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) {
  804. i := 0
  805. if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
  806. var isValidRune validRuneFn
  807. switch b[1] {
  808. case 'x':
  809. isValidRune = isValidHexRune
  810. case 'o':
  811. isValidRune = isValidOctalRune
  812. case 'b':
  813. isValidRune = isValidBinaryRune
  814. default:
  815. i++
  816. }
  817. if isValidRune != nil {
  818. i += 2
  819. for ; i < len(b); i++ {
  820. if !isValidRune(b[i]) {
  821. break
  822. }
  823. }
  824. }
  825. return p.builder.Push(ast.Node{
  826. Kind: ast.Integer,
  827. Data: b[:i],
  828. }), b[i:], nil
  829. }
  830. isFloat := false
  831. for ; i < len(b); i++ {
  832. c := b[i]
  833. if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
  834. continue
  835. }
  836. if c == '.' || c == 'e' || c == 'E' {
  837. isFloat = true
  838. continue
  839. }
  840. if c == 'i' {
  841. if scanFollowsInf(b[i:]) {
  842. return p.builder.Push(ast.Node{
  843. Kind: ast.Float,
  844. Data: b[:i+3],
  845. }), b[i+3:], nil
  846. }
  847. return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'i' while scanning for a number")
  848. }
  849. if c == 'n' {
  850. if scanFollowsNan(b[i:]) {
  851. return p.builder.Push(ast.Node{
  852. Kind: ast.Float,
  853. Data: b[:i+3],
  854. }), b[i+3:], nil
  855. }
  856. return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'n' while scanning for a number")
  857. }
  858. break
  859. }
  860. if i == 0 {
  861. return ast.InvalidReference, b, newDecodeError(b, "incomplete number")
  862. }
  863. kind := ast.Integer
  864. if isFloat {
  865. kind = ast.Float
  866. }
  867. return p.builder.Push(ast.Node{
  868. Kind: kind,
  869. Data: b[:i],
  870. }), b[i:], nil
  871. }
  // isDigit reports whether r is an ASCII decimal digit ('0' through '9').
  func isDigit(r byte) bool {
  	return r >= '0' && r <= '9'
  }
  // validRuneFn reports whether the byte r is acceptable in the number literal
  // being scanned.
  type validRuneFn func(r byte) bool
  // isValidHexRune reports whether r may appear in a hexadecimal integer
  // literal: a hexadecimal digit of either case, or an underscore.
  func isValidHexRune(r byte) bool {
  	return r >= 'a' && r <= 'f' ||
  		r >= 'A' && r <= 'F' ||
  		r >= '0' && r <= '9' ||
  		r == '_'
  }
  // isValidOctalRune reports whether r may appear in an octal integer literal:
  // a digit from '0' to '7', or an underscore.
  func isValidOctalRune(r byte) bool {
  	return r >= '0' && r <= '7' || r == '_'
  }
  // isValidBinaryRune reports whether r may appear in a binary integer literal:
  // '0', '1', or an underscore.
  func isValidBinaryRune(r byte) bool {
  	return r == '0' || r == '1' || r == '_'
  }
  888. func expect(x byte, b []byte) ([]byte, error) {
  889. if len(b) == 0 {
  890. return nil, newDecodeError(b, "expected character %c but the document ended here", x)
  891. }
  892. if b[0] != x {
  893. return nil, newDecodeError(b[0:1], "expected character %c", x)
  894. }
  895. return b[1:], nil
  896. }