scanner.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. package toml
  // scanFollows reports whether b begins with the bytes of pattern.
  // An empty pattern matches any input, including a nil slice.
  func scanFollows(b []byte, pattern string) bool {
  	if len(b) < len(pattern) {
  		return false
  	}
  	head := b[:len(pattern)]
  	return string(head) == pattern
  }
  6. func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
  7. return scanFollows(b, `"""`)
  8. }
  9. func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
  10. return scanFollows(b, `'''`)
  11. }
  12. func scanFollowsTrue(b []byte) bool {
  13. return scanFollows(b, `true`)
  14. }
  15. func scanFollowsFalse(b []byte) bool {
  16. return scanFollows(b, `false`)
  17. }
  18. func scanFollowsInf(b []byte) bool {
  19. return scanFollows(b, `inf`)
  20. }
  21. func scanFollowsNan(b []byte) bool {
  22. return scanFollows(b, `nan`)
  23. }
  24. func scanUnquotedKey(b []byte) ([]byte, []byte) {
  25. // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
  26. for i := 0; i < len(b); i++ {
  27. if !isUnquotedKeyChar(b[i]) {
  28. return b[:i], b[i:]
  29. }
  30. }
  31. return b, b[len(b):]
  32. }
  // isUnquotedKeyChar reports whether r is an ASCII letter, an ASCII digit,
  // a hyphen, or an underscore.
  func isUnquotedKeyChar(r byte) bool {
  	switch {
  	case 'A' <= r && r <= 'Z':
  		return true
  	case 'a' <= r && r <= 'z':
  		return true
  	case '0' <= r && r <= '9':
  		return true
  	case r == '-', r == '_':
  		return true
  	default:
  		return false
  	}
  }
  36. func scanLiteralString(b []byte) ([]byte, []byte, error) {
  37. // literal-string = apostrophe *literal-char apostrophe
  38. // apostrophe = %x27 ; ' apostrophe
  39. // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
  40. for i := 1; i < len(b); {
  41. switch b[i] {
  42. case '\'':
  43. return b[:i+1], b[i+1:], nil
  44. case '\n', '\r':
  45. return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines")
  46. }
  47. size := utf8ValidNext(b[i:])
  48. if size == 0 {
  49. return nil, nil, newDecodeError(b[i:i+1], "invalid character")
  50. }
  51. i += size
  52. }
  53. return nil, nil, newDecodeError(b[len(b):], "unterminated literal string")
  54. }
  55. func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
  56. // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
  57. // ml-literal-string-delim
  58. // ml-literal-string-delim = 3apostrophe
  59. // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
  60. //
  61. // mll-content = mll-char / newline
  62. // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
  63. // mll-quotes = 1*2apostrophe
  64. for i := 3; i < len(b); {
  65. switch b[i] {
  66. case '\'':
  67. if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
  68. i += 3
  69. // At that point we found 3 apostrophe, and i is the
  70. // index of the byte after the third one. The scanner
  71. // needs to be eager, because there can be an extra 2
  72. // apostrophe that can be accepted at the end of the
  73. // string.
  74. if i >= len(b) || b[i] != '\'' {
  75. return b[:i], b[i:], nil
  76. }
  77. i++
  78. if i >= len(b) || b[i] != '\'' {
  79. return b[:i], b[i:], nil
  80. }
  81. i++
  82. if i < len(b) && b[i] == '\'' {
  83. return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string")
  84. }
  85. return b[:i], b[i:], nil
  86. }
  87. case '\r':
  88. if len(b) < i+2 {
  89. return nil, nil, newDecodeError(b[len(b):], `need a \n after \r`)
  90. }
  91. if b[i+1] != '\n' {
  92. return nil, nil, newDecodeError(b[i:i+2], `need a \n after \r`)
  93. }
  94. i += 2 // skip the \n
  95. continue
  96. }
  97. size := utf8ValidNext(b[i:])
  98. if size == 0 {
  99. return nil, nil, newDecodeError(b[i:i+1], "invalid character")
  100. }
  101. i += size
  102. }
  103. return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`)
  104. }
  105. func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
  106. const lenCRLF = 2
  107. if len(b) < lenCRLF {
  108. return nil, nil, newDecodeError(b, "windows new line expected")
  109. }
  110. if b[1] != '\n' {
  111. return nil, nil, newDecodeError(b, `windows new line should be \r\n`)
  112. }
  113. return b[:lenCRLF], b[lenCRLF:], nil
  114. }
  // scanWhitespace splits b into its leading run of spaces and tabs and the
  // remainder. Either part may be empty.
  func scanWhitespace(b []byte) ([]byte, []byte) {
  	n := 0
  	for n < len(b) && (b[n] == ' ' || b[n] == '\t') {
  		n++
  	}
  	return b[:n], b[n:]
  }
  126. //nolint:unparam
  127. func scanComment(b []byte) ([]byte, []byte, error) {
  128. // comment-start-symbol = %x23 ; #
  129. // non-ascii = %x80-D7FF / %xE000-10FFFF
  130. // non-eol = %x09 / %x20-7F / non-ascii
  131. //
  132. // comment = comment-start-symbol *non-eol
  133. for i := 1; i < len(b); {
  134. if b[i] == '\n' {
  135. return b[:i], b[i:], nil
  136. }
  137. if b[i] == '\r' {
  138. if i+1 < len(b) && b[i+1] == '\n' {
  139. return b[:i+1], b[i+1:], nil
  140. }
  141. return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment")
  142. }
  143. size := utf8ValidNext(b[i:])
  144. if size == 0 {
  145. return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment")
  146. }
  147. i += size
  148. }
  149. return b, b[len(b):], nil
  150. }
  151. func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
  152. // basic-string = quotation-mark *basic-char quotation-mark
  153. // quotation-mark = %x22 ; "
  154. // basic-char = basic-unescaped / escaped
  155. // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
  156. // escaped = escape escape-seq-char
  157. escaped := false
  158. i := 1
  159. for ; i < len(b); i++ {
  160. switch b[i] {
  161. case '"':
  162. return b[:i+1], escaped, b[i+1:], nil
  163. case '\n', '\r':
  164. return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
  165. case '\\':
  166. if len(b) < i+2 {
  167. return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\")
  168. }
  169. escaped = true
  170. i++ // skip the next character
  171. }
  172. }
  173. return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`)
  174. }
  175. func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
  176. // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
  177. // ml-basic-string-delim
  178. // ml-basic-string-delim = 3quotation-mark
  179. // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
  180. //
  181. // mlb-content = mlb-char / newline / mlb-escaped-nl
  182. // mlb-char = mlb-unescaped / escaped
  183. // mlb-quotes = 1*2quotation-mark
  184. // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
  185. // mlb-escaped-nl = escape ws newline *( wschar / newline )
  186. escaped := false
  187. i := 3
  188. for ; i < len(b); i++ {
  189. switch b[i] {
  190. case '"':
  191. if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
  192. i += 3
  193. // At that point we found 3 apostrophe, and i is the
  194. // index of the byte after the third one. The scanner
  195. // needs to be eager, because there can be an extra 2
  196. // apostrophe that can be accepted at the end of the
  197. // string.
  198. if i >= len(b) || b[i] != '"' {
  199. return b[:i], escaped, b[i:], nil
  200. }
  201. i++
  202. if i >= len(b) || b[i] != '"' {
  203. return b[:i], escaped, b[i:], nil
  204. }
  205. i++
  206. if i < len(b) && b[i] == '"' {
  207. return nil, escaped, nil, newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`)
  208. }
  209. return b[:i], escaped, b[i:], nil
  210. }
  211. case '\\':
  212. if len(b) < i+2 {
  213. return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\")
  214. }
  215. escaped = true
  216. i++ // skip the next character
  217. case '\r':
  218. if len(b) < i+2 {
  219. return nil, escaped, nil, newDecodeError(b[len(b):], `need a \n after \r`)
  220. }
  221. if b[i+1] != '\n' {
  222. return nil, escaped, nil, newDecodeError(b[i:i+2], `need a \n after \r`)
  223. }
  224. i++ // skip the \n
  225. }
  226. }
  227. return nil, escaped, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
  228. }