123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818 |
- // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
- // Use of this source code is governed by a MIT license found in the LICENSE file.
- package codec
- import "io"
- // decReader abstracts the reading source, allowing implementations that can
- // read from an io.Reader or directly off a byte slice with zero-copying.
- type decReader interface {
- // readx will return a view of the []byte if decoding from a []byte, OR
- // read into the implementation scratch buffer if possible i.e. n < len(scratchbuf), OR
- // create a new []byte and read into that
- readx(n uint) []byte
- readb([]byte)
- readn1() byte
- readn2() [2]byte
- // readn3 will read 3 bytes into the top-most elements of a 4-byte array
- readn3() [4]byte
- readn4() [4]byte
- readn8() [8]byte
- // readn1eof() (v uint8, eof bool)
- // // read up to 8 bytes at a time
- // readn(num uint8) (v [8]byte)
- numread() uint // number of bytes read
- // skip any whitespace characters, and return the first non-matching byte
- skipWhitespace() (token byte)
- // jsonReadNum will include last read byte in first element of slice,
- // and continue numeric characters until it sees a non-numeric char
- // or EOF. If it sees a non-numeric character, it will unread that.
- jsonReadNum() []byte
- // jsonReadAsisChars will read json plain characters (anything but " or \)
- // and return a slice terminated by a non-json asis character.
- jsonReadAsisChars() []byte
- // skip will skip any byte that matches, and return the first non-matching byte
- // skip(accept *bitset256) (token byte)
- // readTo will read any byte that matches, stopping once no-longer matching.
- // readTo(accept *bitset256) (out []byte)
- // readUntil will read, only stopping once it matches the 'stop' byte (which it excludes).
- readUntil(stop byte) (out []byte)
- }
- // ------------------------------------------------
- type unreadByteStatus uint8
- // unreadByteStatus goes from
- // undefined (when initialized) -- (read) --> canUnread -- (unread) --> canRead ...
- const (
- unreadByteUndefined unreadByteStatus = iota
- unreadByteCanRead
- unreadByteCanUnread
- )
- // --------------------
- type ioDecReaderCommon struct {
- r io.Reader // the reader passed in
- n uint // num read
- l byte // last byte
- ls unreadByteStatus // last byte status
- b [6]byte // tiny buffer for reading single bytes
- blist *bytesFreelist
- bufr []byte // buffer for readTo/readUntil
- }
- func (z *ioDecReaderCommon) reset(r io.Reader, blist *bytesFreelist) {
- z.blist = blist
- z.r = r
- z.ls = unreadByteUndefined
- z.l, z.n = 0, 0
- z.bufr = z.blist.check(z.bufr, 256)
- }
- func (z *ioDecReaderCommon) numread() uint {
- return z.n
- }
- // ------------------------------------------
- // ioDecReader is a decReader that reads off an io.Reader.
- //
- // It also has a fallback implementation of ByteScanner if needed.
- type ioDecReader struct {
- ioDecReaderCommon
- br io.ByteScanner
- x [64 + 48]byte // for: get struct field name, swallow valueTypeBytes, etc
- }
- func (z *ioDecReader) reset(r io.Reader, blist *bytesFreelist) {
- z.ioDecReaderCommon.reset(r, blist)
- z.br, _ = r.(io.ByteScanner)
- }
- func (z *ioDecReader) Read(p []byte) (n int, err error) {
- if len(p) == 0 {
- return
- }
- var firstByte bool
- if z.ls == unreadByteCanRead {
- z.ls = unreadByteCanUnread
- p[0] = z.l
- if len(p) == 1 {
- n = 1
- return
- }
- firstByte = true
- p = p[1:]
- }
- n, err = z.r.Read(p)
- if n > 0 {
- if err == io.EOF && n == len(p) {
- err = nil // read was successful, so postpone EOF (till next time)
- }
- z.l = p[n-1]
- z.ls = unreadByteCanUnread
- }
- if firstByte {
- n++
- }
- return
- }
- func (z *ioDecReader) ReadByte() (c byte, err error) {
- if z.br != nil {
- c, err = z.br.ReadByte()
- if err == nil {
- z.l = c
- z.ls = unreadByteCanUnread
- }
- return
- }
- n, err := z.Read(z.b[:1])
- if n == 1 {
- c = z.b[0]
- if err == io.EOF {
- err = nil // read was successful, so postpone EOF (till next time)
- }
- }
- return
- }
- func (z *ioDecReader) UnreadByte() (err error) {
- if z.br != nil {
- err = z.br.UnreadByte()
- if err == nil {
- z.ls = unreadByteCanRead
- }
- return
- }
- switch z.ls {
- case unreadByteCanUnread:
- z.ls = unreadByteCanRead
- case unreadByteCanRead:
- err = errDecUnreadByteLastByteNotRead
- case unreadByteUndefined:
- err = errDecUnreadByteNothingToRead
- default:
- err = errDecUnreadByteUnknown
- }
- return
- }
- func (z *ioDecReader) readn2() (bs [2]byte) {
- z.readb(bs[:])
- return
- }
- func (z *ioDecReader) readn3() (bs [4]byte) {
- z.readb(bs[1:])
- return
- }
- func (z *ioDecReader) readn4() (bs [4]byte) {
- z.readb(bs[:])
- return
- }
- func (z *ioDecReader) readn8() (bs [8]byte) {
- z.readb(bs[:])
- return
- }
- func (z *ioDecReader) readx(n uint) (bs []byte) {
- if n == 0 {
- return
- }
- if n < uint(len(z.x)) {
- bs = z.x[:n]
- } else {
- bs = make([]byte, n)
- }
- _, err := readFull(z.r, bs)
- halt.onerror(err)
- z.n += uint(len(bs))
- return
- }
- func (z *ioDecReader) readb(bs []byte) {
- if len(bs) == 0 {
- return
- }
- _, err := readFull(z.r, bs)
- halt.onerror(err)
- z.n += uint(len(bs))
- }
- func (z *ioDecReader) readn1() (b uint8) {
- b, err := z.ReadByte()
- halt.onerror(err)
- z.n++
- return
- }
- func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
- b, err := z.ReadByte()
- if err == nil {
- z.n++
- } else if err == io.EOF {
- eof = true
- } else {
- halt.onerror(err)
- }
- return
- }
- func (z *ioDecReader) jsonReadNum() (bs []byte) {
- z.unreadn1()
- z.bufr = z.bufr[:0]
- LOOP:
- i, eof := z.readn1eof()
- if eof {
- return z.bufr
- }
- if isNumberChar(i) {
- z.bufr = append(z.bufr, i)
- goto LOOP
- }
- z.unreadn1()
- return z.bufr
- }
- func (z *ioDecReader) jsonReadAsisChars() (bs []byte) {
- z.bufr = z.bufr[:0]
- LOOP:
- i := z.readn1()
- z.bufr = append(z.bufr, i)
- if i == '"' || i == '\\' {
- return z.bufr
- }
- goto LOOP
- }
- func (z *ioDecReader) skipWhitespace() (token byte) {
- LOOP:
- token = z.readn1()
- if isWhitespaceChar(token) {
- goto LOOP
- }
- return
- }
- func (z *ioDecReader) readUntil(stop byte) []byte {
- z.bufr = z.bufr[:0]
- LOOP:
- token := z.readn1()
- z.bufr = append(z.bufr, token)
- if token == stop {
- return z.bufr[:len(z.bufr)-1]
- }
- goto LOOP
- }
- func (z *ioDecReader) unreadn1() {
- err := z.UnreadByte()
- halt.onerror(err)
- z.n--
- }
- // ------------------------------------
- type bufioDecReader struct {
- ioDecReaderCommon
- c uint // cursor
- buf []byte
- }
- func (z *bufioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) {
- z.ioDecReaderCommon.reset(r, blist)
- z.c = 0
- if cap(z.buf) < bufsize {
- z.buf = blist.get(bufsize)
- } else {
- z.buf = z.buf[:0]
- }
- }
- func (z *bufioDecReader) readb(p []byte) {
- var n = uint(copy(p, z.buf[z.c:]))
- z.n += n
- z.c += n
- if len(p) != int(n) {
- z.readbFill(p, n, true, false)
- }
- }
- func readbFillHandleErr(err error, must, eof bool) (isEOF bool) {
- if err == io.EOF {
- isEOF = true
- }
- if must && !(eof && isEOF) {
- halt.onerror(err)
- }
- return
- }
- func (z *bufioDecReader) readbFill(p0 []byte, n uint, must, eof bool) (isEOF bool, err error) {
- // at this point, there's nothing in z.buf to read (z.buf is fully consumed)
- var p []byte
- if p0 != nil {
- p = p0[n:]
- }
- var n2 uint
- if len(p) > cap(z.buf) {
- n2, err = readFull(z.r, p)
- if err != nil {
- isEOF = readbFillHandleErr(err, must, eof)
- return
- }
- n += n2
- z.n += n2
- // always keep last byte in z.buf
- z.buf = z.buf[:1]
- z.buf[0] = p[len(p)-1]
- z.c = 1
- return
- }
- // z.c is now 0, and len(p) <= cap(z.buf)
- var n1 int
- LOOP:
- // for len(p) > 0 && z.err == nil {
- z.buf = z.buf[0:cap(z.buf)]
- n1, err = z.r.Read(z.buf)
- n2 = uint(n1)
- if n2 == 0 && err != nil {
- isEOF = readbFillHandleErr(err, must, eof)
- return
- }
- err = nil
- z.buf = z.buf[:n2]
- z.c = 0
- if len(p) > 0 {
- n2 = uint(copy(p, z.buf))
- z.c = n2
- n += n2
- z.n += n2
- p = p[n2:]
- if len(p) > 0 {
- goto LOOP
- }
- if z.c == 0 {
- z.buf = z.buf[:1]
- z.buf[0] = p[len(p)-1]
- z.c = 1
- }
- }
- return
- }
- func (z *bufioDecReader) readn1() (b byte) {
- if z.c >= uint(len(z.buf)) {
- z.readbFill(nil, 0, true, false)
- }
- b = z.buf[z.c]
- z.c++
- z.n++
- return
- }
- func (z *bufioDecReader) readn1eof() (b byte, eof bool) {
- if z.c >= uint(len(z.buf)) {
- eof, _ = z.readbFill(nil, 0, true, true)
- if eof {
- return
- }
- }
- b = z.buf[z.c]
- z.c++
- z.n++
- return
- }
- func (z *bufioDecReader) unreadn1() {
- if z.c == 0 {
- halt.onerror(errDecUnreadByteNothingToRead)
- }
- z.c--
- z.n--
- }
- func (z *bufioDecReader) readn2() (bs [2]byte) {
- z.readb(bs[:])
- return
- }
- func (z *bufioDecReader) readn3() (bs [4]byte) {
- z.readb(bs[1:])
- return
- }
- func (z *bufioDecReader) readn4() (bs [4]byte) {
- z.readb(bs[:])
- return
- }
- func (z *bufioDecReader) readn8() (bs [8]byte) {
- z.readb(bs[:])
- return
- }
- func (z *bufioDecReader) readx(n uint) (bs []byte) {
- if n == 0 {
- // return
- } else if z.c+n <= uint(len(z.buf)) {
- bs = z.buf[z.c : z.c+n]
- z.n += n
- z.c += n
- } else {
- bs = make([]byte, n)
- // n no longer used - can reuse
- n = uint(copy(bs, z.buf[z.c:]))
- z.n += n
- z.c += n
- z.readbFill(bs, n, true, false)
- }
- return
- }
- func (z *bufioDecReader) jsonReadNum() (bs []byte) {
- z.unreadn1()
- z.bufr = z.bufr[:0]
- LOOP:
- i, eof := z.readn1eof()
- if eof {
- return z.bufr
- }
- if isNumberChar(i) {
- z.bufr = append(z.bufr, i)
- goto LOOP
- }
- z.unreadn1()
- return z.bufr
- }
- func (z *bufioDecReader) jsonReadAsisChars() (bs []byte) {
- z.bufr = z.bufr[:0]
- LOOP:
- i := z.readn1()
- z.bufr = append(z.bufr, i)
- if i == '"' || i == '\\' {
- return z.bufr
- }
- goto LOOP
- }
- func (z *bufioDecReader) skipWhitespace() (token byte) {
- i := z.c
- LOOP:
- if i < uint(len(z.buf)) {
- // inline z.skipLoopFn(i) and refactor, so cost is within inline budget
- token = z.buf[i]
- i++
- if isWhitespaceChar(token) {
- goto LOOP
- }
- z.n += i - 2 - z.c
- z.c = i
- return
- }
- return z.skipFillWhitespace()
- }
- func (z *bufioDecReader) skipFillWhitespace() (token byte) {
- z.n += uint(len(z.buf)) - z.c
- var i, n2 int
- var err error
- for {
- z.c = 0
- z.buf = z.buf[0:cap(z.buf)]
- n2, err = z.r.Read(z.buf)
- if n2 == 0 {
- halt.onerror(err)
- }
- z.buf = z.buf[:n2]
- for i, token = range z.buf {
- if !isWhitespaceChar(token) {
- z.n += (uint(i) - z.c) - 1
- z.loopFn(uint(i + 1))
- return
- }
- }
- z.n += uint(n2)
- }
- }
- func (z *bufioDecReader) loopFn(i uint) {
- z.c = i
- }
- func (z *bufioDecReader) readUntil(stop byte) (out []byte) {
- i := z.c
- LOOP:
- if i < uint(len(z.buf)) {
- if z.buf[i] == stop {
- z.n += (i - z.c) - 1
- i++
- out = z.buf[z.c:i]
- z.c = i
- goto FINISH
- }
- i++
- goto LOOP
- }
- out = z.readUntilFill(stop)
- FINISH:
- return out[:len(out)-1]
- }
- func (z *bufioDecReader) readUntilFill(stop byte) []byte {
- z.bufr = z.bufr[:0]
- z.n += uint(len(z.buf)) - z.c
- z.bufr = append(z.bufr, z.buf[z.c:]...)
- for {
- z.c = 0
- z.buf = z.buf[0:cap(z.buf)]
- n1, err := z.r.Read(z.buf)
- if n1 == 0 {
- halt.onerror(err)
- }
- n2 := uint(n1)
- z.buf = z.buf[:n2]
- for i, token := range z.buf {
- if token == stop {
- z.n += (uint(i) - z.c) - 1
- z.bufr = append(z.bufr, z.buf[z.c:i+1]...)
- z.loopFn(uint(i + 1))
- return z.bufr
- }
- }
- z.bufr = append(z.bufr, z.buf...)
- z.n += n2
- }
- }
- // ------------------------------------
- // bytesDecReader is a decReader that reads off a byte slice with zero copying
- //
- // Note: we do not try to convert index'ing out of bounds to an io.EOF.
- // instead, we let it bubble up to the exported Encode/Decode method
- // and recover it as an io.EOF.
- //
- // see panicValToErr(...) function in helper.go.
- type bytesDecReader struct {
- b []byte // data
- c uint // cursor
- }
- func (z *bytesDecReader) reset(in []byte) {
- z.b = in[:len(in):len(in)] // reslicing must not go past capacity
- z.c = 0
- }
- func (z *bytesDecReader) numread() uint {
- return z.c
- }
- // Note: slicing from a non-constant start position is more expensive,
- // as more computation is required to decipher the pointer start position.
- // However, we do it only once, and it's better than reslicing both z.b and return value.
- func (z *bytesDecReader) readx(n uint) (bs []byte) {
- x := z.c + n
- bs = z.b[z.c:x]
- z.c = x
- return
- }
- func (z *bytesDecReader) readb(bs []byte) {
- copy(bs, z.readx(uint(len(bs))))
- }
- // MARKER: do not use this - as it calls into memmove (as the size of data to move is unknown)
- // func (z *bytesDecReader) readnn(bs []byte, n uint) {
- // x := z.c
- // copy(bs, z.b[x:x+n])
- // z.c += n
- // }
- // func (z *bytesDecReader) readn(num uint8) (bs [8]byte) {
- // x := z.c + uint(num)
- // copy(bs[:], z.b[z.c:x]) // slice z.b completely, so we get bounds error if past
- // z.c = x
- // return
- // }
- // func (z *bytesDecReader) readn1() uint8 {
- // z.c++
- // return z.b[z.c-1]
- // }
- func (z *bytesDecReader) readn1() (v uint8) {
- v = z.b[z.c]
- z.c++
- return
- }
- // MARKER: for readn{2,3,4,8}, ensure you slice z.b completely so we get bounds error if past end.
- func (z *bytesDecReader) readn2() (bs [2]byte) {
- // copy(bs[:], z.b[z.c:z.c+2])
- bs[1] = z.b[z.c+1]
- bs[0] = z.b[z.c]
- z.c += 2
- return
- }
- func (z *bytesDecReader) readn3() (bs [4]byte) {
- // copy(bs[1:], z.b[z.c:z.c+3])
- bs = okBytes3(z.b[z.c : z.c+3])
- z.c += 3
- return
- }
- func (z *bytesDecReader) readn4() (bs [4]byte) {
- // copy(bs[:], z.b[z.c:z.c+4])
- bs = okBytes4(z.b[z.c : z.c+4])
- z.c += 4
- return
- }
- func (z *bytesDecReader) readn8() (bs [8]byte) {
- // copy(bs[:], z.b[z.c:z.c+8])
- bs = okBytes8(z.b[z.c : z.c+8])
- z.c += 8
- return
- }
- func (z *bytesDecReader) jsonReadNum() []byte {
- z.c--
- i := z.c
- LOOP:
- if i < uint(len(z.b)) && isNumberChar(z.b[i]) {
- i++
- goto LOOP
- }
- z.c, i = i, z.c
- return z.b[i:z.c]
- }
- func (z *bytesDecReader) jsonReadAsisChars() []byte {
- i := z.c
- LOOP:
- token := z.b[i]
- i++
- if token == '"' || token == '\\' {
- z.c, i = i, z.c
- return z.b[i:z.c]
- }
- goto LOOP
- }
- func (z *bytesDecReader) skipWhitespace() (token byte) {
- i := z.c
- LOOP:
- token = z.b[i]
- if isWhitespaceChar(token) {
- i++
- goto LOOP
- }
- z.c = i + 1
- return
- }
- func (z *bytesDecReader) readUntil(stop byte) (out []byte) {
- i := z.c
- LOOP:
- if z.b[i] == stop {
- out = z.b[z.c:i]
- z.c = i + 1
- return
- }
- i++
- goto LOOP
- }
- // --------------
- type decRd struct {
- mtr bool // is maptype a known type?
- str bool // is slicetype a known type?
- be bool // is binary encoding
- js bool // is json handle
- jsms bool // is json handle, and MapKeyAsString
- cbor bool // is cbor handle
- bytes bool // is bytes reader
- bufio bool // is this a bufioDecReader?
- rb bytesDecReader
- ri *ioDecReader
- bi *bufioDecReader
- decReader
- }
- // From out benchmarking, we see the following in terms of performance:
- //
- // - interface calls
- // - branch that can inline what it calls
- //
- // the if/else-if/else block is expensive to inline.
- // Each node of this construct costs a lot and dominates the budget.
- // Best to only do an if fast-path else block (so fast-path is inlined).
- // This is irrespective of inlineExtraCallCost set in $GOROOT/src/cmd/compile/internal/gc/inl.go
- //
- // In decRd methods below, we delegate all IO functions into their own methods.
- // This allows for the inlining of the common path when z.bytes=true.
- // Go 1.12+ supports inlining methods with up to 1 inlined function (or 2 if no other constructs).
- //
- // However, up through Go 1.13, decRd's readXXX, skip and unreadXXX methods are not inlined.
- // Consequently, there is no benefit to do the xxxIO methods for decRd at this time.
- // Instead, we have a if/else-if/else block so that IO calls do not have to jump through
- // a second unnecessary function call.
- //
- // If golang inlining gets better and bytesDecReader methods can be inlined,
- // then we can revert to using these 2 functions so the bytesDecReader
- // methods are inlined and the IO paths call out to a function.
- //
- // decRd is designed to embed a decReader, and then re-implement some of the decReader
- // methods using a conditional branch. We only override the ones that have a bytes version
- // that is small enough to be inlined. We use ./run.sh -z to check.
- // Right now, only numread and readn1 can be inlined.
- func (z *decRd) numread() uint {
- if z.bytes {
- return z.rb.numread()
- } else if z.bufio {
- return z.bi.numread()
- } else {
- return z.ri.numread()
- }
- }
- func (z *decRd) readn1() (v uint8) {
- if z.bytes {
- // MARKER: manually inline, else this function is not inlined.
- // Keep in sync with bytesDecReader.readn1
- // return z.rb.readn1()
- v = z.rb.b[z.rb.c]
- z.rb.c++
- } else {
- v = z.readn1IO()
- }
- return
- }
- func (z *decRd) readn1IO() uint8 {
- if z.bufio {
- return z.bi.readn1()
- }
- return z.ri.readn1()
- }
- type devNullReader struct{}
- func (devNullReader) Read(p []byte) (int, error) { return 0, io.EOF }
- func (devNullReader) Close() error { return nil }
- func readFull(r io.Reader, bs []byte) (n uint, err error) {
- var nn int
- for n < uint(len(bs)) && err == nil {
- nn, err = r.Read(bs[n:])
- if nn > 0 {
- if err == io.EOF {
- // leave EOF for next time
- err = nil
- }
- n += uint(nn)
- }
- }
- // do not do this below - it serves no purpose
- // if n != len(bs) && err == io.EOF { err = io.ErrUnexpectedEOF }
- return
- }
- var _ decReader = (*decRd)(nil)
|