huffman.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package hpack
  5. import (
  6. "bytes"
  7. "errors"
  8. "io"
  9. "sync"
  10. )
  11. var bufPool = sync.Pool{
  12. New: func() interface{} { return new(bytes.Buffer) },
  13. }
  14. // HuffmanDecode decodes the string in v and writes the expanded
  15. // result to w, returning the number of bytes written to w and the
  16. // Write call's return value. At most one Write call is made.
  17. func HuffmanDecode(w io.Writer, v []byte) (int, error) {
  18. buf := bufPool.Get().(*bytes.Buffer)
  19. buf.Reset()
  20. defer bufPool.Put(buf)
  21. if err := huffmanDecode(buf, 0, v); err != nil {
  22. return 0, err
  23. }
  24. return w.Write(buf.Bytes())
  25. }
  26. // HuffmanDecodeToString decodes the string in v.
  27. func HuffmanDecodeToString(v []byte) (string, error) {
  28. buf := bufPool.Get().(*bytes.Buffer)
  29. buf.Reset()
  30. defer bufPool.Put(buf)
  31. if err := huffmanDecode(buf, 0, v); err != nil {
  32. return "", err
  33. }
  34. return buf.String(), nil
  35. }
  36. // ErrInvalidHuffman is returned for errors found decoding
  37. // Huffman-encoded strings.
  38. var ErrInvalidHuffman = errors.New("hpack: invalid Huffman-encoded data")
  39. // huffmanDecode decodes v to buf.
  40. // If maxLen is greater than 0, attempts to write more to buf than
  41. // maxLen bytes will return ErrStringLength.
  42. func huffmanDecode(buf *bytes.Buffer, maxLen int, v []byte) error {
  43. rootHuffmanNode := getRootHuffmanNode()
  44. n := rootHuffmanNode
  45. // cur is the bit buffer that has not been fed into n.
  46. // cbits is the number of low order bits in cur that are valid.
  47. // sbits is the number of bits of the symbol prefix being decoded.
  48. cur, cbits, sbits := uint(0), uint8(0), uint8(0)
  49. for _, b := range v {
  50. cur = cur<<8 | uint(b)
  51. cbits += 8
  52. sbits += 8
  53. for cbits >= 8 {
  54. idx := byte(cur >> (cbits - 8))
  55. n = n.children[idx]
  56. if n == nil {
  57. return ErrInvalidHuffman
  58. }
  59. if n.children == nil {
  60. if maxLen != 0 && buf.Len() == maxLen {
  61. return ErrStringLength
  62. }
  63. buf.WriteByte(n.sym)
  64. cbits -= n.codeLen
  65. n = rootHuffmanNode
  66. sbits = cbits
  67. } else {
  68. cbits -= 8
  69. }
  70. }
  71. }
  72. for cbits > 0 {
  73. n = n.children[byte(cur<<(8-cbits))]
  74. if n == nil {
  75. return ErrInvalidHuffman
  76. }
  77. if n.children != nil || n.codeLen > cbits {
  78. break
  79. }
  80. if maxLen != 0 && buf.Len() == maxLen {
  81. return ErrStringLength
  82. }
  83. buf.WriteByte(n.sym)
  84. cbits -= n.codeLen
  85. n = rootHuffmanNode
  86. sbits = cbits
  87. }
  88. if sbits > 7 {
  89. // Either there was an incomplete symbol, or overlong padding.
  90. // Both are decoding errors per RFC 7541 section 5.2.
  91. return ErrInvalidHuffman
  92. }
  93. if mask := uint(1<<cbits - 1); cur&mask != mask {
  94. // Trailing bits must be a prefix of EOS per RFC 7541 section 5.2.
  95. return ErrInvalidHuffman
  96. }
  97. return nil
  98. }
// incomparable is a zero-width, non-comparable type. Adding it as a
// field makes the enclosing struct non-comparable as well, and
// generally adds no size as long as it is the first field.
type incomparable [0]func()
// node is one element of the Huffman decoding tree. An internal node
// carries a 256-entry child table indexed by one byte of input; a leaf
// node has a nil table and carries the decoded symbol instead.
type node struct {
	// Zero-width marker that keeps node values from being compared
	// with ==.
	_ incomparable

	// children is non-nil for internal nodes
	children *[256]*node

	// The following are only valid if children is nil:
	codeLen uint8 // number of bits that led to the output of sym
	sym     byte  // output symbol
}
  111. func newInternalNode() *node {
  112. return &node{children: new([256]*node)}
  113. }
  114. var (
  115. buildRootOnce sync.Once
  116. lazyRootHuffmanNode *node
  117. )
  118. func getRootHuffmanNode() *node {
  119. buildRootOnce.Do(buildRootHuffmanNode)
  120. return lazyRootHuffmanNode
  121. }
  122. func buildRootHuffmanNode() {
  123. if len(huffmanCodes) != 256 {
  124. panic("unexpected size")
  125. }
  126. lazyRootHuffmanNode = newInternalNode()
  127. // allocate a leaf node for each of the 256 symbols
  128. leaves := new([256]node)
  129. for sym, code := range huffmanCodes {
  130. codeLen := huffmanCodeLen[sym]
  131. cur := lazyRootHuffmanNode
  132. for codeLen > 8 {
  133. codeLen -= 8
  134. i := uint8(code >> codeLen)
  135. if cur.children[i] == nil {
  136. cur.children[i] = newInternalNode()
  137. }
  138. cur = cur.children[i]
  139. }
  140. shift := 8 - codeLen
  141. start, end := int(uint8(code<<shift)), int(1<<shift)
  142. leaves[sym].sym = byte(sym)
  143. leaves[sym].codeLen = codeLen
  144. for i := start; i < start+end; i++ {
  145. cur.children[i] = &leaves[sym]
  146. }
  147. }
  148. }
// AppendHuffmanString appends s, as encoded in Huffman codes, to dst
// and returns the extended buffer.
//
// If the encoded bits do not end on a byte boundary, the last byte is
// filled with the high-order bits of the EOS code, which are all ones.
func AppendHuffmanString(dst []byte, s string) []byte {
	// This relies on the maximum Huffman code length being 30 (see the
	// huffmanCodeLen array in tables.go). A uint64 buffer holding fewer
	// than 32 valid bits can therefore always accommodate another code.
	var (
		x uint64 // buffer
		n uint   // number of valid bits present in x
	)
	for i := 0; i < len(s); i++ {
		c := s[i]
		n += uint(huffmanCodeLen[c])
		x <<= huffmanCodeLen[c] % 64
		x |= uint64(huffmanCodes[c])
		if n >= 32 {
			// Flush the oldest 32 bits, leaving the n newest bits in x.
			n %= 32             // Normally would be -= 32 but %= 32 informs compiler 0 <= n <= 31 for upcoming shift
			y := uint32(x >> n) // Compiler doesn't combine memory writes if y isn't uint32
			dst = append(dst, byte(y>>24), byte(y>>16), byte(y>>8), byte(y))
		}
	}
	// Add padding bits if necessary
	if over := n % 8; over > 0 {
		const (
			eosCode    = 0x3fffffff
			eosNBits   = 30
			eosPadByte = eosCode >> (eosNBits - 8) // the 8 high-order bits of the EOS code
		)
		pad := 8 - over
		x = (x << pad) | (eosPadByte >> over)
		n += pad // 8 now divides into n exactly
	}
	// n in (0, 8, 16, 24, 32)
	// Write out the remaining n/8 bytes, most significant first.
	switch n / 8 {
	case 0:
		return dst
	case 1:
		return append(dst, byte(x))
	case 2:
		y := uint16(x)
		return append(dst, byte(y>>8), byte(y))
	case 3:
		y := uint16(x >> 8)
		return append(dst, byte(y>>8), byte(y), byte(x))
	}
	//	case 4:
	y := uint32(x)
	return append(dst, byte(y>>24), byte(y>>16), byte(y>>8), byte(y))
}
  197. // HuffmanEncodeLength returns the number of bytes required to encode
  198. // s in Huffman codes. The result is round up to byte boundary.
  199. func HuffmanEncodeLength(s string) uint64 {
  200. n := uint64(0)
  201. for i := 0; i < len(s); i++ {
  202. n += uint64(huffmanCodeLen[s[i]])
  203. }
  204. return (n + 7) / 8
  205. }