123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- //go:generate go run gen.go gen_trieval.go gen_ranges.go
- // Package bidi contains functionality for bidirectional text support.
- //
- // See https://www.unicode.org/reports/tr9.
- //
- // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
- // and without notice.
- package bidi // import "golang.org/x/text/unicode/bidi"
- // TODO
- // - Transformer for reordering?
- // - Transformer (validator, really) for Bidi Rule.
- import (
- "bytes"
- )
// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to what extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.
// A Direction indicates the overall flow of text.
type Direction int

const (
	// LeftToRight indicates the text contains no right-to-left characters and
	// that either there are some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed.
	//
	// LeftToRight is the zero value of Direction.
	LeftToRight Direction = iota

	// RightToLeft indicates the text contains no left-to-right characters and
	// that either there are some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates text contains both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means that text contains neither left-to-right nor
	// right-to-left characters and that no default direction has been set.
	Neutral
)
// options collects the settings that Option functions can modify.
type options struct {
	// defaultDirection is the direction requested through DefaultDirection.
	// Its zero value is LeftToRight.
	defaultDirection Direction
}
// An Option is an option for Bidi processing. It is a function that modifies
// an options value; DefaultDirection returns one.
type Option func(*options)
- // ICU allows the user to define embedding levels. This may be used, for example,
- // to use hierarchical structure of markup languages to define embeddings.
- // The following option may be a way to expose this functionality in this API.
- // // LevelFunc sets a function that associates nesting levels with the given text.
- // // The levels function will be called with monotonically increasing values for p.
- // func LevelFunc(levels func(p int) int) Option {
- // panic("unimplemented")
- // }
- // DefaultDirection sets the default direction for a Paragraph. The direction is
- // overridden if the text contains directional characters.
- func DefaultDirection(d Direction) Option {
- return func(opts *options) {
- opts.defaultDirection = d
- }
- }
// A Paragraph holds a single Paragraph for Bidi processing.
type Paragraph struct {
	// p is the input text: the caller's slice for SetBytes, or a byte copy of
	// the string for SetString.
	p []byte
	// o is the ordering stored by the most recent successful call to Order.
	o Ordering
	// opts are the options passed to SetBytes or SetString; Order applies them.
	opts []Option
	// types holds the Bidi class of each rune of the input up to (not
	// including) the first paragraph separator. Filled in by prepareInput.
	types []Class
	// pairTypes holds, for each entry of types, whether the rune is an opening
	// bracket (bpOpen), a closing bracket (bpClose) or no bracket (bpNone).
	pairTypes []bracketType
	// pairValues holds, for each entry of types, the rune itself if it is a
	// bracket and 0 otherwise.
	pairValues []rune
	// runes is the input decoded into runes by prepareInput.
	runes []rune
	// options is the settings value that Order updates by applying opts.
	options options
}
- // Initialize the p.pairTypes, p.pairValues and p.types from the input previously
- // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
- // separator (bidi class B).
- //
- // The function p.Order() needs these values to be set, so this preparation could be postponed.
- // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
- // separator, the whole input needs to be processed anyway and should not be done twice.
- //
- // The function has the same return values as SetBytes() / SetString()
- func (p *Paragraph) prepareInput() (n int, err error) {
- p.runes = bytes.Runes(p.p)
- bytecount := 0
- // clear slices from previous SetString or SetBytes
- p.pairTypes = nil
- p.pairValues = nil
- p.types = nil
- for _, r := range p.runes {
- props, i := LookupRune(r)
- bytecount += i
- cls := props.Class()
- if cls == B {
- return bytecount, nil
- }
- p.types = append(p.types, cls)
- if props.IsOpeningBracket() {
- p.pairTypes = append(p.pairTypes, bpOpen)
- p.pairValues = append(p.pairValues, r)
- } else if props.IsBracket() {
- // this must be a closing bracket,
- // since IsOpeningBracket is not true
- p.pairTypes = append(p.pairTypes, bpClose)
- p.pairValues = append(p.pairValues, r)
- } else {
- p.pairTypes = append(p.pairTypes, bpNone)
- p.pairValues = append(p.pairValues, 0)
- }
- }
- return bytecount, nil
- }
- // SetBytes configures p for the given paragraph text. It replaces text
- // previously set by SetBytes or SetString. If b contains a paragraph separator
- // it will only process the first paragraph and report the number of bytes
- // consumed from b including this separator. Error may be non-nil if options are
- // given.
- func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
- p.p = b
- p.opts = opts
- return p.prepareInput()
- }
- // SetString configures s for the given paragraph text. It replaces text
- // previously set by SetBytes or SetString. If s contains a paragraph separator
- // it will only process the first paragraph and report the number of bytes
- // consumed from s including this separator. Error may be non-nil if options are
- // given.
- func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
- p.p = []byte(s)
- p.opts = opts
- return p.prepareInput()
- }
- // IsLeftToRight reports whether the principle direction of rendering for this
- // paragraphs is left-to-right. If this returns false, the principle direction
- // of rendering is right-to-left.
- func (p *Paragraph) IsLeftToRight() bool {
- return p.Direction() == LeftToRight
- }
- // Direction returns the direction of the text of this paragraph.
- //
- // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
- func (p *Paragraph) Direction() Direction {
- return p.o.Direction()
- }
- // TODO: what happens if the position is > len(input)? This should return an error.
- // RunAt reports the Run at the given position of the input text.
- //
- // This method can be used for computing line breaks on paragraphs.
- func (p *Paragraph) RunAt(pos int) Run {
- c := 0
- runNumber := 0
- for i, r := range p.o.runes {
- c += len(r)
- if pos < c {
- runNumber = i
- }
- }
- return p.o.Run(runNumber)
- }
- func calculateOrdering(levels []level, runes []rune) Ordering {
- var curDir Direction
- prevDir := Neutral
- prevI := 0
- o := Ordering{}
- // lvl = 0,2,4,...: left to right
- // lvl = 1,3,5,...: right to left
- for i, lvl := range levels {
- if lvl%2 == 0 {
- curDir = LeftToRight
- } else {
- curDir = RightToLeft
- }
- if curDir != prevDir {
- if i > 0 {
- o.runes = append(o.runes, runes[prevI:i])
- o.directions = append(o.directions, prevDir)
- o.startpos = append(o.startpos, prevI)
- }
- prevI = i
- prevDir = curDir
- }
- }
- o.runes = append(o.runes, runes[prevI:])
- o.directions = append(o.directions, prevDir)
- o.startpos = append(o.startpos, prevI)
- return o
- }
- // Order computes the visual ordering of all the runs in a Paragraph.
- func (p *Paragraph) Order() (Ordering, error) {
- if len(p.types) == 0 {
- return Ordering{}, nil
- }
- for _, fn := range p.opts {
- fn(&p.options)
- }
- lvl := level(-1)
- if p.options.defaultDirection == RightToLeft {
- lvl = 1
- }
- para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
- if err != nil {
- return Ordering{}, err
- }
- levels := para.getLevels([]int{len(p.types)})
- p.o = calculateOrdering(levels, p.runes)
- return p.o, nil
- }
- // Line computes the visual ordering of runs for a single line starting and
- // ending at the given positions in the original text.
- func (p *Paragraph) Line(start, end int) (Ordering, error) {
- lineTypes := p.types[start:end]
- para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
- if err != nil {
- return Ordering{}, err
- }
- levels := para.getLevels([]int{len(lineTypes)})
- o := calculateOrdering(levels, p.runes[start:end])
- return o, nil
- }
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
//
// The three slices are parallel: element i of each describes run i.
type Ordering struct {
	// runes holds the text of each run.
	runes [][]rune
	// directions holds the direction of each run.
	directions []Direction
	// startpos holds, for each run, the index of its first rune within the
	// rune slice the ordering was computed from.
	startpos []int
}
- // Direction reports the directionality of the runs.
- //
- // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
- func (o *Ordering) Direction() Direction {
- return o.directions[0]
- }
- // NumRuns returns the number of runs.
- func (o *Ordering) NumRuns() int {
- return len(o.runes)
- }
- // Run returns the ith run within the ordering.
- func (o *Ordering) Run(i int) Run {
- r := Run{
- runes: o.runes[i],
- direction: o.directions[i],
- startpos: o.startpos[i],
- }
- return r
- }
- // TODO: perhaps with options.
- // // Reorder creates a reader that reads the runes in visual order per character.
- // // Modifiers remain after the runes they modify.
- // func (l *Runs) Reorder() io.Reader {
- // panic("unimplemented")
- // }
// A Run is a continuous sequence of characters of a single direction.
type Run struct {
	// runes is the text of the run in its original order.
	runes []rune
	// direction is the direction of the run.
	direction Direction
	// startpos is the index of the run's first rune within the rune slice the
	// originating Ordering was computed from.
	startpos int
}
- // String returns the text of the run in its original order.
- func (r *Run) String() string {
- return string(r.runes)
- }
- // Bytes returns the text of the run in its original order.
- func (r *Run) Bytes() []byte {
- return []byte(r.String())
- }
- // TODO: methods for
- // - Display order
- // - headers and footers
- // - bracket replacement.
- // Direction reports the direction of the run.
- func (r *Run) Direction() Direction {
- return r.direction
- }
- // Pos returns the position of the Run within the text passed to SetBytes or SetString of the
- // originating Paragraph value.
- func (r *Run) Pos() (start, end int) {
- return r.startpos, r.startpos + len(r.runes) - 1
- }
- // AppendReverse reverses the order of characters of in, appends them to out,
- // and returns the result. Modifiers will still follow the runes they modify.
- // Brackets are replaced with their counterparts.
- func AppendReverse(out, in []byte) []byte {
- ret := make([]byte, len(in)+len(out))
- copy(ret, out)
- inRunes := bytes.Runes(in)
- for i, r := range inRunes {
- prop, _ := LookupRune(r)
- if prop.IsBracket() {
- inRunes[i] = prop.reverseBracket(r)
- }
- }
- for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
- inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
- }
- copy(ret[len(out):], string(inRunes))
- return ret
- }
- // ReverseString reverses the order of characters in s and returns a new string.
- // Modifiers will still follow the runes they modify. Brackets are replaced with
- // their counterparts.
- func ReverseString(s string) string {
- input := []rune(s)
- li := len(input)
- ret := make([]rune, li)
- for i, r := range input {
- prop, _ := LookupRune(r)
- if prop.IsBracket() {
- ret[li-i-1] = prop.reverseBracket(r)
- } else {
- ret[li-i-1] = r
- }
- }
- return string(ret)
- }
|