summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/message/pipeline/extract.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/message/pipeline/extract.go')
-rw-r--r--vendor/golang.org/x/text/message/pipeline/extract.go814
1 files changed, 814 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/message/pipeline/extract.go b/vendor/golang.org/x/text/message/pipeline/extract.go
new file mode 100644
index 0000000..39b3dd5
--- /dev/null
+++ b/vendor/golang.org/x/text/message/pipeline/extract.go
@@ -0,0 +1,814 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pipeline
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "go/ast"
+ "go/constant"
+ "go/format"
+ "go/token"
+ "go/types"
+ "path/filepath"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+
+ fmtparser "golang.org/x/text/internal/format"
+ "golang.org/x/tools/go/callgraph"
+ "golang.org/x/tools/go/callgraph/cha"
+ "golang.org/x/tools/go/loader"
+ "golang.org/x/tools/go/ssa"
+ "golang.org/x/tools/go/ssa/ssautil"
+)
+
+// debug turns on the trace output written by (*extracter).debug and the
+// duplicate format-position warning printed in visitFormats.
+const debug = false
+
+// TODO:
+// - merge information into existing files
+// - handle different file formats (PO, XLIFF)
+// - handle features (gender, plural)
+// - message rewriting
+
+// - `msg:"etc"` tags
+
+// Extract extracts all strings from the package defined in Config.
+//
+// It loads the packages listed in c.Packages, seeds the analysis with the
+// formatting methods of golang.org/x/text/message's Printer and returns the
+// collected messages, tagged with c.SourceLanguage, in a new State.
+func Extract(c *Config) (*State, error) {
+	ex, err := newExtracter(c)
+	if err != nil {
+		return nil, wrap(err, "")
+	}
+	if err = ex.seedEndpoints(); err != nil {
+		return nil, err
+	}
+	ex.extractMessages()
+
+	state := &State{
+		Config:  *c,
+		program: ex.iprog,
+		Extracted: Messages{
+			Language: c.SourceLanguage,
+			Messages: ex.messages,
+		},
+	}
+	return state, nil
+}
+
+// extracter holds the loaded program, its SSA form and call graph, and the
+// data collected while searching for translatable strings.
+type extracter struct {
+ conf loader.Config
+ iprog *loader.Program
+ prog *ssa.Program
+ callGraph *callgraph.Graph
+
+ // Calls and other expressions to collect.
+ // globals is keyed by the position of the global variable; funcs is keyed
+ // by the position of the call site.
+ globals map[token.Pos]*constData
+ funcs map[token.Pos]*callData
+ messages []Message
+}
+
+// newExtracter loads the packages named in c.Packages, builds their SSA
+// program and computes a call graph with cha.CallGraph. The returned
+// extracter has empty globals and funcs maps.
+func newExtracter(c *Config) (x *extracter, err error) {
+	x = &extracter{
+		globals: make(map[token.Pos]*constData),
+		funcs:   make(map[token.Pos]*callData),
+	}
+	if x.iprog, err = loadPackages(&x.conf, c.Packages); err != nil {
+		return nil, wrap(err, "")
+	}
+
+	x.prog = ssautil.CreateProgram(x.iprog, ssa.GlobalDebug|ssa.BareInits)
+	x.prog.Build()
+	x.callGraph = cha.CallGraph(x.prog)
+	return x, nil
+}
+
+// globalData returns the constData registered for the global declared at pos,
+// creating and registering an empty entry on first use.
+func (x *extracter) globalData(pos token.Pos) *constData {
+	if cd := x.globals[pos]; cd != nil {
+		return cd
+	}
+	cd := new(constData)
+	x.globals[pos] = cd
+	return cd
+}
+
+// seedEndpoints starts the analysis from the Printf, Sprintf and Fprintf
+// methods of *message.Printer, after first collecting string constants
+// assigned to global variables. The positions given for each method count the
+// receiver as parameter 0. It returns an error if golang.org/x/text/message
+// is not part of the loaded program.
+func (x *extracter) seedEndpoints() error {
+	info := x.iprog.Package("golang.org/x/text/message")
+	if info == nil {
+		return errors.New("pipeline: golang.org/x/text/message is not imported")
+	}
+	msgPkg := x.prog.Package(info.Pkg)
+	printer := types.NewPointer(msgPkg.Type("Printer").Type())
+
+	x.processGlobalVars()
+
+	endpoints := []struct {
+		method            string
+		formatPos, argPos int
+	}{
+		{"Printf", 1, 2},
+		{"Sprintf", 1, 2},
+		{"Fprintf", 2, 3},
+	}
+	for _, ep := range endpoints {
+		fn := x.prog.LookupMethod(printer, msgPkg.Pkg, ep.method)
+		x.handleFunc(fn, &callData{
+			formatPos: ep.formatPos,
+			argPos:    ep.argPos,
+			isMethod:  true,
+		})
+	}
+	return nil
+}
+
+// processGlobalVars finds string constants that are assigned to global
+// variables.
+//
+// It scans the Store instructions of every package's "init" member and hands
+// each store into a string-typed global to visitInit.
+func (x *extracter) processGlobalVars() {
+	for _, pkg := range x.prog.AllPackages() {
+		member, found := pkg.Members["init"]
+		if !found {
+			continue
+		}
+		initFn := member.(*ssa.Function)
+		for _, block := range initFn.Blocks {
+			for _, instr := range block.Instrs {
+				store, isStore := instr.(*ssa.Store)
+				if !isStore {
+					continue
+				}
+				global, isGlobal := store.Addr.(*ssa.Global)
+				if !isGlobal {
+					continue
+				}
+				// Peel off every level of pointer indirection.
+				typ := global.Type()
+				for ptr, isPtr := typ.(*types.Pointer); isPtr; ptr, isPtr = typ.(*types.Pointer) {
+					typ = ptr.Elem()
+				}
+				basic, isBasic := typ.(*types.Basic)
+				if isBasic && basic.Kind() == types.String {
+					x.visitInit(global, store.Val)
+				}
+			}
+		}
+	}
+}
+
+// constData collects what is known about the string values of one global
+// variable.
+type constData struct {
+ call *callData // to provide a signature for the constants
+ values []constVal
+ others []token.Pos // Assigned to other global data.
+}
+
+// visit calls f for every constant recorded in d and then, recursively, for
+// the constants of every global listed in d.others that has an entry in
+// x.globals.
+func (d *constData) visit(x *extracter, f func(c constant.Value)) {
+	for _, cv := range d.values {
+		f(cv.value)
+	}
+	for _, pos := range d.others {
+		other, known := x.globals[pos]
+		if !known {
+			continue
+		}
+		other.visit(x, f)
+	}
+}
+
+// constVal is a constant together with the position reported for it by the
+// SSA constant it was taken from.
+type constVal struct {
+ value constant.Value
+ pos token.Pos
+}
+
+// callData describes one call site that (transitively) reaches a formatting
+// endpoint, plus what was learned about the function containing that call.
+type callData struct {
+ call ssa.CallInstruction // the SSA call instruction; set by handleFunc
+ expr *ast.CallExpr // the matching AST call; set by extractMessages
+ formats []constant.Value // constant format strings found by visitFormats
+
+ callee *callData // data of the function being called
+ isMethod bool // positions below include the receiver as parameter 0
+ formatPos int // parameter index of the format string, or -1 if unknown
+ argPos int // varargs at this position in the call
+ argTypes []int // arguments extractable from this position
+}
+
+// callFormatPos returns the callee's format-string position expressed as an
+// index into the call expression's argument list: for a method callee the
+// receiver slot is subtracted.
+func (c *callData) callFormatPos() int {
+	callee := c.callee
+	pos := callee.formatPos
+	if callee.isMethod {
+		pos--
+	}
+	return pos
+}
+
+// callArgsStart returns the callee's argPos expressed as an index into the
+// call expression's argument list: for a method callee the receiver slot is
+// subtracted. The result is negative when the callee's argPos is unset.
+func (c *callData) callArgsStart() int {
+	callee := c.callee
+	start := callee.argPos
+	if callee.isMethod {
+		start--
+	}
+	return start
+}
+
+// Pos returns the source position of the underlying SSA call instruction.
+func (c *callData) Pos() token.Pos {
+	return c.call.Pos()
+}
+
+// Pkg returns the types package of the function that contains the call.
+func (c *callData) Pkg() *types.Package {
+	fn := c.call.Parent()
+	return fn.Pkg.Pkg
+}
+
+// handleFunc visits every call site of f found in the call graph. fd
+// describes f: which parameter carries the format string and which one the
+// variadic arguments. A new callData is registered for each call site not yet
+// seen, and the values passed at those two positions are traced with
+// visitArgs and visitFormats.
+func (x *extracter) handleFunc(f *ssa.Function, fd *callData) {
+	for _, edge := range x.callGraph.Nodes[f].In {
+		if edge.Pos() == 0 {
+			continue
+		}
+
+		site := edge.Site
+		if x.funcs[site.Pos()] != nil {
+			// TODO: theoretically a format string could be passed to multiple
+			// arguments of a function. Support this eventually.
+			continue
+		}
+		x.debug(site, "CALL", f.String())
+
+		// Register the call site before tracing so that recursive visits
+		// see it as already handled.
+		caller := &callData{call: site, callee: fd, formatPos: -1, argPos: -1}
+		x.funcs[site.Pos()] = caller
+
+		// Offset by one if we are invoking an interface method.
+		common := site.Common()
+		shift := 0
+		if common.IsInvoke() {
+			shift = -1
+		}
+		if fd.argPos >= 0 {
+			x.visitArgs(caller, common.Args[fd.argPos+shift])
+		}
+		x.visitFormats(caller, common.Args[fd.formatPos+shift])
+	}
+}
+
+// posser is the subset of an SSA value or instruction that
+// (*extracter).debug needs: a position and the enclosing function.
+type posser interface {
+ Pos() token.Pos
+ Parent() *ssa.Function
+}
+
+// debug prints one trace line for v when the debug constant is set and does
+// nothing otherwise. The line holds v's position, the header (indented unless
+// it is "CALL" or "INSERT"), v's dynamic type and then each of args.
+func (x *extracter) debug(v posser, header string, args ...interface{}) {
+	if !debug {
+		return
+	}
+	where := ""
+	if fn := v.Parent(); fn != nil {
+		where = posString(&x.conf, fn.Package().Pkg, v.Pos())
+	}
+	switch header {
+	case "CALL", "INSERT":
+		// Top-level headers are printed flush left.
+	default:
+		header = " " + header
+	}
+	fmt.Printf("%-32s%-10s%-15T ", where+fmt.Sprintf("@%d", v.Pos()), header, v)
+	for _, arg := range args {
+		fmt.Printf(" %v", arg)
+	}
+	fmt.Println()
+}
+
+// visitInit evaluates and collects values assigned to global variables in an
+// init function.
+//
+// It walks backwards from v, the value stored into global, and records what
+// it finds in the constData entry keyed by global.Pos(): string constants
+// accepted by isMsg are appended to values, and other globals flowing into
+// this one are appended to others by position. A nil v is ignored.
+func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) {
+	if v == nil {
+		return
+	}
+	x.debug(v, "GLOBAL", v)
+
+	switch v := v.(type) {
+	case *ssa.Phi:
+		for _, e := range v.Edges {
+			x.visitInit(global, e)
+		}
+
+	case *ssa.Const:
+		// Only record strings with letters.
+		if str := constant.StringVal(v.Value); isMsg(str) {
+			cd := x.globalData(global.Pos())
+			cd.values = append(cd.values, constVal{v.Value, v.Pos()})
+		}
+		// TODO: handle %m-directive.
+
+	case *ssa.Global:
+		cd := x.globalData(global.Pos())
+		cd.others = append(cd.others, v.Pos())
+
+	case *ssa.FieldAddr, *ssa.Field:
+		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
+		// an example args as to give parameters for the translator.
+
+	case *ssa.Slice:
+		// Only a full slice expression refers to the whole operand.
+		if v.Low == nil && v.High == nil && v.Max == nil {
+			x.visitInit(global, v.X)
+		}
+
+	case *ssa.Alloc:
+		// Follow the instructions that refer to the allocation. The pointer
+		// must be non-nil before it is dereferenced.
+		if ref := v.Referrers(); ref != nil {
+			for _, r := range *ref {
+				values := []ssa.Value{}
+				for _, o := range r.Operands(nil) {
+					if o == nil || *o == v {
+						continue
+					}
+					values = append(values, *o)
+				}
+				// TODO: return something different if we care about multiple
+				// values as well.
+				if len(values) == 1 {
+					x.visitInit(global, values[0])
+				}
+			}
+		}
+
+	case ssa.Instruction:
+		// Look through instructions that merely forward a single operand.
+		rands := v.Operands(nil)
+		if len(rands) == 1 && rands[0] != nil {
+			x.visitInit(global, *rands[0])
+		}
+	}
+}
+
+// visitFormats finds the original source of the format string value v that is
+// passed at the call site described by call.
+//
+// Constant strings accepted by isMsg are appended to call.formats. When the
+// value originates from a parameter of the enclosing function, the parameter
+// index is stored in call.formatPos and the callers of that function are
+// processed in turn via handleFunc. When it originates from a global, call is
+// recorded on that global's constData. A nil v is ignored.
+func (x *extracter) visitFormats(call *callData, v ssa.Value) {
+	if v == nil {
+		return
+	}
+	x.debug(v, "VALUE", v)
+
+	switch v := v.(type) {
+	case *ssa.Phi:
+		for _, e := range v.Edges {
+			x.visitFormats(call, e)
+		}
+
+	case *ssa.Const:
+		// Only record strings with letters.
+		if isMsg(constant.StringVal(v.Value)) {
+			x.debug(call.call, "FORMAT", v.Value.ExactString())
+			call.formats = append(call.formats, v.Value)
+		}
+		// TODO: handle %m-directive.
+
+	case *ssa.Global:
+		x.globalData(v.Pos()).call = call
+
+	case *ssa.FieldAddr, *ssa.Field:
+		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
+		// an example args as to give parameters for the translator.
+
+	case *ssa.Slice:
+		// Only a full slice expression refers to the whole operand.
+		if v.Low == nil && v.High == nil && v.Max == nil {
+			x.visitFormats(call, v.X)
+		}
+
+	case *ssa.Parameter:
+		// TODO: handle the function for the index parameter.
+		f := v.Parent()
+		for i, p := range f.Params {
+			if p == v {
+				if call.formatPos < 0 {
+					call.formatPos = i
+					// TODO: is there a better way to detect this is calling
+					// a method rather than a function?
+					call.isMethod = len(f.Params) > f.Signature.Params().Len()
+					x.handleFunc(v.Parent(), call)
+				} else if debug && i != call.formatPos {
+					// TODO: support this.
+					fmt.Printf("WARNING:%s: format string passed to arg %d and %d\n",
+						posString(&x.conf, call.Pkg(), call.Pos()),
+						call.formatPos, i)
+				}
+			}
+		}
+
+	case *ssa.Alloc:
+		// Follow the instructions that refer to the allocation. The pointer
+		// must be non-nil before it is dereferenced.
+		if ref := v.Referrers(); ref != nil {
+			for _, r := range *ref {
+				values := []ssa.Value{}
+				for _, o := range r.Operands(nil) {
+					if o == nil || *o == v {
+						continue
+					}
+					values = append(values, *o)
+				}
+				// TODO: return something different if we care about multiple
+				// values as well.
+				if len(values) == 1 {
+					x.visitFormats(call, values[0])
+				}
+			}
+		}
+
+		// TODO:
+		// case *ssa.Index:
+		// // Get all values in the array if applicable
+		// case *ssa.IndexAddr:
+		// // Get all values in the slice or *array if applicable.
+		// case *ssa.Lookup:
+		// // Get all values in the map if applicable.
+
+	case *ssa.FreeVar:
+		// TODO: find the link between free variables and parameters:
+		//
+		// func freeVar(p *message.Printer, str string) {
+		// 	fn := func(p *message.Printer) {
+		// 		p.Printf(str)
+		// 	}
+		// 	fn(p)
+		// }
+
+	case *ssa.Call:
+		// Results of calls are not traced.
+
+	case ssa.Instruction:
+		// Look through instructions that merely forward a single operand.
+		rands := v.Operands(nil)
+		if len(rands) == 1 && rands[0] != nil {
+			x.visitFormats(call, *rands[0])
+		}
+	}
+}
+
+// Note: a function may have an argument marked as both format and passthrough.
+
+// visitArgs collects information on arguments. For wrapped functions it will
+// just determine the position of the variable args slice.
+//
+// v is the value passed at the callee's variadic position. If it can be
+// traced back to a parameter of the enclosing function, that parameter's
+// index is stored in fd.argPos. A nil v is ignored.
+func (x *extracter) visitArgs(fd *callData, v ssa.Value) {
+	if v == nil {
+		return
+	}
+	x.debug(v, "ARGV", v)
+	switch v := v.(type) {
+
+	case *ssa.Slice:
+		// Only a full slice expression refers to the whole operand.
+		if v.Low == nil && v.High == nil && v.Max == nil {
+			x.visitArgs(fd, v.X)
+		}
+
+	case *ssa.Parameter:
+		// TODO: handle the function for the index parameter.
+		f := v.Parent()
+		for i, p := range f.Params {
+			if p == v {
+				fd.argPos = i
+			}
+		}
+
+	case *ssa.Alloc:
+		// Follow the instructions that refer to the allocation. The pointer
+		// must be non-nil before it is dereferenced.
+		if ref := v.Referrers(); ref != nil {
+			for _, r := range *ref {
+				values := []ssa.Value{}
+				for _, o := range r.Operands(nil) {
+					if o == nil || *o == v {
+						continue
+					}
+					values = append(values, *o)
+				}
+				// TODO: return something different if we care about
+				// multiple values as well.
+				if len(values) == 1 {
+					x.visitArgs(fd, values[0])
+				}
+			}
+		}
+
+	case ssa.Instruction:
+		// Look through instructions that merely forward a single operand.
+		rands := v.Operands(nil)
+		if len(rands) == 1 && rands[0] != nil {
+			x.visitArgs(fd, *rands[0])
+		}
+	}
+}
+
+// print returns Go syntax for the specified node.
+func (x *extracter) print(n ast.Node) string {
+	out := new(bytes.Buffer)
+	// A formatting error is deliberately ignored: whatever was written to
+	// the buffer before the failure is returned as-is.
+	_ = format.Node(out, x.conf.Fset, n)
+	return out.String()
+}
+
+// packageExtracter bundles one source file with the extracter, the package
+// info the file belongs to, and the file's comment map.
+//
+// extractMessages builds values of this type with an unkeyed literal, so the
+// field order must not change.
+type packageExtracter struct {
+ f *ast.File
+ x *extracter
+ info *loader.PackageInfo
+ cmap ast.CommentMap
+}
+
+// getComment returns the whitespace-trimmed text of the first comment group
+// that the file's comment map associates with n, or "" if there is none.
+func (px packageExtracter) getComment(n ast.Node) string {
+	groups := px.cmap.Filter(n).Comments()
+	if len(groups) == 0 {
+		return ""
+	}
+	return strings.TrimSpace(groups[0].Text())
+}
+
+// extractMessages walks the syntax trees of all loaded packages and turns the
+// calls and globals collected earlier into messages.
+//
+// It makes two passes over every file. The first links each registered call
+// site in x.funcs to its *ast.CallExpr by matching the Lparen position. The
+// second emits messages through handleCall and handleGlobal.
+func (x *extracter) extractMessages() {
+	var files []packageExtracter
+	for _, info := range x.iprog.AllPackages {
+		for _, file := range info.Files {
+			// Associate comments with nodes.
+			files = append(files, packageExtracter{
+				f:    file,
+				x:    x,
+				info: info,
+				cmap: ast.NewCommentMap(x.iprog.Fset, file, file.Comments),
+			})
+		}
+	}
+
+	// Pass 1: attach the AST call expression to every known call site.
+	for _, px := range files {
+		ast.Inspect(px.f, func(n ast.Node) bool {
+			if call, ok := n.(*ast.CallExpr); ok {
+				if d := x.funcs[call.Lparen]; d != nil {
+					d.expr = call
+				}
+			}
+			return true
+		})
+	}
+
+	// Pass 2: emit messages for calls and global declarations.
+	for _, px := range files {
+		ast.Inspect(px.f, func(n ast.Node) bool {
+			switch node := n.(type) {
+			case *ast.CallExpr:
+				return px.handleCall(node)
+			case *ast.ValueSpec:
+				return px.handleGlobal(node)
+			}
+			return true
+		})
+	}
+}
+
+// handleGlobal emits a message for every constant recorded for the names
+// declared in spec. A name is considered only if it has an entry in
+// px.x.globals and either was seen flowing into a formatting call or starts
+// with "msg" or "Msg". It always returns true.
+func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool {
+	comment := px.getComment(spec)
+
+	for _, ident := range spec.Names {
+		data, found := px.x.globals[ident.Pos()]
+		if !found {
+			continue
+		}
+		name := ident.Name
+
+		var arguments []argument
+		switch {
+		case data.call != nil:
+			arguments = px.getArguments(data.call)
+		case !strings.HasPrefix(name, "msg") && !strings.HasPrefix(name, "Msg"):
+			// Neither used as a format string nor named like a message.
+			continue
+		}
+
+		data.visit(px.x, func(c constant.Value) {
+			px.addMessage(spec.Pos(), []string{name}, c, comment, arguments)
+		})
+	}
+
+	return true
+}
+
+// handleCall emits one message for every constant format string recorded for
+// the given call expression. Calls that are not registered in x.funcs, or for
+// which no format string was found, are skipped. It always returns true.
+//
+// It panics if the registered call data is linked to a different call
+// expression.
+func (px packageExtracter) handleCall(call *ast.CallExpr) bool {
+	x := px.x
+	data := x.funcs[call.Lparen]
+	if data == nil || len(data.formats) == 0 {
+		return true
+	}
+	if data.expr != call {
+		panic("invariant `data.call != call` failed")
+	}
+	x.debug(data.call, "INSERT", data.formats)
+
+	// The format position is negative when the format string is the method
+	// receiver, which is not part of call.Args.
+	argn := data.callFormatPos()
+	if argn < 0 || argn >= len(call.Args) {
+		return true
+	}
+	format := call.Args[argn]
+
+	arguments := px.getArguments(data)
+
+	comment := ""
+	key := []string{}
+	if ident, ok := format.(*ast.Ident); ok {
+		key = append(key, ident.Name)
+		// ident.Obj is nil when the parser did not resolve the identifier,
+		// so it has to be checked before looking at the declaration.
+		if ident.Obj != nil {
+			if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil {
+				// TODO: get comment above ValueSpec as well
+				comment = v.Comment.Text()
+			}
+		}
+	}
+	// A comment attached to the first argument takes precedence.
+	if c := px.getComment(call.Args[0]); c != "" {
+		comment = c
+	}
+
+	for _, c := range data.formats {
+		px.addMessage(call.Lparen, key, c, comment, arguments)
+	}
+	return true
+}
+
+// getArguments describes the arguments of data.expr starting at the callee's
+// variadic position. Each entry carries its 1-based number, type, underlying
+// type, source expression, constant value (if any), comment and position.
+// For constant binary and unary expressions the expression text is replaced
+// by the constant value. The result is empty, never nil, when the callee has
+// no variadic position.
+func (px packageExtracter) getArguments(data *callData) []argument {
+	arguments := []argument{}
+	start := data.callArgsStart()
+	if start < 0 {
+		return arguments
+	}
+
+	x, info := px.x, px.info
+	for i, arg := range data.expr.Args[start:] {
+		expr := x.print(arg)
+		tv := info.Types[arg]
+		val := ""
+		if tv.Value != nil {
+			val = tv.Value.ExactString()
+			switch arg.(type) {
+			case *ast.BinaryExpr, *ast.UnaryExpr:
+				expr = val
+			}
+		}
+		arguments = append(arguments, argument{
+			ArgNum:         i + 1,
+			Type:           tv.Type.String(),
+			UnderlyingType: tv.Type.Underlying().String(),
+			Expr:           expr,
+			Value:          val,
+			Comment:        px.getComment(arg),
+			Position:       posString(&x.conf, info.Pkg, arg.Pos()),
+			// TODO report whether it implements
+			// interfaces plural.Interface,
+			// gender.Interface.
+		})
+	}
+	return arguments
+}
+
+// addMessage converts the constant format string c into a Message and appends
+// it to px.x.messages.
+//
+// The format string is trimmed with trimWS and scanned with the fmtparser
+// Parser. Literal text is copied as-is; every substitution is replaced by a
+// "{ID}" placeholder whose ID comes from placeholders.addArg. The resulting
+// text is appended to key to form the message ID. Arguments that no verb
+// consumed are still registered as placeholders with a "%[n]v" substitution.
+//
+// pos is used for the reported position, key holds any leading ID components,
+// comment is stored unchanged and arguments describes the call arguments that
+// follow the format string; its elements are marked as used in place.
+//
+// NOTE(review): a scan status not listed in the switch below leaves arg nil,
+// which addArg then dereferences — confirm the parser cannot report one here.
+func (px packageExtracter) addMessage(
+ pos token.Pos,
+ key []string,
+ c constant.Value,
+ comment string,
+ arguments []argument) {
+ x := px.x
+ fmtMsg := constant.StringVal(c)
+
+ ph := placeholders{index: map[string]string{}}
+
+ trimmed, _, _ := trimWS(fmtMsg)
+
+ p := fmtparser.Parser{}
+ simArgs := make([]interface{}, len(arguments))
+ for i, v := range arguments {
+ simArgs[i] = v
+ }
+ msg := ""
+ p.Reset(simArgs)
+ for p.SetFormat(trimmed); p.Scan(); {
+ name := ""
+ var arg *argument
+ switch p.Status {
+ case fmtparser.StatusText:
+ msg += p.Text()
+ continue
+ case fmtparser.StatusSubstitution,
+ fmtparser.StatusBadWidthSubstitution,
+ fmtparser.StatusBadPrecSubstitution:
+ arguments[p.ArgNum-1].used = true
+ arg = &arguments[p.ArgNum-1]
+ name = getID(arg)
+ case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg:
+ // No matching argument: synthesize one from the verb alone.
+ arg = &argument{
+ ArgNum: p.ArgNum,
+ Position: posString(&x.conf, px.info.Pkg, pos),
+ }
+ name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum)
+ }
+ sub := p.Text()
+ if !p.HasIndex {
+ // Insert an explicit "[n]" argument index before the verb rune.
+ r, sz := utf8.DecodeLastRuneInString(sub)
+ sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
+ }
+ msg += fmt.Sprintf("{%s}", ph.addArg(arg, name, sub))
+ }
+ key = append(key, msg)
+
+ // Add additional Placeholders that can be used in translations
+ // that are not present in the string.
+ for _, arg := range arguments {
+ if arg.used {
+ continue
+ }
+ ph.addArg(&arg, getID(&arg), fmt.Sprintf("%%[%d]v", arg.ArgNum))
+ }
+
+ x.messages = append(x.messages, Message{
+ ID: key,
+ Key: fmtMsg,
+ Message: Text{Msg: msg},
+ // TODO(fix): this doesn't get the before comment.
+ Comment: comment,
+ Placeholders: ph.slice,
+ Position: posString(&x.conf, px.info.Pkg, pos),
+ })
+}
+
+// posString formats pos as "<package path>/<file base name>:<line>:<column>",
+// joined with filepath.Join.
+func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string {
+	position := conf.Fset.Position(pos)
+	name := filepath.Base(position.Filename)
+	return filepath.Join(pkg.Path(), fmt.Sprintf("%s:%d:%d", name, position.Line, position.Column))
+}
+
+// getID derives a placeholder ID from an argument. It takes the last
+// dot-separated component of the argument expression, cleans it up with strip
+// and title-cases the result.
+func getID(arg *argument) string {
+	id := strings.Replace(strip(getLastComponent(arg.Expr)), " ", "", -1)
+	// For small variable names, use user-defined types for more info.
+	if len(id) <= 2 && arg.UnderlyingType != arg.Type {
+		id = getLastComponent(arg.Type)
+	}
+	return strings.Title(id)
+}
+
+// strip is a dirty hack to convert function calls to placeholder IDs.
+//
+// Whitespace and '-' become '_'; every other rune that is not a letter, mark
+// or number is dropped. A leading "Get" or "get" is then removed when it is
+// followed by a rune that is neither a lowercase letter nor a mark, so
+// "GetName" and "getName" become "Name" while "Getty" and "getter" are kept.
+func strip(s string) string {
+	s = strings.Map(func(r rune) rune {
+		if unicode.IsSpace(r) || r == '-' {
+			return '_'
+		}
+		if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) {
+			return -1
+		}
+		return r
+	}, s)
+	// Strip "Get" from getter functions.
+	if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") {
+		if rest := s[len("get"):]; rest != "" {
+			// Inspect the rune that follows the prefix, not the prefix itself.
+			r, _ := utf8.DecodeRuneInString(rest)
+			if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark
+				s = rest
+			}
+		}
+	}
+	return s
+}
+
+// verbToPlaceholder gives a name for a placeholder based on the substitution
+// verb. This is only to be used if there is otherwise no other type information
+// available.
+//
+// The verb is the last rune of sub. Verbs without a dedicated name are named
+// "Arg_<pos>".
+func verbToPlaceholder(sub string, pos int) (name, underlying string) {
+	verb, _ := utf8.DecodeLastRuneInString(sub)
+	switch verb {
+	case 'd':
+		return "Integer", "int"
+	case 'e', 'f', 'g':
+		return "Number", "float64"
+	case 'm':
+		return "Message", "string"
+	}
+	name = fmt.Sprintf("Arg_%d", pos)
+	if verb == 's' || verb == 'q' {
+		return name, "string"
+	}
+	return name, "interface{}"
+}
+
+// placeholders accumulates the placeholders of one message: index maps each
+// assigned ID to its substitution string, slice holds the placeholders in
+// the order they were added.
+type placeholders struct {
+ index map[string]string
+ slice []Placeholder
+}
+
+// addArg records a placeholder for arg and returns the ID it was given. The
+// ID is name unless name is already taken by a different substitution string,
+// in which case "_1", "_2", ... is appended until a free ID, or one already
+// bound to sub, is found.
+func (p *placeholders) addArg(arg *argument, name, sub string) (id string) {
+	id = name
+	for n := 1; ; n++ {
+		existing, taken := p.index[id]
+		if !taken || existing == sub {
+			break
+		}
+		id = fmt.Sprintf("%s_%d", name, n)
+	}
+	p.index[id] = sub
+
+	entry := Placeholder{
+		ID:             id,
+		String:         sub,
+		Type:           arg.Type,
+		UnderlyingType: arg.UnderlyingType,
+		ArgNum:         arg.ArgNum,
+		Expr:           arg.Expr,
+		Comment:        arg.Comment,
+	}
+	p.slice = append(p.slice, entry)
+	return id
+}
+
+// getLastComponent returns the part of s after its last '.', or all of s if
+// it contains no '.'.
+func getLastComponent(s string) string {
+	if i := strings.LastIndexByte(s, '.'); i >= 0 {
+		return s[i+1:]
+	}
+	return s
+}
+
+// isMsg returns whether s should be translated, which is the case when it
+// contains at least one letter.
+func isMsg(s string) bool {
+	// TODO: parse as format string and omit strings that contain letters
+	// coming from format verbs.
+	return strings.IndexFunc(s, unicode.IsLetter) >= 0
+}