diff options
author | Sasha Smundak <asmundak@google.com> | 2021-02-24 04:06:42 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-02-24 04:06:42 +0000 |
commit | 0710991d7fabe8660f0a9b840bc4e74184a5a2d7 (patch) | |
tree | 2efd7da6ec6557b9aaed7932d19e07e1beade288 | |
parent | 7b6d2ef30437199c2b437649bc92fdf6f8379d09 (diff) | |
parent | 2ba90398f8ec337a2662214298c9d9ea9cc48380 (diff) | |
download | starlark-go-0710991d7fabe8660f0a9b840bc4e74184a5a2d7.tar.gz |
Import from upstream. am: 2ba90398f8
Original change: https://android-review.googlesource.com/c/platform/external/starlark-go/+/1592915
MUST ONLY BE SUBMITTED BY AUTOMERGER
Change-Id: I2342babc15ec846be46b25cb69fe8432104f0330
91 files changed, 31119 insertions, 0 deletions
diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..23fcb4f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,20 @@ +language: go + +go_import_path: go.starlark.net + +go: + - "1.13.x" + - "1.14.x" + - "1.15.x" + - "master" + +env: + - "GO111MODULE=on" + +script: + - "go test -mod=readonly ./..." + - "cp go.mod go.mod.orig" + - "cp go.sum go.sum.orig" + - "go mod tidy" + - "diff go.mod.orig go.mod" + - "diff go.sum.orig go.sum" @@ -0,0 +1 @@ +go.starlark.net @@ -0,0 +1,29 @@ +Copyright (c) 2017 The Bazel Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..1ea6695 --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ + +<!-- This file is the project homepage for go.starlark.net --> + +# Starlark in Go + +[![Travis CI](https://travis-ci.org/google/starlark-go.svg)](https://travis-ci.org/google/starlark-go) +[![GoDoc](https://godoc.org/go.starlark.net/starlark?status.svg)](https://godoc.org/go.starlark.net/starlark) + +This is the home of the _Starlark in Go_ project. +Starlark in Go is an interpreter for Starlark, implemented in Go. +Starlark was formerly known as Skylark. +The new import path for Go packages is `"go.starlark.net/starlark"`. + +Starlark is a dialect of Python intended for use as a configuration language. +Like Python, it is an untyped dynamic language with high-level data +types, first-class functions with lexical scope, and garbage collection. +Unlike CPython, independent Starlark threads execute in parallel, so +Starlark workloads scale well on parallel machines. +Starlark is a small and simple language with a familiar and highly +readable syntax. You can use it as an expressive notation for +structured data, defining functions to eliminate repetition, or you +can use it to add scripting capabilities to an existing application. 
+ +A Starlark interpreter is typically embedded within a larger +application, and the application may define additional domain-specific +functions and data types beyond those provided by the core language. +For example, Starlark was originally developed for the +[Bazel build tool](https://bazel.build). +Bazel uses Starlark as the notation both for its BUILD files (like +Makefiles, these declare the executables, libraries, and tests in a +directory) and for [its macro +language](https://docs.bazel.build/versions/master/skylark/language.html), +through which Bazel is extended with custom logic to support new +languages and compilers. + + +## Documentation + +* Language definition: [doc/spec.md](doc/spec.md) + +* About the Go implementation: [doc/impl.md](doc/impl.md) + +* API documentation: [godoc.org/go.starlark.net/starlark](https://godoc.org/go.starlark.net/starlark) + +* Mailing list: [starlark-go](https://groups.google.com/forum/#!forum/starlark-go) + +* Issue tracker: [https://github.com/google/starlark-go/issues](https://github.com/google/starlark-go/issues) + +### Getting started + +Build the code: + +```shell +# check out the code and dependencies, +# and install interpreter in $GOPATH/bin +$ go get -u go.starlark.net/cmd/starlark +``` + +Run the interpreter: + +```console +$ cat coins.star +coins = { + 'dime': 10, + 'nickel': 5, + 'penny': 1, + 'quarter': 25, +} +print('By name:\t' + ', '.join(sorted(coins.keys()))) +print('By value:\t' + ', '.join(sorted(coins.keys(), key=coins.get))) + +$ starlark coins.star +By name: dime, nickel, penny, quarter +By value: penny, nickel, dime, quarter +``` + +Interact with the read-eval-print loop (REPL): + +```pycon +$ starlark +>>> def fibonacci(n): +... res = list(range(n)) +... for i in res[2:]: +... res[i] = res[i-2] + res[i-1] +... return res +... +>>> fibonacci(10) +[0, 1, 1, 2, 3, 5, 8, 13, 21, 34] +>>> +``` + +When you have finished, type `Ctrl-D` to close the REPL's input stream. 
+ +Embed the interpreter in your Go program: + +```go +import "go.starlark.net/starlark" + +// Execute Starlark program in a file. +thread := &starlark.Thread{Name: "my thread"} +globals, err := starlark.ExecFile(thread, "fibonacci.star", nil, nil) +if err != nil { ... } + +// Retrieve a module global. +fibonacci := globals["fibonacci"] + +// Call Starlark function from Go. +v, err := starlark.Call(thread, fibonacci, starlark.Tuple{starlark.MakeInt(10)}, nil) +if err != nil { ... } +fmt.Printf("fibonacci(10) = %v\n", v) // fibonacci(10) = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34] +``` + +See [starlark/example_test.go](starlark/example_test.go) for more examples. + +### Contributing + +We welcome submissions but please let us know what you're working on +if you want to change or add to the Starlark repository. + +Before undertaking to write something new for the Starlark project, +please file an issue or claim an existing issue. +All significant changes to the language or to the interpreter's Go +API must be discussed before they can be accepted. +This gives all participants a chance to validate the design and to +avoid duplication of effort. + +Despite some differences, the Go implementation of Starlark strives to +match the behavior of [the Java implementation](https://github.com/bazelbuild/bazel) +used by Bazel and maintained by the Bazel team. +For that reason, proposals to change the language itself should +generally be directed to [the Starlark site]( +https://github.com/bazelbuild/starlark/), not to the maintainers of this +project. +Only once there is consensus that a language change is desirable may +its Go implementation proceed. + +We use GitHub pull requests for contributions. + +Please complete Google's contributor license agreement (CLA) before +sending your first change to the project. 
If you are the copyright +holder, you will need to agree to the +[individual contributor license agreement](https://cla.developers.google.com/about/google-individual), +which can be completed online. +If your organization is the copyright holder, the organization will +need to agree to the [corporate contributor license agreement](https://cla.developers.google.com/about/google-corporate). +If the copyright holder for your contribution has already completed +the agreement in connection with another Google open source project, +it does not need to be completed again. + +### Stability + +We reserve the right to make breaking language and API changes at this +stage in the project, although we will endeavor to keep them to a minimum. +Once the Bazel team has finalized the version 1 language specification, +we will be more rigorous with interface stability. + +### Credits + +Starlark was designed and implemented in Java by +Ulf Adams, +Lukács Berki, +Jon Brandvein, +John Field, +Laurent Le Brun, +Dmitry Lomov, +Damien Martin-Guillerez, +Vladimir Moskva, and +Florian Weikert, +standing on the shoulders of the Python community. +The Go implementation was written by Alan Donovan and Jay Conrod; +its scanner was derived from one written by Russ Cox. + +### Legal + +Starlark in Go is Copyright (c) 2018 The Bazel Authors. +All rights reserved. + +It is provided under a 3-clause BSD license: +[LICENSE](https://github.com/google/starlark-go/blob/master/LICENSE). + +Starlark in Go is not an official Google product. diff --git a/cmd/starlark/starlark.go b/cmd/starlark/starlark.go new file mode 100644 index 0000000..3825f00 --- /dev/null +++ b/cmd/starlark/starlark.go @@ -0,0 +1,141 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The starlark command interprets a Starlark file. +// With no arguments, it starts a read-eval-print loop (REPL). 
+package main // import "go.starlark.net/cmd/starlark" + +import ( + "flag" + "fmt" + "log" + "os" + "runtime" + "runtime/pprof" + "strings" + + "go.starlark.net/internal/compile" + "go.starlark.net/repl" + "go.starlark.net/resolve" + "go.starlark.net/starlark" + "go.starlark.net/starlarkjson" +) + +// flags +var ( + cpuprofile = flag.String("cpuprofile", "", "gather Go CPU profile in this file") + memprofile = flag.String("memprofile", "", "gather Go memory profile in this file") + profile = flag.String("profile", "", "gather Starlark time profile in this file") + showenv = flag.Bool("showenv", false, "on success, print final global environment") + execprog = flag.String("c", "", "execute program `prog`") +) + +func init() { + flag.BoolVar(&compile.Disassemble, "disassemble", compile.Disassemble, "show disassembly during compilation of each function") + + // non-standard dialect flags + flag.BoolVar(&resolve.AllowFloat, "float", resolve.AllowFloat, "obsolete; no effect") + flag.BoolVar(&resolve.AllowSet, "set", resolve.AllowSet, "allow set data type") + flag.BoolVar(&resolve.AllowLambda, "lambda", resolve.AllowLambda, "allow lambda expressions") + flag.BoolVar(&resolve.AllowRecursion, "recursion", resolve.AllowRecursion, "allow while statements and recursive functions") + flag.BoolVar(&resolve.AllowGlobalReassign, "globalreassign", resolve.AllowGlobalReassign, "allow reassignment of globals, and if/for/while statements at top level") +} + +func main() { + os.Exit(doMain()) +} + +func doMain() int { + log.SetPrefix("starlark: ") + log.SetFlags(0) + flag.Parse() + + if *cpuprofile != "" { + f, err := os.Create(*cpuprofile) + check(err) + err = pprof.StartCPUProfile(f) + check(err) + defer func() { + pprof.StopCPUProfile() + err := f.Close() + check(err) + }() + } + if *memprofile != "" { + f, err := os.Create(*memprofile) + check(err) + defer func() { + runtime.GC() + err := pprof.Lookup("heap").WriteTo(f, 0) + check(err) + err = f.Close() + check(err) + }() + } + + 
if *profile != "" { + f, err := os.Create(*profile) + check(err) + err = starlark.StartProfile(f) + check(err) + defer func() { + err := starlark.StopProfile() + check(err) + }() + } + + thread := &starlark.Thread{Load: repl.MakeLoad()} + globals := make(starlark.StringDict) + + // Ideally this statement would update the predeclared environment. + // TODO(adonovan): plumb predeclared env through to the REPL. + starlark.Universe["json"] = starlarkjson.Module + + switch { + case flag.NArg() == 1 || *execprog != "": + var ( + filename string + src interface{} + err error + ) + if *execprog != "" { + // Execute provided program. + filename = "cmdline" + src = *execprog + } else { + // Execute specified file. + filename = flag.Arg(0) + } + thread.Name = "exec " + filename + globals, err = starlark.ExecFile(thread, filename, src, nil) + if err != nil { + repl.PrintError(err) + return 1 + } + case flag.NArg() == 0: + fmt.Println("Welcome to Starlark (go.starlark.net)") + thread.Name = "REPL" + repl.REPL(thread, globals) + default: + log.Print("want at most one Starlark file name") + return 1 + } + + // Print the global environment. + if *showenv { + for _, name := range globals.Keys() { + if !strings.HasPrefix(name, "_") { + fmt.Fprintf(os.Stderr, "%s = %s\n", name, globals[name]) + } + } + } + + return 0 +} + +func check(err error) { + if err != nil { + log.Fatal(err) + } +} diff --git a/doc/impl.md b/doc/impl.md new file mode 100644 index 0000000..380e2d6 --- /dev/null +++ b/doc/impl.md @@ -0,0 +1,242 @@ + +# Starlark in Go: Implementation + +This document (a work in progress) describes some of the design +choices of the Go implementation of Starlark. 
+ + * [Scanner](#scanner) + * [Parser](#parser) + * [Resolver](#resolver) + * [Evaluator](#evaluator) + * [Data types](#data-types) + * [Freezing](#freezing) + * [Testing](#testing) + + +## Scanner + +The scanner is derived from Russ Cox's +[buildifier](https://github.com/bazelbuild/buildtools/tree/master/buildifier) +tool, which pretty-prints Bazel BUILD files. + +Most of the work happens in `(*scanner).nextToken`. + +## Parser + +The parser is a hand-written recursive-descent parser. It uses the +technique of [precedence +climbing](http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing) +to reduce the number of productions. + +In some places the parser accepts a larger set of programs than are +strictly valid, leaving the task of rejecting them to the subsequent +resolver pass. For example, in the function call `f(a, b=c)` the +parser accepts any expression for `a` and `b`, even though `b` may +legally be only an identifier. For the parser to distinguish these +cases would require additional lookahead. + +## Resolver + +The resolver reports structural errors in the program, such as the use +of `break` and `continue` outside of a loop. + +Starlark has stricter syntactic limitations than Python. For example, +it does not permit `for` loops or `if` statements at top level, nor +does it permit global variables to be bound more than once. +These limitations come from the Bazel project's desire to make it easy +to identify the sole statement that defines each global, permitting +accurate cross-reference documentation. + +In addition, the resolver validates all variable names, classifying +them as references to universal, global, local, or free variables. +Local and free variables are mapped to a small integer, allowing the +evaluator to use an efficient (flat) representation for the +environment. 
+ +Not all features of the Go implementation are "standard" (that is, +supported by Bazel's Java implementation), at least for now, so +non-standard features such as `lambda`, `float`, and `set` +are flag-controlled. The resolver reports +any uses of dialect features that have not been enabled. + + +## Evaluator + +### Data types + +<b>Integers:</b> Integers are represented using `big.Int`, an +arbitrary precision integer. This representation was chosen because, +for many applications, Starlark must be able to handle without loss +protocol buffer values containing signed and unsigned 64-bit integers, +which requires 65 bits of precision. + +Small integers (<256) are preallocated, but all other values require +memory allocation. Integer performance is relatively poor, but it +matters little for Bazel-like workloads which depend much +more on lists of strings than on integers. (Recall that a typical loop +over a list in Starlark does not materialize the loop index as an `int`.) + +An optimization worth trying would be to represent integers using +either an `int32` or `big.Int`, with the `big.Int` used only when +`int32` does not suffice. Using `int32`, not `int64`, for "small" +numbers would make it easier to detect overflow from operations like +`int32 * int32`, which would trigger the use of `big.Int`. + +<b>Floating point</b>: +Floating point numbers are represented using Go's `float64`. +Again, `float` support is required to support protocol buffers. The +existence of floating-point NaN and its infamous comparison behavior +(`NaN != NaN`) had many ramifications for the API, since we cannot +assume the result of an ordered comparison is either less than, +greater than, or equal: it may also fail. + +<b>Strings</b>: + +TODO: discuss UTF-8 and string.bytes method. + +<b>Dictionaries and sets</b>: +Starlark dictionaries have predictable iteration order. 
+Furthermore, many Starlark values are hashable in Starlark even though +the Go values that represent them are not hashable in Go: big +integers, for example. +Consequently, we cannot use Go maps to implement Starlark's dictionary. + +We use a simple hash table whose buckets are linked lists, each +element of which holds up to 8 key/value pairs. In a well-distributed +table the list should rarely exceed length 1. In addition, each +key/value item is part of a doubly-linked list that maintains the +insertion order of the elements for iteration. + +<b>Struct:</b> +The `starlarkstruct` Go package provides a non-standard Starlark +extension data type, `struct`, that maps field identifiers to +arbitrary values. Fields are accessed using dot notation: `y = s.f`. +This data type is extensively used in Bazel, but its specification is +currently evolving. + +Starlark has no `class` mechanism, nor equivalent of Python's +`namedtuple`, though it is likely that future versions will support +some way to define a record data type of several fields, with a +representation more efficient than a hash table. + + +### Freezing + +All mutable values created during module initialization are _frozen_ +upon its completion. It is this property that permits a Starlark module +to be referenced by two Starlark threads running concurrently (such as +the initialization threads of two other modules) without the +possibility of a data race. + +The Go implementation supports freezing by storing an additional +"frozen" Boolean variable in each mutable object. Once this flag is set, +all subsequent attempts at mutation fail. Every value defines a +Freeze method that sets its own frozen flag if not already set, and +calls Freeze for each value that it contains. 
+For example, when a list is frozen, it freezes each of its elements; +when a dictionary is frozen, it freezes each of its keys and values; +and when a function value is frozen, it freezes each of the free +variables and parameter default values implicitly referenced by its closure. +Application-defined types must also follow this discipline. + +The freeze mechanism in the Go implementation is finer grained than in +the Java implementation: in effect, the latter has one "frozen" flag +per module, and every value holds a reference to the frozen flag of +its module. This makes setting the frozen flag more efficient---a +simple bit flip, no need to traverse the object graph---but coarser +grained. Also, it complicates the API slightly because to construct a +list, say, requires a reference to the frozen flag it should use. + +The Go implementation would also permit the freeze operation to be +exposed to the program, for example as a built-in function. +This has proven valuable in writing tests of the freeze mechanism +itself, but is otherwise mostly a curiosity. + + +### Fail-fast iterators + +In some languages (such as Go), a program may mutate a data structure +while iterating over it; for example, a range loop over a map may +delete map elements. In other languages (such as Java), iterators do +extra bookkeeping so that modification of the underlying collection +invalidates the iterator, and the next attempt to use it fails. +This often helps to detect subtle mistakes. + +Starlark takes this a step further. Instead of mutation of the +collection invalidating the iterator, the act of iterating makes the +collection temporarily immutable, so that an attempt to, say, delete a +dict element while looping over the dict, will fail. The error is +reported against the delete operation, not the iteration. + +This is implemented by having each mutable iterable value record a +counter of active iterators. 
Starting a loop increments this counter, +and completing a loop decrements it. A collection with a nonzero +counter behaves as if frozen. If the collection is actually frozen, +the counter bookkeeping is unnecessary. (Consequently, iterator +bookkeeping is needed only while objects are still mutable, before +they can have been published to another thread, and thus no +synchronization is necessary.) + +A consequence of this design is that in the Go API, it is imperative +to call `Done` on each iterator once it is no longer needed. + +``` +TODO +starlark.Value interface and subinterfaces +argument passing to builtins: UnpackArgs, UnpackPositionalArgs. +``` + +<b>Evaluation strategy:</b> +The evaluator uses a simple recursive tree walk, returning a value or +an error for each expression. We have experimented with just-in-time +compilation of syntax trees to bytecode, but two limitations in the +current Go compiler prevent this strategy from outperforming the +tree-walking evaluator. + +First, the Go compiler does not generate a "computed goto" for a +switch statement ([Go issue +5496](https://github.com/golang/go/issues/5496)). A bytecode +interpreter's main loop is a for-loop around a switch statement with +dozens or hundreds of cases, and the speed with which each case can be +dispatched strongly affects overall performance. +Currently, a switch statement generates a binary tree of ordered +comparisons, requiring several branches instead of one. + +Second, the Go compiler's escape analysis assumes that the underlying +array from a `make([]Value, n)` allocation always escapes +([Go issue 20533](https://github.com/golang/go/issues/20533)). +Because the bytecode interpreter's operand stack has a non-constant +length, it must be allocated with `make`. The resulting allocation +adds to the cost of each Starlark function call; this can be tolerated +by amortizing one very large stack allocation across many calls. 
+More problematic appears to be the cost of the additional GC write +barriers incurred by every VM operation: every intermediate result is +saved to the VM's operand stack, which is on the heap. +By contrast, intermediate results in the tree-walking evaluator are +never stored to the heap. + +``` +TODO +frames, backtraces, errors. +threads +Print +Load +``` + +## Testing + +``` +TODO +starlarktest package +`assert` module +starlarkstruct +integration with Go testing.T +``` + + +## TODO + + +``` +Discuss practical separation of code and data. +``` diff --git a/doc/spec.md b/doc/spec.md new file mode 100644 index 0000000..15e4dc2 --- /dev/null +++ b/doc/spec.md @@ -0,0 +1,4263 @@ +# Starlark in Go: Language definition + +Starlark is a dialect of Python intended for use as a configuration +language. A Starlark interpreter is typically embedded within a larger +application, and this application may define additional +domain-specific functions and data types beyond those provided by the +core language. For example, Starlark is embedded within (and was +originally developed for) the [Bazel build tool](https://bazel.build), +and [Bazel's build language](https://docs.bazel.build/versions/master/starlark/language.html) is based on Starlark. + +This document describes the Go implementation of Starlark +at go.starlark.net/starlark. +The language it defines is similar but not identical to +[the Java-based implementation](https://github.com/bazelbuild/bazel/blob/master/src/main/java/com/google/devtools/starlark/Starlark.java) +used by Bazel. +We identify places where their behaviors differ, and an +[appendix](#dialect-differences) provides a summary of those +differences. +We plan to converge both implementations on a single specification. + +This document is maintained by Alan Donovan <adonovan@google.com>. +It was influenced by the Python specification, +Copyright 1990–2017, Python Software Foundation, +and the Go specification, Copyright 2009–2017, The Go Authors. 
+ +Starlark was designed and implemented in Java by Laurent Le Brun, +Dmitry Lomov, Jon Brandvein, and Damien Martin-Guillerez, standing on +the shoulders of the Python community. +The Go implementation was written by Alan Donovan and Jay Conrod; +its scanner was derived from one written by Russ Cox. + +## Overview + +Starlark is an untyped dynamic language with high-level data types, +first-class functions with lexical scope, and automatic memory +management or _garbage collection_. + +Starlark is strongly influenced by Python, and is almost a subset of +that language. In particular, its data types and syntax for +statements and expressions will be very familiar to any Python +programmer. +However, Starlark is intended not for writing applications but for +expressing configuration: its programs are short-lived and have no +external side effects and their main result is structured data or side +effects on the host application. +As a result, Starlark has no need for classes, exceptions, reflection, +concurrency, and other such features of Python. + +Starlark execution is _deterministic_: all functions and operators +in the core language produce the same execution each time the program +is run; there are no sources of random numbers, clocks, or unspecified +iterators. This makes Starlark suitable for use in applications where +reproducibility is paramount, such as build tools. + +## Contents + +<!-- WTF? No automatic TOC? 
--> + + * [Overview](#overview) + * [Contents](#contents) + * [Lexical elements](#lexical-elements) + * [Data types](#data-types) + * [None](#none) + * [Booleans](#booleans) + * [Integers](#integers) + * [Floating-point numbers](#floating-point-numbers) + * [Strings](#strings) + * [Lists](#lists) + * [Tuples](#tuples) + * [Dictionaries](#dictionaries) + * [Sets](#sets) + * [Functions](#functions) + * [Built-in functions](#built-in-functions) + * [Name binding and variables](#name-binding-and-variables) + * [Value concepts](#value-concepts) + * [Identity and mutation](#identity-and-mutation) + * [Freezing a value](#freezing-a-value) + * [Hashing](#hashing) + * [Sequence types](#sequence-types) + * [Indexing](#indexing) + * [Expressions](#expressions) + * [Identifiers](#identifiers) + * [Literals](#literals) + * [Parenthesized expressions](#parenthesized-expressions) + * [Dictionary expressions](#dictionary-expressions) + * [List expressions](#list-expressions) + * [Unary operators](#unary-operators) + * [Binary operators](#binary-operators) + * [Conditional expressions](#conditional-expressions) + * [Comprehensions](#comprehensions) + * [Function and method calls](#function-and-method-calls) + * [Dot expressions](#dot-expressions) + * [Index expressions](#index-expressions) + * [Slice expressions](#slice-expressions) + * [Lambda expressions](#lambda-expressions) + * [Statements](#statements) + * [Pass statements](#pass-statements) + * [Assignments](#assignments) + * [Augmented assignments](#augmented-assignments) + * [Function definitions](#function-definitions) + * [Return statements](#return-statements) + * [Expression statements](#expression-statements) + * [If statements](#if-statements) + * [For loops](#for-loops) + * [Break and Continue](#break-and-continue) + * [Load statements](#load-statements) + * [Module execution](#module-execution) + * [Built-in constants and functions](#built-in-constants-and-functions) + * [None](#none) + * [True and 
False](#true-and-false) + * [any](#any) + * [all](#all) + * [bool](#bool) + * [chr](#chr) + * [dict](#dict) + * [dir](#dir) + * [enumerate](#enumerate) + * [fail](#fail) + * [float](#float) + * [getattr](#getattr) + * [hasattr](#hasattr) + * [hash](#hash) + * [int](#int) + * [len](#len) + * [list](#list) + * [max](#max) + * [min](#min) + * [ord](#ord) + * [print](#print) + * [range](#range) + * [repr](#repr) + * [reversed](#reversed) + * [set](#set) + * [sorted](#sorted) + * [str](#str) + * [tuple](#tuple) + * [type](#type) + * [zip](#zip) + * [Built-in methods](#built-in-methods) + * [dict·clear](#dict·clear) + * [dict·get](#dict·get) + * [dict·items](#dict·items) + * [dict·keys](#dict·keys) + * [dict·pop](#dict·pop) + * [dict·popitem](#dict·popitem) + * [dict·setdefault](#dict·setdefault) + * [dict·update](#dict·update) + * [dict·values](#dict·values) + * [list·append](#list·append) + * [list·clear](#list·clear) + * [list·extend](#list·extend) + * [list·index](#list·index) + * [list·insert](#list·insert) + * [list·pop](#list·pop) + * [list·remove](#list·remove) + * [set·union](#set·union) + * [string·capitalize](#string·capitalize) + * [string·codepoint_ords](#string·codepoint_ords) + * [string·codepoints](#string·codepoints) + * [string·count](#string·count) + * [string·elem_ords](#string·elem_ords) + * [string·elems](#string·elems) + * [string·endswith](#string·endswith) + * [string·find](#string·find) + * [string·format](#string·format) + * [string·index](#string·index) + * [string·isalnum](#string·isalnum) + * [string·isalpha](#string·isalpha) + * [string·isdigit](#string·isdigit) + * [string·islower](#string·islower) + * [string·isspace](#string·isspace) + * [string·istitle](#string·istitle) + * [string·isupper](#string·isupper) + * [string·join](#string·join) + * [string·lower](#string·lower) + * [string·lstrip](#string·lstrip) + * [string·partition](#string·partition) + * [string·replace](#string·replace) + * [string·rfind](#string·rfind) + * 
[string·rindex](#string·rindex) + * [string·rpartition](#string·rpartition) + * [string·rsplit](#string·rsplit) + * [string·rstrip](#string·rstrip) + * [string·split](#string·split) + * [string·splitlines](#string·splitlines) + * [string·startswith](#string·startswith) + * [string·strip](#string·strip) + * [string·title](#string·title) + * [string·upper](#string·upper) + * [Dialect differences](#dialect-differences) + + +## Lexical elements + +A Starlark program consists of one or more modules. +Each module is defined by a single UTF-8-encoded text file. + +A complete grammar of Starlark can be found in [grammar.txt](../syntax/grammar.txt). +That grammar is presented piecemeal throughout this document +in boxes such as this one, which explains the notation: + +```grammar {.good} +Grammar notation + +- lowercase and 'quoted' items are lexical tokens. +- Capitalized names denote grammar productions. +- (...) implies grouping. +- x | y means either x or y. +- [x] means x is optional. +- {x} means x is repeated zero or more times. +- The end of each declaration is marked with a period. +``` + +The contents of a Starlark file are broken into a sequence of tokens of +five kinds: white space, punctuation, keywords, identifiers, and literals. +Each token is formed from the longest sequence of characters that +would form a valid token of each kind. + +```grammar {.good} +File = {Statement | newline} eof . +``` + +*White space* consists of spaces (U+0020), tabs (U+0009), carriage +returns (U+000D), and newlines (U+000A). Within a line, white space +has no effect other than to delimit the previous token, but newlines, +and spaces at the start of a line, are significant tokens. + +*Comments*: A hash character (`#`) appearing outside of a string +literal marks the start of a comment; the comment extends to the end +of the line, not including the newline character. +Comments are treated like other white space. 
+ +*Punctuation*: The following punctuation characters or sequences of +characters are tokens: + +```text ++ - * / // % = ++= -= *= /= //= %= == != +^ < > << >> & | +^= <= >= <<= >>= &= |= +. , ; : ~ ** +( ) [ ] { } +``` + +*Keywords*: The following tokens are keywords and may not be used as +identifiers: + +```text +and elif in or +break else lambda pass +continue for load return +def if not while +``` + +The tokens below also may not be used as identifiers although they do not +appear in the grammar; they are reserved as possible future keywords: + +<!-- and to remain a syntactic subset of Python --> + +```text +as finally nonlocal +assert from raise +class global try +del import with +except is yield +``` + +<b>Implementation note:</b> +The Go implementation permits `assert` to be used as an identifier, +and this feature is widely used in its tests. + +*Identifiers*: an identifier is a sequence of Unicode letters, decimal + digits, and underscores (`_`), not starting with a digit. +Identifiers are used as names for values. + +Examples: + +```text +None True len +x index starts_with arg0 +``` + +*Literals*: literals are tokens that denote specific values. Starlark +has string, integer, and floating-point literals. + +```text +0 # int +123 # decimal int +0x7f # hexadecimal int +0o755 # octal int +0b1011 # binary int + +0.0 0. .0 # float +1e10 1e+10 1e-10 +1.1e10 1.1e+10 1.1e-10 + +"hello" 'hello' # string +'''hello''' """hello""" # triple-quoted string +r'hello' r"hello" # raw string literal +``` + +Integer and floating-point literal tokens are defined by the following grammar: + +```grammar {.good} +int = decimal_lit | octal_lit | hex_lit | binary_lit . +decimal_lit = ('1' … '9') {decimal_digit} | '0' . +octal_lit = '0' ('o'|'O') octal_digit {octal_digit} . +hex_lit = '0' ('x'|'X') hex_digit {hex_digit} . +binary_lit = '0' ('b'|'B') binary_digit {binary_digit} . + +float = decimals '.' [decimals] [exponent] + | decimals exponent + | '.' decimals [exponent] + . 
+decimals = decimal_digit {decimal_digit} . +exponent = ('e'|'E') ['+'|'-'] decimals . + +decimal_digit = '0' … '9' . +octal_digit = '0' … '7' . +hex_digit = '0' … '9' | 'A' … 'F' | 'a' … 'f' . +binary_digit = '0' | '1' . +``` + +### String literals + +A Starlark string literal denotes a string value. +In its simplest form, it consists of the desired text +surrounded by matching single- or double-quotation marks: + +```python +"abc" +'abc' +``` + +Literal occurrences of the chosen quotation mark character must be +escaped by a preceding backslash. So, if a string contains several +of one kind of quotation mark, it may be convenient to quote the string +using the other kind, as in these examples: + +```python +'Have you read "To Kill a Mockingbird?"' +"Yes, it's a classic." + +"Have you read \"To Kill a Mockingbird?\"" +'Yes, it\'s a classic.' +``` + +Literal occurrences of the _opposite_ kind of quotation mark, such as +an apostrophe within a double-quoted string literal, may be escaped +by a backslash, but this is not necessary: `"it's"` and `"it\'s"` are +equivalent. + + +#### String escapes + +Within a string literal, the backslash character `\` indicates the +start of an _escape sequence_, a notation for expressing things that +are impossible or awkward to write directly. + +The following *traditional escape sequences* represent the ASCII control +codes 7-13: + +``` +\a \x07 alert or bell +\b \x08 backspace +\f \x0C form feed +\n \x0A line feed +\r \x0D carriage return +\t \x09 horizontal tab +\v \x0B vertical tab +``` + +A *literal backslash* is written using the escape `\\`. + +An *escaped newline*---that is, a backslash at the end of a line---is ignored, +allowing a long string to be split across multiple lines of the source file. + +```python +"abc\ +def" # "abcdef" +``` + +An *octal escape* encodes a single byte using its octal value. +It consists of a backslash followed by one, two, or three octal digits [0-7]. 
+It is an error if the value is greater than decimal 255. + +```python +'\0' # "\x00" a string containing a single NUL byte +'\12' # "\n" octal 12 = decimal 10 +'\101-\132' # "A-Z" +'\119' # "\t9" = "\11" + "9" +``` + +<b>Implementation note:</b> +The Java implementation encodes strings using UTF-16, +so an octal escape encodes a single UTF-16 code unit. +Octal escapes for values above 127 are therefore not portable across implementations. +There is little reason to use octal escapes in new code. + +A *hex escape* encodes a single byte using its hexadecimal value. +It consists of `\x` followed by exactly two hexadecimal digits [0-9A-Fa-f]. + +```python +"\x00" # "\x00" a string containing a single NUL byte +"(\x20)" # "( )" ASCII 0x20 = 32 = space + +red, reset = "\x1b[31m", "\x1b[0m" # ANSI terminal control codes for color +"(" + red + "hello" + reset + ")" # "(hello)" with red text, if on a terminal +``` + +<b>Implementation note:</b> +The Java implementation does not support hex escapes. + +An ordinary string literal may not contain an unescaped newline, +but a *multiline string literal* may spread over multiple source lines. +It is denoted using three quotation marks at start and end. +Within it, unescaped newlines and quotation marks (or even pairs of +quotation marks) have their literal meaning, but three quotation marks +end the literal. This makes it easy to quote large blocks of text with +few escapes. + +``` +haiku = ''' +Yesterday it worked. +Today it is not working. +That's computers. Sigh. +''' +``` + +Regardless of the platform's convention for text line endings---for +example, a linefeed (\n) on UNIX, or a carriage return followed by a +linefeed (\r\n) on Microsoft Windows---an unescaped line ending in a +multiline string literal always denotes a line feed (\n). + +Starlark also supports *raw string literals*, which look like an +ordinary single- or double-quotation preceded by `r`.
Within a raw +string literal, there is no special processing of backslash escapes, +other than an escaped quotation mark (which denotes a literal +quotation mark), or an escaped newline (which denotes a backslash +followed by a newline). This form of quotation is typically used when +writing strings that contain many quotation marks or backslashes (such +as regular expressions or shell commands) to reduce the burden of +escaping: + +```python +"a\nb" # "a\nb" = 'a' + '\n' + 'b' +r"a\nb" # "a\\nb" = 'a' + '\\' + 'n' + 'b' + +"a\ +b" # "ab" +r"a\ +b" # "a\\\nb" +``` + +It is an error for a backslash to appear within a string literal other +than as part of one of the escapes described above. + +TODO: define indent, outdent, semicolon, newline, eof + +## Data types + +These are the main data types built in to the interpreter: + +```python +NoneType # the type of None +bool # True or False +int # a signed integer of arbitrary magnitude +float # an IEEE 754 double-precision floating point number +string # a byte string +list # a modifiable sequence of values +tuple # an unmodifiable sequence of values +dict # a mapping from values to values +set # a set of values +function # a function implemented in Starlark +builtin_function_or_method # a function or method implemented by the interpreter or host application +``` + +Some functions, such as the iteration methods of `string`, or the +`range` function, return instances of special-purpose types that don't +appear in this list. +Additional data types may be defined by the host application into +which the interpreter is embedded, and those data types may +participate in basic operations of the language such as arithmetic, +comparison, indexing, and function calls. + +<!-- We needn't mention the stringIterable type here. --> + +Some operations can be applied to any Starlark value. 
For example, +every value has a type string that can be obtained with the expression +`type(x)`, and any value may be converted to a string using the +expression `str(x)`, or to a Boolean truth value using the expression +`bool(x)`. Other operations apply only to certain types. For +example, the indexing operation `a[i]` works only with strings, lists, +and tuples, and any application-defined types that are _indexable_. +The [_value concepts_](#value-concepts) section explains the groupings of +types by the operators they support. + + +### None + +`None` is a distinguished value used to indicate the absence of any other value. +For example, the result of a call to a function that contains no return statement is `None`. + +`None` is equal only to itself. Its [type](#type) is `"NoneType"`. +The truth value of `None` is `False`. + + +### Booleans + +There are two Boolean values, `True` and `False`, representing the +truth or falsehood of a predicate. The [type](#type) of a Boolean is `"bool"`. + +Boolean values are typically used as conditions in `if`-statements, +although any Starlark value used as a condition is implicitly +interpreted as a Boolean. +For example, the values `None`, `0`, `0.0`, and the empty sequences +`""`, `()`, `[]`, and `{}` have a truth value of `False`, whereas non-zero +numbers and non-empty sequences have a truth value of `True`. +Application-defined types determine their own truth value. +Any value may be explicitly converted to a Boolean using the built-in `bool` +function. + +```python +1 + 1 == 2 # True +2 + 2 == 5 # False + +if 1 + 1: + print("True") +else: + print("False") +``` + +### Integers + +The Starlark integer type represents integers. Its [type](#type) is `"int"`. + +Integers may be positive or negative, and arbitrarily large. +Integer arithmetic is exact. +Integers are totally ordered; comparisons follow mathematical +tradition. + +The `+` and `-` operators perform addition and subtraction, respectively. 
+The `*` operator performs multiplication. + +The `//` and `%` operations on integers compute floored division and +remainder of floored division, respectively. +If the signs of the operands differ, the sign of the remainder `x % y` +matches that of the divisor, `y`. +For all finite x and y (y ≠ 0), `(x // y) * y + (x % y) == x`. +The `/` operator implements real division, and +yields a `float` result even when its operands are both of type `int`. + +Integers, including negative values, may be interpreted as bit vectors. +The `|`, `&`, and `^` operators implement bitwise OR, AND, and XOR, +respectively. The unary `~` operator yields the bitwise inversion of its +integer argument. The `<<` and `>>` operators shift the first argument +to the left or right by the number of bits given by the second argument. + +Any bool, number, or string may be interpreted as an integer by using +the `int` built-in function. + +An integer used in a Boolean context is considered true if it is +non-zero. + +```python +100 // 5 * 9 + 32 # 212 +3 // 2 # 1 +3 / 2 # 1.5 +111111111 * 111111111 # 12345678987654321 +"0x%x" % (0x1234 & 0xf00f) # "0x1004" +int("ffff", 16) # 65535, 0xffff +``` + +### Floating-point numbers + +The Starlark floating-point data type represents an IEEE 754 +double-precision floating-point number. Its [type](#type) is `"float"`. + +Arithmetic on floats using the `+`, `-`, `*`, `/`, `//`, and `%` + operators follows the IEEE 754 standard. +However, computing the division or remainder of division by zero is a dynamic error. + +An arithmetic operation applied to a mixture of `float` and `int` +operands works as if the `int` operand is first converted to a +`float`. For example, `3.141 + 1` is equivalent to `3.141 + +float(1)`. +There are two floating-point division operators: +`x / y` yields the floating-point quotient of `x` and `y`, +whereas `x // y` yields `floor(x / y)`, that is, the largest +integer value not greater than `x / y`.
+Although the resulting number is integral, it is represented as a +`float` if either operand is a `float`. + +The `%` operation computes the remainder of floored division. +As with the corresponding operation on integers, +if the signs of the operands differ, the sign of the remainder `x % y` +matches that of the divisor, `y`. + +The infinite float values `+Inf` and `-Inf` represent numbers +greater/less than all finite float values. + +The non-finite `NaN` value represents the result of dubious operations +such as `Inf/Inf`. A NaN value compares neither less than, nor +greater than, nor equal to any value, including itself. + +All floats other than NaN are totally ordered, so they may be compared +using operators such as `==` and `<`. + +Any bool, number, or string may be interpreted as a floating-point +number by using the `float` built-in function. + +A float used in a Boolean context is considered true if it is +non-zero. + +```python +1.23e45 * 1.23e45 # 1.5129e+90 +1.111111111111111 * 1.111111111111111 # 1.23457 +3.0 / 2 # 1.5 +3 / 2.0 # 1.5 +float(3) / 2 # 1.5 +3.0 // 2.0 # 1 +``` + +### Strings + +A string represents an immutable sequence of bytes. +The [type](#type) of a string is `"string"`. + +Strings can represent arbitrary binary data, including zero bytes, but +most strings contain text, encoded by convention using UTF-8. + +The built-in `len` function returns the number of bytes in a string. + +Strings may be concatenated with the `+` operator. + +The substring expression `s[i:j]` returns the substring of `s` from +index `i` up to index `j`. The index expression `s[i]` returns the +1-byte substring `s[i:i+1]`. + +Strings are hashable, and thus may be used as keys in a dictionary. + +Strings are totally ordered lexicographically, so strings may be +compared using operators such as `==` and `<`. 
+ +Strings are _not_ iterable sequences, so they cannot be used as the operand of +a `for`-loop, list comprehension, or any other operation that requires +an iterable sequence. +To obtain a view of a string as an iterable sequence of numeric byte +values, 1-byte substrings, numeric Unicode code points, or 1-code +point substrings, you must explicitly call one of its four methods: +`elems`, `elem_ords`, `codepoints`, or `codepoint_ords`. + +Any value may be formatted as a string using the `str` or `repr` built-in +functions, the `str % tuple` operator, or the `str.format` method. + +A string used in a Boolean context is considered true if it is +non-empty. + +Strings have several built-in methods: + +* [`capitalize`](#string·capitalize) +* [`codepoint_ords`](#string·codepoint_ords) +* [`codepoints`](#string·codepoints) +* [`count`](#string·count) +* [`elem_ords`](#string·elem_ords) +* [`elems`](#string·elems) +* [`endswith`](#string·endswith) +* [`find`](#string·find) +* [`format`](#string·format) +* [`index`](#string·index) +* [`isalnum`](#string·isalnum) +* [`isalpha`](#string·isalpha) +* [`isdigit`](#string·isdigit) +* [`islower`](#string·islower) +* [`isspace`](#string·isspace) +* [`istitle`](#string·istitle) +* [`isupper`](#string·isupper) +* [`join`](#string·join) +* [`lower`](#string·lower) +* [`lstrip`](#string·lstrip) +* [`partition`](#string·partition) +* [`replace`](#string·replace) +* [`rfind`](#string·rfind) +* [`rindex`](#string·rindex) +* [`rpartition`](#string·rpartition) +* [`rsplit`](#string·rsplit) +* [`rstrip`](#string·rstrip) +* [`split`](#string·split) +* [`splitlines`](#string·splitlines) +* [`startswith`](#string·startswith) +* [`strip`](#string·strip) +* [`title`](#string·title) +* [`upper`](#string·upper) + +<b>Implementation note:</b> +The type of a string element varies across implementations.
+There is agreement that byte strings, with text conventionally encoded +using UTF-8, is the ideal choice, but the Java implementation treats +strings as sequences of UTF-16 codes and changing it appears +intractable; see Google Issue b/36360490. + +<b>Implementation note:</b> +The Java implementation does not consistently treat strings as +iterable; see `testdata/string.star` in the test suite and Google Issue +b/34385336 for further details. + +### Lists + +A list is a mutable sequence of values. +The [type](#type) of a list is `"list"`. + +Lists are indexable sequences: the elements of a list may be iterated +over by `for`-loops, list comprehensions, and various built-in +functions. + +Lists may be constructed using bracketed list notation: + +```python +[] # an empty list +[1] # a 1-element list +[1, 2] # a 2-element list +``` + +Lists can also be constructed from any iterable sequence by using the +built-in `list` function. + +The built-in `len` function applied to a list returns the number of elements. +The index expression `list[i]` returns the element at index i, +and the slice expression `list[i:j]` returns a new list consisting of +the elements at indices from i to j. + +List elements may be added using the `append` or `extend` methods, +removed using the `remove` method, or reordered by assignments such as +`list[i] = list[j]`. + +The concatenation operation `x + y` yields a new list containing all +the elements of the two lists x and y. + +For most types, `x += y` is equivalent to `x = x + y`, except that it +evaluates `x` only once, that is, it allocates a new list to hold +the concatenation of `x` and `y`. +However, if `x` refers to a list, the statement does not allocate a +new list but instead mutates the original list in place, similar to +`x.extend(y)`. + +Lists are not hashable, so may not be used in the keys of a dictionary. + +A list used in a Boolean context is considered true if it is +non-empty.
+ +A [_list comprehension_](#comprehensions) creates a new list whose elements are the +result of some expression applied to each element of another sequence. + +```python +[x*x for x in [1, 2, 3, 4]] # [1, 4, 9, 16] +``` + +A list value has these methods: + +* [`append`](#list·append) +* [`clear`](#list·clear) +* [`extend`](#list·extend) +* [`index`](#list·index) +* [`insert`](#list·insert) +* [`pop`](#list·pop) +* [`remove`](#list·remove) + +### Tuples + +A tuple is an immutable sequence of values. +The [type](#type) of a tuple is `"tuple"`. + +Tuples are constructed using parenthesized list notation: + +```python +() # the empty tuple +(1,) # a 1-tuple +(1, 2) # a 2-tuple ("pair") +(1, 2, 3) # a 3-tuple +``` + +Observe that for the 1-tuple, the trailing comma is necessary to +distinguish it from the parenthesized expression `(1)`. +1-tuples are seldom used. + +Starlark, unlike Python, does not permit a trailing comma to appear in +an unparenthesized tuple expression: + +```python +for k, v, in dict.items(): pass # syntax error at 'in' +_ = [(v, k) for k, v, in dict.items()] # syntax error at 'in' +f = lambda a, b, : None # syntax error at ':' + +sorted(3, 1, 4, 1,) # ok +[1, 2, 3, ] # ok +{1: 2, 3:4, } # ok +``` + +Any iterable sequence may be converted to a tuple by using the +built-in `tuple` function. + +Like lists, tuples are indexed sequences, so they may be indexed and +sliced. The index expression `tuple[i]` returns the tuple element at +index i, and the slice expression `tuple[i:j]` returns a sub-sequence +of a tuple. + +Tuples are iterable sequences, so they may be used as the operand of a +`for`-loop, a list comprehension, or various built-in functions. + +Unlike lists, tuples cannot be modified. +However, the mutable elements of a tuple may be modified. + +Tuples are hashable (assuming their elements are hashable), +so they may be used as keys of a dictionary. + +Tuples may be concatenated using the `+` operator. 
+ +A tuple used in a Boolean context is considered true if it is +non-empty. + + +### Dictionaries + +A dictionary is a mutable mapping from keys to values. +The [type](#type) of a dictionary is `"dict"`. + +Dictionaries provide constant-time operations to insert an element, to +look up the value for a key, or to remove an element. Dictionaries +are implemented using hash tables, so keys must be hashable. Hashable +values include `None`, Booleans, numbers, strings, and tuples +composed from hashable values. Most mutable values, such as lists, +dictionaries, and sets, are not hashable, even when frozen. +Attempting to use a non-hashable value as a key in a dictionary +results in a dynamic error. + +A [dictionary expression](#dictionary-expressions) specifies a +dictionary as a set of key/value pairs enclosed in braces: + +```python +coins = { + "penny": 1, + "nickel": 5, + "dime": 10, + "quarter": 25, +} +``` + +The expression `d[k]`, where `d` is a dictionary and `k` is a key, +retrieves the value associated with the key. If the dictionary +contains no such item, the operation fails: + +```python +coins["penny"] # 1 +coins["dime"] # 10 +coins["silver dollar"] # error: key not found +``` + +The number of items in a dictionary `d` is given by `len(d)`. +A key/value item may be added to a dictionary, or updated if the key +is already present, by using `d[k]` on the left side of an assignment: + +```python +len(coins) # 4 +coins["shilling"] = 20 +len(coins) # 5, item was inserted +coins["shilling"] = 5 +len(coins) # 5, existing item was updated +``` + +A dictionary can also be constructed using a [dictionary +comprehension](#comprehensions), which evaluates a pair of expressions, +the _key_ and the _value_, for every element of another iterable such +as a list.
This example builds a mapping from each word to its length +in bytes: + +```python +words = ["able", "baker", "charlie"] +{x: len(x) for x in words} # {"able": 4, "baker": 5, "charlie": 7} +``` + +Dictionaries are iterable sequences, so they may be used as the +operand of a `for`-loop, a list comprehension, or various built-in +functions. +Iteration yields the dictionary's keys in the order in which they were +inserted; updating the value associated with an existing key does not +affect the iteration order. + +```python +x = dict([("a", 1), ("b", 2)]) # {"a": 1, "b": 2} +x.update([("a", 3), ("c", 4)]) # {"a": 3, "b": 2, "c": 4} +``` + +```python +for name in coins: + print(name, coins[name]) # prints "penny 1", "nickel 5", ... +``` + +Like all mutable values in Starlark, a dictionary can be frozen, and +once frozen, all subsequent operations that attempt to update it will +fail. + +A dictionary used in a Boolean context is considered true if it is +non-empty. + +Dictionaries may be compared for equality using `==` and `!=`. Two +dictionaries compare equal if they contain the same number of items +and each key/value item (k, v) found in one dictionary is also present +in the other. Dictionaries are not ordered; it is an error to compare +two dictionaries with `<`. + + +A dictionary value has these methods: + +* [`clear`](#dict·clear) +* [`get`](#dict·get) +* [`items`](#dict·items) +* [`keys`](#dict·keys) +* [`pop`](#dict·pop) +* [`popitem`](#dict·popitem) +* [`setdefault`](#dict·setdefault) +* [`update`](#dict·update) +* [`values`](#dict·values) + +### Sets + +A set is a mutable set of values. +The [type](#type) of a set is `"set"`. + +Like dictionaries, sets are implemented using hash tables, so the +elements of a set must be hashable. + +Sets may be compared for equality or inequality using `==` and `!=`. +Two sets compare equal if they contain the same elements.
+ +Sets are iterable sequences, so they may be used as the operand of a +`for`-loop, a list comprehension, or various built-in functions. +Iteration yields the set's elements in the order in which they were +inserted. + +The binary `|` and `&` operators compute union and intersection when +applied to sets. The right operand of the `|` operator may be any +iterable value. The binary `in` operator performs a set membership +test when its right operand is a set. + +The binary `^` operator performs symmetric difference of two sets. + +Sets are instantiated by calling the built-in `set` function, which +returns a set containing all the elements of its optional argument, +which must be an iterable sequence. Sets have no literal syntax. + +The only method of a set is `union`, which is equivalent to the `|` operator. + +A set used in a Boolean context is considered true if it is non-empty. + +<b>Implementation note:</b> +The Go implementation of Starlark requires the `-set` flag to +enable support for sets. +The Java implementation does not support sets. + + +### Functions + +A function value represents a function defined in Starlark. +Its [type](#type) is `"function"`. +A function value used in a Boolean context is always considered true. + +Functions defined by a [`def` statement](#function-definitions) are named; +functions defined by a [`lambda` expression](#lambda-expressions) are anonymous. + +Function definitions may be nested, and an inner function may refer to a local variable of an outer function. + +A function definition defines zero or more named parameters. +Starlark has a rich mechanism for passing arguments to functions. + +<!-- TODO break up this explanation into caller-side and callee-side + parts, and put the former under function calls and the latter + under function definitions. Also try to convey that the Callable + interface sees the flattened-out args and kwargs and that's what + built-ins get. 
+--> + +The example below shows a definition and call of a function of two +required parameters, `x` and `y`. + +```python +def idiv(x, y): + return x // y + +idiv(6, 3) # 2 +``` + +A call may provide arguments to function parameters either by +position, as in the example above, or by name, as in the first two calls +below, or by a mixture of the two forms, as in the third call below. +All the positional arguments must precede all the named arguments. +Named arguments may improve clarity, especially in functions of +several parameters. + +```python +idiv(x=6, y=3) # 2 +idiv(y=3, x=6) # 2 + +idiv(6, y=3) # 2 +``` + +<b>Optional parameters:</b> A parameter declaration may specify a +default value using `name=value` syntax; such a parameter is +_optional_. The default value expression is evaluated during +execution of the `def` statement or evaluation of the `lambda` +expression, and the default value forms part of the function value. +All optional parameters must follow all non-optional parameters. +A function call may omit arguments for any suffix of the optional +parameters; the effective values of those arguments are supplied by +the function's parameter defaults. + +```python +def f(x, y=3): + return x, y + +f(1, 2) # (1, 2) +f(1) # (1, 3) +``` + +If a function parameter's default value is a mutable expression, +modifications to the value during one call may be observed by +subsequent calls. +Beware of this when using lists or dicts as default values. +If the function becomes frozen, its parameters' default values become +frozen too. + +```python +# module a.star +def f(x, list=[]): + list.append(x) + return list + +f(4, [1,2,3]) # [1, 2, 3, 4] +f(1) # [1] +f(2) # [1, 2], not [2]! + +# module b.star +load("a.star", "f") +f(3) # error: cannot append to frozen list +``` + +<b>Variadic functions:</b> Some functions allow callers to provide an +arbitrary number of arguments.
+After all required and optional parameters, a function definition may +specify a _variadic arguments_ or _varargs_ parameter, indicated by a +star preceding the parameter name: `*args`. +Any surplus positional arguments provided by the caller are formed +into a tuple and assigned to the `args` parameter. + +```python +def f(x, y, *args): + return x, y, args + +f(1, 2) # (1, 2, ()) +f(1, 2, 3, 4) # (1, 2, (3, 4)) +``` + +<b>Keyword-variadic functions:</b> Some functions allow callers to +provide an arbitrary sequence of `name=value` keyword arguments. +A function definition may include a final _keyword arguments_ or +_kwargs_ parameter, indicated by a double-star preceding the parameter +name: `**kwargs`. +Any surplus named arguments that do not correspond to named parameters +are collected in a new dictionary and assigned to the `kwargs` parameter: + +```python +def f(x, y, **kwargs): + return x, y, kwargs + +f(1, 2) # (1, 2, {}) +f(x=2, y=1) # (2, 1, {}) +f(x=2, y=1, z=3) # (2, 1, {"z": 3}) +``` + +It is a static error if any two parameters of a function have the same name. + +Just as a function definition may accept an arbitrary number of +positional or named arguments, a function call may provide an +arbitrary number of positional or named arguments supplied by a +list or dictionary: + +```python +def f(a, b, c=5): + return a * b + c + +f(*[2, 3]) # 11 +f(*[2, 3, 7]) # 13 +f(*[2]) # error: f takes at least 2 arguments (1 given) + +f(**dict(b=3, a=2)) # 11 +f(**dict(c=7, a=2, b=3)) # 13 +f(**dict(a=2)) # error: f takes at least 2 arguments (1 given) +f(**dict(d=4)) # error: f got unexpected keyword argument "d" +``` + +Once the parameters have been successfully bound to the arguments +supplied by the call, the sequence of statements that comprise the +function body is executed. + +It is a static error if a function call has two named arguments of the +same name, such as `f(x=1, x=2)`. 
A call that provides a `**kwargs` +argument may yet have two values for the same name, such as +`f(x=1, **dict(x=2))`. This results in a dynamic error. + +Function arguments are evaluated in the order they appear in the call. +<!-- see https://github.com/bazelbuild/starlark/issues/13 --> + +Unlike Python, Starlark does not allow more than one `*args` argument in a +call, and if a `*args` argument is present it must appear after all +positional and named arguments. + +The final argument to a function call may be followed by a trailing comma. + +A function call completes normally after the execution of either a +`return` statement, or of the last statement in the function body. +The result of the function call is the value of the return statement's +operand, or `None` if the return statement had no operand or if the +function completed without executing a return statement. + +```python +def f(x): + if x == 0: + return + if x < 0: + return -x + print(x) + +f(1) # returns None after printing "1" +f(0) # returns None without printing +f(-1) # returns 1 without printing +``` + +<b>Implementation note:</b> +The Go implementation of Starlark requires the `-recursion` +flag to allow recursive functions. + + +If the `-recursion` flag is not specified it is a dynamic error for a +function to call itself or another function value with the same +declaration. + +```python +def fib(x): + if x < 2: + return x + return fib(x-2) + fib(x-1) # dynamic error: function fib called recursively + +fib(5) +``` + +This rule, combined with the invariant that all loops are iterations +over finite sequences, implies that Starlark programs cannot be +Turing complete unless the `-recursion` flag is specified. + +<!-- This rule is supposed to deter people from abusing Starlark for + inappropriate uses, especially in the build system. + It may work for that purpose, but it doesn't stop Starlark programs + from consuming too much time or space. Perhaps it should be a + dialect option.
+--> + + + +### Built-in functions + +A built-in function is a function or method implemented in Go by the interpreter +or the application into which the interpreter is embedded. + +The [type](#type) of a built-in function is `"builtin_function_or_method"`. + +A built-in function value used in a Boolean context is always considered true. + +Many built-in functions are predeclared in the environment +(see [Name Resolution](#name-resolution)). +Some built-in functions such as `len` are _universal_, that is, +available to all Starlark programs. +The host application may predeclare additional built-in functions +in the environment of a specific module. + +Except where noted, built-in functions accept only positional arguments. +The parameter names serve merely as documentation. + +Most built-in functions that have a Boolean parameter require its +argument to be `True` or `False`. Unlike `if` statements, other values +are not implicitly converted to their truth value and instead cause a +dynamic error. + + +## Name binding and variables + +After a Starlark file is parsed, but before its execution begins, the +Starlark interpreter checks statically that the program is well formed. +For example, `break` and `continue` statements may appear only within +a loop; a `return` statement may appear only within a +function; and `load` statements may appear only outside any function. + +_Name resolution_ is the static checking process that +resolves names to variable bindings. +During execution, names refer to variables. Statically, names denote +places in the code where variables are created; these places are +called _bindings_. A name may denote different bindings at different +places in the program. The region of text in which a particular name +refers to the same binding is called that binding's _scope_. 
+ +Four Starlark constructs bind names, as illustrated in the example below: +`load` statements (`a` and `b`), +`def` statements (`c`), +function parameters (`d`), +and assignments (`e`, `h`, including the augmented assignment `e += 1`). +Variables may be assigned or re-assigned explicitly (`e`, `h`), or implicitly, as +in a `for`-loop (`f`) or comprehension (`g`, `i`). + +```python +load("lib.star", "a", b="B") + +def c(d): + e = 0 + for f in d: + print([True for g in f]) + e += 1 + +h = [2*i for i in a] +``` + +The environment of a Starlark program is structured as a tree of +_lexical blocks_, each of which may contain name bindings. +The tree of blocks is parallel to the syntax tree. +Blocks are of five kinds. + +<!-- Avoid the term "built-in" block since that's also a type. --> +At the root of the tree is the _predeclared_ block, +which binds several names implicitly. +The set of predeclared names includes the universal +constant values `None`, `True`, and `False`, and +various built-in functions such as `len` and `list`; +these functions are immutable and stateless. +An application may pre-declare additional names +to provide domain-specific functions to that file, for example. +These additional functions may have side effects on the application. +Starlark programs cannot change the set of predeclared bindings +or assign new values to them. + +Nested beneath the predeclared block is the _module_ block, +which contains the bindings of the current module. +Bindings in the module block (such as `c`, and `h` in the +example) are called _global_ and may be visible to other modules. +The module block is empty at the start of the file +and is populated by top-level binding statements. + +Nested beneath the module block is the _file_ block, +which contains bindings local to the current file. +Names in this block (such as `a` and `b` in the example) +are bound only by `load` statements. 
+The sets of names bound in the file block and in the module block do not overlap: +it is an error for a load statement to bind the name of a global, +or for a top-level statement to bind a name bound by a load statement. + +A file block contains a _function_ block for each top-level +function, and a _comprehension_ block for each top-level comprehension. +Bindings in either of these kinds of block, +and in the file block itself, are called _local_. +(In the example, the bindings for `e`, `f`, `g`, and `i` are all local.) +Additional functions and comprehensions, and their blocks, may be +nested in any order, to any depth. + +If a name is bound anywhere within a block, all uses of the name within +the block are treated as references to that binding, +even if the use appears before the binding. +This is true even at the top level, unlike Python. +The binding of `y` on the last line of the example below makes `y` +local to the function `hello`, so the use of `y` in the print +statement also refers to the local `y`, even though it appears +earlier. + +```python +y = "goodbye" + +def hello(): + for x in (1, 2): + if x == 2: + print(y) # prints "hello" + if x == 1: + y = "hello" +``` +It is a dynamic error to evaluate a reference to a local variable +before it has been bound: + +```python +def f(): + print(x) # dynamic error: local variable x referenced before assignment + x = "hello" +``` + +The same is true for global variables: + +```python +print(x) # dynamic error: global variable x referenced before assignment +x = "hello" +``` + +The same is also true for nested loops in comprehensions. +In the (unnatural) examples below, the scope of the variables `x`, `y`, +and `z` is the entire comprehension block, except the operand of the first +loop (`[]` or `[1]`), which is resolved in the enclosing environment. +The second loop may thus refer to variables defined by the third (`z`), +even though such references would fail if actually executed. 
+ +``` +[1//0 for x in [] for y in z for z in ()] # [] (no error) +[1//0 for x in [1] for y in z for z in ()] # dynamic error: local variable z referenced before assignment +``` + + +<!-- This is similar to Python[23]. Presumed rational: it resembles + the desugaring to nested loop statements, in which the scope + of all three variables is the entire enclosing function, + including the portion before the bindings. + + def f(): + ... + for x in []: + for y in z: + for z in (): + 1//0 +--> + +It is a static error to refer to a name that has no binding at all. +``` +def f(): + if False: + g() # static error: undefined: g +``` +(This behavior differs from Python, which treats such references as global, +and thus does not report an error until the expression is evaluated.) + +<!-- Consequently, the REPL, which consumes one compound statement at a time, + cannot resolve forward references such as + def f(): return K + K = 1 + because the first chunk has an unresolved reference to K. +--> + +It is a static error to bind a global variable already explicitly bound in the file: + +```python +x = 1 +x = 2 # static error: cannot reassign global x declared on line 1 +``` + +<!-- The above rule, and the rule that forbids if-statements and loops at + top level, exist to ensure that there is exactly one statement + that binds each global variable, which makes cross-referenced + documentation more useful, the designers assure me, but + I am skeptical that it's worth the trouble. --> + +If a name was pre-bound by the application, the Starlark program may +explicitly bind it, but only once. + +An augmented assignment statement such as `x += y` is considered both a +reference to `x` and a binding use of `x`, so it may not be used at +top level. + +<b>Implementation note:</b> +The Go implementation of Starlark permits augmented assignments to appear +at top level if the `-globalreassign` flag is enabled. + +A function may refer to variables defined in an enclosing function. 
+In this example, the inner function `f` refers to a variable `x` +that is local to the outer function `squarer`. +`x` is a _free variable_ of `f`. +The function value (`f`) created by a `def` statement holds a +reference to each of its free variables so it may use +them even after the enclosing function has returned. + +```python +def squarer(): + x = [0] + def f(): + x[0] += 1 + return x[0]*x[0] + return f + +sq = squarer() +print(sq(), sq(), sq(), sq()) # "1 4 9 16" +``` + +An inner function cannot assign to a variable bound in an enclosing +function, because the assignment would bind the variable in the +inner function. +In the example below, the `x += 1` statement binds `x` within `f`, +hiding the outer `x`. +Execution fails because the inner `x` has not been assigned before the +attempt to increment it. + +```python +def squarer(): + x = 0 + def f(): + x += 1 # dynamic error: local variable x referenced before assignment + return x*x + return f + +sq = squarer() +``` + +(Starlark has no equivalent of Python's `nonlocal` or `global` +declarations, but as the first version of `squarer` showed, this +omission can be worked around by using a list of a single element.) + + +A name appearing after a dot, such as `split` in +`get_filename().split('/')`, is not resolved statically. +The [dot expression](#dot-expressions) `.split` is a dynamic operation +on the value returned by `get_filename()`. + + +## Value concepts + +Starlark has eleven core [data types](#data-types). An application +that embeds the Starlark interpreter may define additional types that +behave like Starlark values. 
All values, whether core or +application-defined, implement a few basic behaviors: + +```text +str(x) -- return a string representation of x +type(x) -- return a string describing the type of x +bool(x) -- convert x to a Boolean truth value +``` + +### Identity and mutation + +Starlark is an imperative language: programs consist of sequences of +statements executed for their side effects. +For example, an assignment statement updates the value held by a +variable, and calls to some built-in functions such as `print` change +the state of the application that embeds the interpreter. + +Values of some data types, such as `NoneType`, `bool`, `int`, `float`, and +`string`, are _immutable_; they can never change. +Immutable values have no notion of _identity_: it is impossible for a +Starlark program to tell whether two integers, for instance, are +represented by the same object; it can tell only whether they are +equal. + +Values of other data types, such as `list`, `dict`, and `set`, are +_mutable_: they may be modified by a statement such as `a[i] = 0` or +`items.clear()`. Although `tuple` and `function` values are not +directly mutable, they may refer to mutable values indirectly, so for +this reason we consider them mutable too. Starlark values of these +types are actually _references_ to variables. + +Copying a reference to a variable, using an assignment statement for +instance, creates an _alias_ for the variable, and the effects of +operations applied to the variable through one alias are visible +through all others. + +```python +x = [] # x refers to a new empty list variable +y = x # y becomes an alias for x +x.append(1) # changes the variable referred to by x +print(y) # "[1]"; y observes the mutation +``` + +Starlark uses _call-by-value_ parameter passing: in a function call, +argument values are assigned to function parameters as if by +assignment statements. 
If the values are references, the caller and +callee may refer to the same variables, so if the called function +changes the variable referred to by a parameter, the effect may also +be observed by the caller: + +```python +def f(y): + y.append(1) # changes the variable referred to by x + +x = [] # x refers to a new empty list variable +f(x) # f's parameter y becomes an alias for x +print(x) # "[1]"; x observes the mutation +``` + + +As in all imperative languages, understanding _aliasing_, the +relationship between reference values and the variables to which they +refer, is crucial to writing correct programs. + +### Freezing a value + +Starlark has a feature unusual among imperative programming languages: +a mutable value may be _frozen_ so that all subsequent attempts to +mutate it fail with a dynamic error; the value, and all other values +reachable from it, become _immutable_. + +Immediately after execution of a Starlark module, all values in its +top-level environment are frozen. Because all the global variables of +an initialized Starlark module are immutable, the module may be published to +and used by other threads in a parallel program without the need for +locks. For example, the Bazel build system loads and executes BUILD +and .bzl files in parallel, and two modules being executed +concurrently may freely access variables or call functions from a +third without the possibility of a race condition. + +### Hashing + +The `dict` and `set` data types are implemented using hash tables, so +only _hashable_ values are suitable as keys of a `dict` or elements of +a `set`. Attempting to use a non-hashable value as the key in a hash +table results in a dynamic error. + +The hash of a value is an unspecified integer chosen so that two equal +values have the same hash, in other words, `x == y => hash(x) == hash(y)`. +A hashable value has the same hash throughout its lifetime. 
+ +Values of the types `NoneType`, `bool`, `int`, `float`, and `string`, +which are all immutable, are hashable. + +Values of mutable types such as `list`, `dict`, and `set` are not +hashable. These values remain unhashable even if they have become +immutable due to _freezing_. + +A `tuple` value is hashable only if all its elements are hashable. +Thus `("localhost", 80)` is hashable but `([127, 0, 0, 1], 80)` is not. + +Values of the types `function` and `builtin_function_or_method` are also hashable. +Although functions are not necessarily immutable, as they may be +closures that refer to mutable variables, instances of these types +are compared by reference identity (see [Comparisons](#comparisons)), +so their hash values are derived from their identity. + + +### Sequence types + +Many Starlark data types represent a _sequence_ of values: lists, +tuples, and sets are sequences of arbitrary values, and in many +contexts dictionaries act like a sequence of their keys. + +We can classify different kinds of sequence types based on the +operations they support. +Each is listed below using the name of its corresponding interface in +the interpreter's Go API. + +* `Iterable`: an _iterable_ value lets us process each of its elements in a fixed order. + Examples: `dict`, `set`, `list`, `tuple`, but not `string`. +* `Sequence`: a _sequence of known length_ lets us know how many elements it + contains without processing them. + Examples: `dict`, `set`, `list`, `tuple`, but not `string`. +* `Indexable`: an _indexed_ type has a fixed length and provides efficient + random access to its elements, which are identified by integer indices. + Examples: `string`, `tuple`, and `list`. +* `SetIndexable`: a _settable indexed type_ additionally allows us to modify the + element at a given integer index. Example: `list`. +* `Mapping`: a mapping is an association of keys to values. Example: `dict`. 
+ +Although all of Starlark's core data types for sequences implement at +least the `Sequence` contract, it's possible for an application +that embeds the Starlark interpreter to define additional data types +representing sequences of unknown length that implement only the `Iterable` contract. + +Strings are not iterable, though they do support the `len(s)` and +`s[i]` operations. Starlark deviates from Python here to avoid a common +pitfall in which a string is used by mistake where a list containing a +single string was intended, resulting in its interpretation as a sequence +of bytes. + +Most Starlark operators and built-in functions that need a sequence +of values will accept any iterable. + +It is a dynamic error to mutate a sequence such as a list, set, or +dictionary while iterating over it. + +```python +def increment_values(dict): + for k in dict: + dict[k] += 1 # error: cannot insert into hash table during iteration + +dict = {"one": 1, "two": 2} +increment_values(dict) +``` + + +### Indexing + +Many Starlark operators and functions require an index operand `i`, +such as `a[i]` or `list.insert(i, x)`. Others require two indices `i` +and `j` that indicate the start and end of a sub-sequence, such as +`a[i:j]`, `list.index(x, i, j)`, or `string.find(x, i, j)`. +All such operations follow similar conventions, described here. + +Indexing in Starlark is *zero-based*. The first element of a string +or list has index 0, the next 1, and so on. The last element of a +sequence of length `n` has index `n-1`. + +```python +"hello"[0] # "h" +"hello"[4] # "o" +"hello"[5] # error: index out of range +``` + +For sub-sequence operations that require two indices, the first is +_inclusive_ and the second _exclusive_. Thus `a[i:j]` indicates the +sequence starting with element `i` up to but not including element +`j`. The length of this sub-sequence is `j-i`. This convention is known +as *half-open indexing*. 
+ +```python +"hello"[1:4] # "ell" +``` + +Either or both of the index operands may be omitted. If omitted, the +first is treated equivalent to 0 and the second is equivalent to the +length of the sequence: + +```python +"hello"[1:] # "ello" +"hello"[:4] # "hell" +``` + +It is permissible to supply a negative integer to an indexing +operation. The effective index is computed from the supplied value by +the following two-step procedure. First, if the value is negative, the +length of the sequence is added to it. This provides a convenient way +to address the final elements of the sequence: + +```python +"hello"[-1] # "o", like "hello"[4] +"hello"[-3:-1] # "ll", like "hello"[2:4] +``` + +Second, for sub-sequence operations, if the value is still negative, it +is replaced by zero, or if it is greater than the length `n` of the +sequence, it is replaced by `n`. In effect, the index is "truncated" to +the nearest value in the range `[0:n]`. + +```python +"hello"[-1000:+1000] # "hello" +``` + +This truncation step does not apply to indices of individual elements: + +```python +"hello"[-6] # error: index out of range +"hello"[-5] # "h" +"hello"[4] # "o" +"hello"[5] # error: index out of range +``` + + +## Expressions + +An expression specifies the computation of a value. + +The Starlark grammar defines several categories of expression. +An _operand_ is an expression consisting of a single token (such as an +identifier or a literal), or a bracketed expression. +Operands are self-delimiting. +An operand may be followed by any number of dot, call, or slice +suffixes, to form a _primary_ expression. +In some places in the Starlark grammar where an expression is expected, +it is legal to provide a comma-separated list of expressions denoting +a tuple. +The grammar uses `Expression` where a multiple-component expression is allowed, +and `Test` where it accepts an expression of only a single component. + +```grammar {.good} +Expression = Test {',' Test} . 
+ +Test = LambdaExpr | IfExpr | PrimaryExpr | UnaryExpr | BinaryExpr . + +PrimaryExpr = Operand + | PrimaryExpr DotSuffix + | PrimaryExpr CallSuffix + | PrimaryExpr SliceSuffix + . + +Operand = identifier + | int | float | string + | ListExpr | ListComp + | DictExpr | DictComp + | '(' [Expression] [,] ')' + | ('-' | '+') PrimaryExpr + . + +DotSuffix = '.' identifier . +CallSuffix = '(' [Arguments [',']] ')' . +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . +``` + +TODO: resolve position of +x, -x, and 'not x' in grammar: Operand or UnaryExpr? + +### Identifiers + +```grammar {.good} {.good} +Primary = identifier +``` + +An identifier is a name that identifies a value. + +Lookup of locals and globals may fail if not yet defined. + +### Literals + +Starlark supports literals of three different kinds: + +```grammar {.good} +Primary = int | float | string +``` + +Evaluation of a literal yields a value of the given type (string, int, +or float) with the given value. +See [Literals](#lexical-elements) for details. + +### Parenthesized expressions + +```grammar {.good} +Primary = '(' [Expression] ')' +``` + +A single expression enclosed in parentheses yields the result of that expression. +Explicit parentheses may be used for clarity, +or to override the default association of subexpressions. + +```python +1 + 2 * 3 + 4 # 11 +(1 + 2) * (3 + 4) # 21 +``` + +If the parentheses are empty, or contain a single expression followed +by a comma, or contain two or more expressions, the expression yields a tuple. + +```python +() # (), the empty tuple +(1,) # (1,), a tuple of length 1 +(1, 2) # (1, 2), a 2-tuple or pair +(1, 2, 3) # (1, 2, 3), a 3-tuple or triple +``` + +In some contexts, such as a `return` or assignment statement or the +operand of a `for` statement, a tuple may be expressed without +parentheses. 
+ +```python +x, y = 1, 2 + +return 1, 2 + +for x in 1, 2: + print(x) +``` + +Starlark (like Python 3) does not accept an unparenthesized tuple +expression as the operand of a list comprehension: + +```python +[2*x for x in 1, 2, 3] # parse error: unexpected ',' +``` + +### Dictionary expressions + +A dictionary expression is a comma-separated list of colon-separated +key/value expression pairs, enclosed in curly brackets, and it yields +a new dictionary object. +An optional comma may follow the final pair. + +```grammar {.good} +DictExpr = '{' [Entries [',']] '}' . +Entries = Entry {',' Entry} . +Entry = Test ':' Test . +``` + +Examples: + + +```python +{} +{"one": 1} +{"one": 1, "two": 2,} +``` + +The key and value expressions are evaluated in left-to-right order. +Evaluation fails if the same key is used multiple times. + +Only [hashable](#hashing) values may be used as the keys of a dictionary. +This includes all built-in types except dictionaries, sets, and lists; +a tuple is hashable only if its elements are hashable. + + +### List expressions + +A list expression is a comma-separated list of element expressions, +enclosed in square brackets, and it yields a new list object. +An optional comma may follow the last element expression. + +```grammar {.good} +ListExpr = '[' [Expression [',']] ']' . +``` + +Element expressions are evaluated in left-to-right order. + +Examples: + +```python +[] # [], empty list +[1] # [1], a 1-element list +[1, 2, 3,] # [1, 2, 3], a 3-element list +``` + +### Unary operators + +There are four unary operators, all appearing before their operand: +`+`, `-`, `~`, and `not`. + +```grammar {.good} +UnaryExpr = '+' PrimaryExpr + | '-' PrimaryExpr + | '~' PrimaryExpr + | 'not' Test + . 
+``` + +```text ++ number unary positive (int, float) +- number unary negation (int, float) +~ number unary bitwise inversion (int) +not x logical negation (any type) +``` + +The `+` and `-` operators may be applied to any number +(`int` or `float`): `+` returns the number unchanged and `-` returns its negation. +Unary `+` is never necessary in a correct program, +but may serve as an assertion that its operand is a number, +or as documentation. + +```python +if x > 0: + return +1 +elif x < 0: + return -1 +else: + return 0 +``` + +The `not` operator returns the negation of the truth value of its +operand. + +```python +not True # False +not False # True +not [1, 2, 3] # False +not "" # True +not 0 # True +``` + +The `~` operator yields the bitwise inversion of its integer argument. +The bitwise inversion of x is defined as -(x+1). + +```python +~1 # -2 +~-1 # 0 +~0 # -1 +``` + + +### Binary operators + +Starlark has the following binary operators, arranged in order of increasing precedence: + +```text +or +and +== != < > <= >= in not in +| +^ +& +<< >> +- + +* / // % +``` + +Comparison operators, `in`, and `not in` are non-associative, +so the parser will not accept `0 <= i < n`. +All other binary operators of equal precedence associate to the left. + +```grammar {.good} +BinaryExpr = Test {Binop Test} . + +Binop = 'or' + | 'and' + | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in' + | '|' + | '^' + | '&' + | '-' | '+' + | '*' | '%' | '/' | '//' + | '<<' | '>>' + . +``` + +#### `or` and `and` + +The `or` and `and` operators yield, respectively, the logical disjunction and +conjunction of their arguments, which need not be Booleans. +The expression `x or y` yields the value of `x` if its truth value is `True`, +or the value of `y` otherwise. 
+ +```starlark +False or False # False +False or True # True +True or False # True +True or True # True + +0 or "hello" # "hello" +1 or "hello" # 1 +``` + +Similarly, `x and y` yields the value of `x` if its truth value is +`False`, or the value of `y` otherwise. + +```starlark +False and False # False +False and True # False +True and False # False +True and True # True + +0 and "hello" # 0 +1 and "hello" # "hello" +``` + +These operators use "short circuit" evaluation, so the second +expression is not evaluated if the value of the first expression has +already determined the result, allowing constructions like these: + +```python +len(x) > 0 and x[0] == 1 # x[0] is not evaluated if x is empty +x and x[0] == 1 +len(x) == 0 or x[0] == "" +not x or not x[0] +``` + +#### Comparisons + +The `==` operator reports whether its operands are equal; the `!=` +operator is its negation. + +The operators `<`, `>`, `<=`, and `>=` perform an ordered comparison +of their operands. It is an error to apply these operators to +operands of unequal type, unless one of the operands is an `int` and +the other is a `float`. Of the built-in types, only the following +support ordered comparison, using the ordering relation shown: + +```shell +NoneType # None <= None +bool # False < True +int # mathematical +float # as defined by IEEE 754 +string # lexicographical +tuple # lexicographical +list # lexicographical +``` + +Comparison of floating point values follows the IEEE 754 standard, +which breaks several mathematical identities. For example, if `x` is +a `NaN` value, the comparisons `x < y`, `x == y`, and `x > y` all +yield false for all values of `y`. + +Applications may define additional types that support ordered +comparison. + +The remaining built-in types support only equality comparisons. +Values of type `dict` or `set` compare equal if their elements compare +equal, and values of type `function` or `builtin_function_or_method` are equal only to +themselves. 
+ +```shell +dict # equal contents +set # equal contents +function # identity +builtin_function_or_method # identity +``` + +#### Arithmetic operations + +The following table summarizes the binary arithmetic operations +available for built-in types: + +```shell +Arithmetic (int or float; result has type float unless both operands have type int) + number + number # addition + number - number # subtraction + number * number # multiplication + number / number # real division (result is always a float) + number // number # floored division + number % number # remainder of floored division + number ^ number # bitwise XOR + number << number # bitwise left shift + number >> number # bitwise right shift + +Concatenation + string + string + list + list + tuple + tuple + +Repetition (string/list/tuple) + int * sequence + sequence * int + +String interpolation + string % any # see String Interpolation + +Sets + int | int # bitwise union (OR) + set | set # set union + int & int # bitwise intersection (AND) + set & set # set intersection + set ^ set # set symmetric difference +``` + +The operands of the arithmetic operators `+`, `-`, `*`, `//`, and +`%` must both be numbers (`int` or `float`) but need not have the same type. +The type of the result has type `int` only if both operands have that type. +The result of real division `/` always has type `float`. + +The `+` operator may be applied to non-numeric operands of the same +type, such as two lists, two tuples, or two strings, in which case it +computes the concatenation of the two operands and yields a new value of +the same type. + +```python +"Hello, " + "world" # "Hello, world" +(1, 2) + (3, 4) # (1, 2, 3, 4) +[1, 2] + [3, 4] # [1, 2, 3, 4] +``` + +The `*` operator may be applied to an integer _n_ and a value of type +`string`, `list`, or `tuple`, in which case it yields a new value +of the same sequence type consisting of _n_ repetitions of the original sequence. +The order of the operands is immaterial. 
+Negative values of _n_ behave like zero. + +```python +'mur' * 2 # 'murmur' +3 * range(3) # [0, 1, 2, 0, 1, 2, 0, 1, 2] +``` + +Applications may define additional types that support any subset of +these operators. + +The `&` operator requires two operands of the same type, either `int` or `set`. +For integers, it yields the bitwise intersection (AND) of its operands. +For sets, it yields a new set containing the intersection of the +elements of the operand sets, preserving the element order of the left +operand. + +The `|` operator likewise computes bitwise or set unions. +The result of `set | set` is a new set whose elements are the +union of the operands, preserving the order of the elements of the +operands, left before right. + +The `^` operator accepts operands of either `int` or `set` type. +For integers, it yields the bitwise XOR (exclusive OR) of its operands. +For sets, it yields a new set containing elements of either first or second +operand but not both (symmetric difference). + +The `<<` and `>>` operators require operands of `int` type both. They shift +the first operand to the left or right by the number of bits given by the +second operand. It is a dynamic error if the second operand is negative. +Implementations may impose a limit on the second operand of a left shift. + +```python +0x12345678 & 0xFF # 0x00000078 +0x12345678 | 0xFF # 0x123456FF +0b01011101 ^ 0b110101101 # 0b111110000 +0b01011101 >> 2 # 0b010111 +0b01011101 << 2 # 0b0101110100 + +set([1, 2]) & set([2, 3]) # set([2]) +set([1, 2]) | set([2, 3]) # set([1, 2, 3]) +set([1, 2]) ^ set([2, 3]) # set([1, 3]) +``` + +<b>Implementation note:</b> +The Go implementation of Starlark requires the `-set` flag to +enable support for sets. +The Java implementation does not support sets. 
+ + +#### Membership tests + +```text + any in sequence (list, tuple, dict, set, string) + any not in sequence +``` + +The `in` operator reports whether its first operand is a member of its +second operand, which must be a list, tuple, dict, set, or string. +The `not in` operator is its negation. +Both return a Boolean. + +The meaning of membership varies by the type of the second operand: +the members of a list, tuple, or set are its elements; +the members of a dict are its keys; +the members of a string are all its substrings. + +```python +1 in [1, 2, 3] # True +4 in (1, 2, 3) # False +4 not in set([1, 2, 3]) # True + +d = {"one": 1, "two": 2} +"one" in d # True +"three" in d # False +1 in d # False +[] in d # False + +"nasty" in "dynasty" # True +"a" in "banana" # True +"f" not in "way" # True +``` + +#### String interpolation + +The expression `format % args` performs _string interpolation_, a +simple form of template expansion. +The `format` string is interpreted as a sequence of literal portions +and _conversions_. +Each conversion, which starts with a `%` character, is replaced by its +corresponding value from `args`. +The characters following `%` in each conversion determine which +argument it uses and how to convert it to a string. + +Each `%` character marks the start of a conversion specifier, unless +it is immediately followed by another `%`, in which case both +characters together denote a literal percent sign. + +If the `"%"` is immediately followed by `"(key)"`, the parenthesized +substring specifies the key of the `args` dictionary whose +corresponding value is the operand to convert. +Otherwise, the conversion's operand is the next element of `args`, +which must be a tuple with exactly one component per conversion, +unless the format string contains only a single conversion, in which +case `args` itself is its operand. 
+ +Starlark does not support the flag, width, and padding specifiers +supported by Python's `%` and other variants of C's `printf`. + +After the optional `(key)` comes a single letter indicating what +operand types are valid and how to convert the operand `x` to a string: + +```text +% none literal percent sign +s any as if by str(x) +r any as if by repr(x) +d number signed integer decimal +i number signed integer decimal +o number signed octal +x number signed hexadecimal, lowercase +X number signed hexadecimal, uppercase +e number float exponential format, lowercase +E number float exponential format, uppercase +f number float decimal format, lowercase +F number float decimal format, uppercase +g number like %e for large exponents, %f otherwise +G number like %E for large exponents, %F otherwise +c string x (string must encode a single Unicode code point) + int as if by chr(x) +``` + +It is an error if the argument does not have the type required by the +conversion specifier. A Boolean argument is not considered a number. + +Examples: + +```python +"Hello %s, your score is %d" % ("Bob", 75) # "Hello Bob, your score is 75" + +"%d %o %x %c" % (65, 65, 65, 65) # "65 101 41 A" (decimal, octal, hexadecimal, Unicode) + +"%(greeting)s, %(audience)s" % dict( # "Hello, world" + greeting="Hello", + audience="world", +) + +"rate = %g%% APR" % 3.5 # "rate = 3.5% APR" +``` + +One subtlety: to use a tuple as the operand of a conversion in format +string containing only a single conversion, you must wrap the tuple in +a singleton tuple: + +```python +"coordinates=%s" % (40.741491, -74.003680) # error: too many arguments for format string +"coordinates=%s" % ((40.741491, -74.003680),) # "coordinates=(40.741491, -74.003680)" +``` + +TODO: specify `%e` and `%f` more precisely. + +### Conditional expressions + +A conditional expression has the form `a if cond else b`. +It first evaluates the condition `cond`. 
+If it's true, it evaluates `a` and yields its value; +otherwise it yields the value of `b`. + +```grammar {.good} +IfExpr = Test 'if' Test 'else' Test . +``` + +Example: + +```python +"yes" if enabled else "no" +``` + +### Comprehensions + +A comprehension constructs a new list or dictionary value by looping +over one or more iterables and evaluating a _body_ expression that produces +successive elements of the result. + +A list comprehension consists of a single expression followed by one +or more _clauses_, the first of which must be a `for` clause. +Each `for` clause resembles a `for` statement, and specifies an +iterable operand and a set of variables to be assigned by successive +values of the iterable. +An `if` clause resembles an `if` statement, and specifies a condition +that must be met for the body expression to be evaluated. +A sequence of `for` and `if` clauses acts like a nested sequence of +`for` and `if` statements. + +```grammar {.good} +ListComp = '[' Test {CompClause} ']'. +DictComp = '{' Entry {CompClause} '}' . + +CompClause = 'for' LoopVariables 'in' Test + | 'if' Test . + +LoopVariables = PrimaryExpr {',' PrimaryExpr} . +``` + +Examples: + +```python +[x*x for x in range(5)] # [0, 1, 4, 9, 16] +[x*x for x in range(5) if x%2 == 0] # [0, 4, 16] +[(x, y) for x in range(5) + if x%2 == 0 + for y in range(5) + if y > x] # [(0, 1), (0, 2), (0, 3), (0, 4), (2, 3), (2, 4)] +``` + +A dict comprehension resembles a list comprehension, but its body is a +pair of expressions, `key: value`, separated by a colon, +and its result is a dictionary containing the key/value pairs +for which the body expression was evaluated. +Evaluation fails if the value of any key is unhashable.
+ +As with a `for` loop, the loop variables may exploit compound +assignment: + +```python +[x*y+z for (x, y), z in [((2, 3), 5), (("o", 2), "!")]] # [11, 'oo!'] +``` + +Starlark, following Python 3, does not accept an unparenthesized +tuple or lambda expression as the operand of a `for` clause: + +```python +[x*x for x in 1, 2, 3] # parse error: unexpected comma +[x*x for x in lambda: 0] # parse error: unexpected lambda +``` + +Comprehensions in Starlark, again following Python 3, define a new lexical +block, so assignments to loop variables have no effect on variables of +the same name in an enclosing block: + +```python +x = 1 +_ = [x for x in [2]] # new variable x is local to the comprehension +print(x) # 1 +``` + +The operand of a comprehension's first clause (always a `for`) is +resolved in the lexical block enclosing the comprehension. +In the examples below, identifiers referring to the outer variable +named `x` have been distinguished by subscript. + +```python +x₀ = (1, 2, 3) +[x*x for x in x₀] # [1, 4, 9] +[x*x for x in x₀ if x%2 == 0] # [4] +``` + +All subsequent `for` and `if` expressions are resolved within the +comprehension's lexical block, as in this rather obscure example: + +```python +x₀ = ([1, 2], [3, 4], [5, 6]) +[x*x for x in x₀ for x in x if x%2 == 0] # [4, 16, 36] +``` + +which would be more clearly rewritten as: + +```python +x = ([1, 2], [3, 4], [5, 6]) +[z*z for y in x for z in y if z%2 == 0] # [4, 16, 36] +``` + + +### Function and method calls + +```grammar {.good} +CallSuffix = '(' [Arguments [',']] ')' . + +Arguments = Argument {',' Argument} . +Argument = Test | identifier '=' Test | '*' Test | '**' Test . +``` + +A value `f` of type `function` or `builtin_function_or_method` may be called using the expression `f(...)`. +Applications may define additional types whose values may be called in the same way. 
+ +A method call such as `filename.endswith(".star")` is the composition +of two operations, `m = filename.endswith` and `m(".star")`. +The first, a dot operation, yields a _bound method_, a function value +that pairs a receiver value (the `filename` string) with a choice of +method ([string·endswith](#string·endswith)). + +Only built-in or application-defined types may have methods. + +See [Functions](#functions) for an explanation of function parameter passing. + +### Dot expressions + +A dot expression `x.f` selects the attribute `f` (a field or method) +of the value `x`. + +Fields are possessed by none of the main Starlark [data types](#data-types), +but some application-defined types have them. +Methods belong to the built-in types `string`, `list`, `dict`, and +`set`, and to many application-defined types. + +```grammar {.good} +DotSuffix = '.' identifier . +``` + +A dot expression fails if the value does not have an attribute of the +specified name. + +Use the built-in function `hasattr(x, "f")` to ascertain whether a +value has a specific attribute, or `dir(x)` to enumerate all its +attributes. The `getattr(x, "f")` function can be used to select an +attribute when the name `"f"` is not known statically. + +A dot expression that selects a method typically appears within a call +expression, as in these examples: + +```python +["able", "baker", "charlie"].index("baker") # 1 +"banana".count("a") # 3 +"banana".reverse() # error: string has no .reverse field or method +``` + +But when not called immediately, the dot expression evaluates to a +_bound method_, that is, a method coupled to a specific receiver +value. A bound method can be called like an ordinary function, +without a receiver argument: + +```python +f = "banana".count +f # <built-in method count of string value> +f("a") # 3 +f("n") # 2 +``` + +### Index expressions + +An index expression `a[i]` yields the `i`th element of an _indexable_ +type such as a string, tuple, or list. 
The index `i` must be an `int` +value in the range -`n` ≤ `i` < `n`, where `n` is `len(a)`; any other +index results in an error. + +```grammar {.good} +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . +``` + +A valid negative index `i` behaves like the non-negative index `n+i`, +allowing for convenient indexing relative to the end of the +sequence. + +```python +"abc"[0] # "a" +"abc"[1] # "b" +"abc"[-1] # "c" + +("zero", "one", "two")[0] # "zero" +("zero", "one", "two")[1] # "one" +("zero", "one", "two")[-1] # "two" +``` + +An index expression `d[key]` may also be applied to a dictionary `d`, +to obtain the value associated with the specified key. It is an error +if the dictionary contains no such key. + +An index expression appearing on the left side of an assignment causes +the specified list or dictionary element to be updated: + +```starlark +a = list(range(3)) # a == [0, 1, 2] +a[2] = 7 # a == [0, 1, 7] + +coins["suzie b"] = 100 +``` + +It is a dynamic error to attempt to update an element of an immutable +type, such as a tuple or string, or a frozen value of a mutable type. + +### Slice expressions + +A slice expression `a[start:stop:stride]` yields a new value containing a +sub-sequence of `a`, which must be a string, tuple, or list. + +```grammar {.good} +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . +``` + +Each of the `start`, `stop`, and `stride` operands is optional; +if present, and not `None`, each must be an integer. +The `stride` value defaults to 1. +If the stride is not specified, the colon preceding it may be omitted too. +It is an error to specify a stride of zero. + +Conceptually, these operands specify a sequence of values `i` starting +at `start` and successively adding `stride` until `i` reaches or +passes `stop`. The result consists of the concatenation of values of +`a[i]` for which `i` is valid. + +The effective start and stop indices are computed from the three +operands as follows.
Let `n` be the length of the sequence. + +<b>If the stride is positive:</b> +If the `start` operand was omitted, it defaults to -infinity. +If the `stop` operand was omitted, it defaults to +infinity. +For either operand, if a negative value was supplied, `n` is added to it. +The `start` and `stop` values are then "clamped" to the +nearest value in the range 0 to `n`, inclusive. + +<b>If the stride is negative:</b> +If the `start` operand was omitted, it defaults to +infinity. +If the `stop` operand was omitted, it defaults to -infinity. +For either operand, if a negative value was supplied, `n` is added to it. +The `start` and `stop` values are then "clamped" to the +nearest value in the range -1 to `n`-1, inclusive. + +```python +"abc"[1:] # "bc" (remove first element) +"abc"[:-1] # "ab" (remove last element) +"abc"[1:-1] # "b" (remove first and last element) +"banana"[1::2] # "aaa" (select alternate elements starting at index 1) +"banana"[4::-2] # "nnb" (select alternate elements in reverse, starting at index 4) +``` + +Unlike Python, Starlark does not allow a slice expression on the left +side of an assignment. + +Slicing a tuple or string may be more efficient than slicing a list +because tuples and strings are immutable, so the result of the +operation can share the underlying representation of the original +operand (when the stride is 1). By contrast, slicing a list requires +the creation of a new list and copying of the necessary elements. + +<!-- TODO tighten up this section --> + +### Lambda expressions + +A `lambda` expression yields a new function value. + +```grammar {.good} +LambdaExpr = 'lambda' [Parameters] ':' Test . + +Parameters = Parameter {',' Parameter} . +Parameter = identifier + | identifier '=' Test + | '*' + | '*' identifier + | '**' identifier + .
+``` + +Syntactically, a lambda expression consists of the keyword `lambda`, +followed by a parameter list like that of a `def` statement but +unparenthesized, then a colon `:`, and a single expression, the +_function body_. + +Example: + +```python +def map(f, list): + return [f(x) for x in list] + +map(lambda x: 2*x, range(3)) # [0, 2, 4] +``` + +As with functions created by a `def` statement, a lambda function +captures the syntax of its body, the default values of any optional +parameters, the value of each free variable appearing in its body, and +the global dictionary of the current module. + +The name of a function created by a lambda expression is `"lambda"`. + +The two statements below are essentially equivalent, but the +function created by the `def` statement is named `twice` and the +function created by the lambda expression is named `lambda`. + +```python +def twice(x): + return x * 2 + +twice = lambda x: x * 2 +``` + +## Statements + +```grammar {.good} +Statement = DefStmt | IfStmt | ForStmt | SimpleStmt . +SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' . +SmallStmt = ReturnStmt + | BreakStmt | ContinueStmt | PassStmt + | AssignStmt + | ExprStmt + | LoadStmt + . +``` + +### Pass statements + +A `pass` statement does nothing. Use a `pass` statement when the +syntax requires a statement but no behavior is required, such as the +body of a function that does nothing. + +```grammar {.good} +PassStmt = 'pass' . +``` + +Example: + +```python +def noop(): + pass + +def list_to_dict(items): + # Convert list of tuples to dict + m = {} + for k, m[k] in items: + pass + return m +``` + +### Assignments + +An assignment statement has the form `lhs = rhs`. It evaluates the +expression on the right-hand side then assigns its value (or values) to +the variable (or variables) on the left-hand side. + +```grammar {.good} +AssignStmt = Expression '=' Expression . +``` + +The expression on the left-hand side is called a _target_.
The +simplest target is the name of a variable, but a target may also have +the form of an index expression, to update the element of a list or +dictionary, or a dot expression, to update the field of an object: + +```python +k = 1 +a[i] = v +m.f = "" +``` + +Compound targets may consist of a comma-separated list of +subtargets, optionally surrounded by parentheses or square brackets, +and targets may be nested arbitrarily in this way. +An assignment to a compound target checks that the right-hand value is a +sequence with the same number of elements as the target. +Each element of the sequence is then assigned to the corresponding +element of the target, recursively applying the same logic. + +```python +pi, e = 3.141, 2.718 +(x, y) = f() +[zero, one, two] = range(3) + +[(a, b), (c, d)] = {"a": "b", "c": "d"}.items() +a, b = {"a": 1, "b": 2} +``` + +The same process for assigning a value to a target expression is used +in `for` loops and in comprehensions. + + +### Augmented assignments + +An augmented assignment, which has the form `lhs op= rhs` updates the +variable `lhs` by applying a binary arithmetic operator `op` (one of +`+`, `-`, `*`, `/`, `//`, `%`, `&`, `|`, `^`, `<<`, `>>`) to the previous +value of `lhs` and the value of `rhs`. + +```grammar {.good} +AssignStmt = Expression ('+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression . +``` + +The left-hand side must be a simple target: +a name, an index expression, or a dot expression. + +```python +x -= 1 +x.filename += ".star" +a[index()] *= 2 +``` + +Any subexpressions in the target on the left-hand side are evaluated +exactly once, before the evaluation of `rhs`.
+The first two assignments above are thus equivalent to: + +```python +x = x - 1 +x.filename = x.filename + ".star" +``` + +and the third assignment is similar in effect to the following two +statements but does not declare a new temporary variable `i`: + +```python +i = index() +a[i] = a[i] * 2 +``` + +### Function definitions + +A `def` statement creates a named function and assigns it to a variable. + +```grammar {.good} +DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite . +``` + +Example: + +```python +def twice(x): + return x * 2 + +str(twice) # "<function twice>" +twice(2) # 4 +twice("two") # "twotwo" +``` + +The function's name is preceded by the `def` keyword and followed by +the parameter list (which is enclosed in parentheses), a colon, and +then an indented block of statements which form the body of the function. + +The parameter list is a comma-separated list whose elements are of +several kinds. First come zero or more required parameters, which are +simple identifiers; all calls must provide an argument value for these parameters. + +The required parameters are followed by zero or more optional +parameters, of the form `name=expression`. The expression specifies +the default value for the parameter for use in calls that do not +provide an argument value for it. + +The required parameters are optionally followed by a single parameter +name preceded by a `*`. This is called the _varargs_ parameter, +and it accumulates surplus positional arguments specified by a call. +It is conventionally named `*args`. + +The varargs parameter may be followed by zero or more +parameters, again of the forms `name` or `name=expression`, +but these parameters differ from earlier ones in that they are +_keyword-only_: if a call provides their values, it must do so as +keyword arguments, not positional ones.
+ +```python +def f(a, *, b=2, c): + print(a, b, c) + +f(1) # error: function f missing 1 argument (c) +f(1, 3) # error: function f accepts 1 positional argument (2 given) +f(1, c=3) # "1 2 3" + +def g(a, *args, b=2, c): + print(a, b, c, args) + +g(1, 3) # error: function g missing 1 argument (c) +g(1, 4, c=3) # "1 2 3 (4,)" + +``` + +A non-variadic function may also declare keyword-only parameters, +by using a bare `*` in place of the `*args` parameter. +This form does not declare a parameter but marks the boundary +between the earlier parameters and the keyword-only parameters. +This form must be followed by at least one optional parameter. + +Finally, there may be an optional parameter name preceded by `**`. +This is called the _keyword arguments_ parameter, and accumulates in a +dictionary any surplus `name=value` arguments that do not match a +prior parameter. It is conventionally named `**kwargs`. + +The final parameter may be followed by a trailing comma. + +Here are some example parameter lists: + +```python +def f(): pass +def f(a, b, c): pass +def f(a, b, c=1): pass +def f(a, b, c=1, *args): pass +def f(a, b, c=1, *args, **kwargs): pass +def f(**kwargs): pass +def f(a, b, c=1, *, d=1): pass + +def f( + a, + *args, + **kwargs, +) +``` + +Execution of a `def` statement creates a new function object. The +function object contains: the syntax of the function body; the default +value for each optional parameter; the value of each free variable +referenced within the function body; and the global dictionary of the +current module. + +<!-- this is too implementation-oriented; it's not a spec. --> + + +### Return statements + +A `return` statement ends the execution of a function and returns a +value to the caller of the function. + +```grammar {.good} +ReturnStmt = 'return' [Expression] . +``` + +A return statement may have zero, one, or more +result expressions separated by commas. +With no expressions, the function has the result `None`. 
+With a single expression, the function's result is the value of that expression. +With multiple expressions, the function's result is a tuple. + +```python +return # returns None +return 1 # returns 1 +return 1, 2 # returns (1, 2) +``` + +### Expression statements + +An expression statement evaluates an expression and discards its result. + +```grammar {.good} +ExprStmt = Expression . +``` + +Any expression may be used as a statement, but an expression statement is +most often used to call a function for its side effects. + +```python +list.append(1) +``` + +### If statements + +An `if` statement evaluates an expression (the _condition_), then, if +the truth value of the condition is `True`, executes a list of +statements. + +```grammar {.good} +IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] . +``` + +Example: + +```python +if score >= 100: + print("You win!") + return +``` + +An `if` statement may have an `else` block defining a second list of +statements to be executed if the condition is false. + +```python +if score >= 100: + print("You win!") + return +else: + print("Keep trying...") + continue +``` + +It is common for the `else` block to contain another `if` statement. +To avoid increasing the nesting depth unnecessarily, the `else` and +following `if` may be combined as `elif`: + +```python +if x > 0: + result = +1 +elif x < 0: + result = -1 +else: + result = 0 +``` + +An `if` statement is permitted only within a function definition. +An `if` statement at top level results in a static error. + +<b>Implementation note:</b> +The Go implementation of Starlark permits `if`-statements to appear at top level +if the `-globalreassign` flag is enabled. + + +### While loops + +A `while` loop evaluates an expression (the _condition_) and if the truth +value of the condition is `True`, it executes a list of statements and repeats +the process until the truth value of the condition becomes `False`.
+ +```grammar {.good} +WhileStmt = 'while' Test ':' Suite . +``` + +Example: + +```python +while n > 0: + r = r + n + n = n - 1 +``` + +A `while` statement is permitted only within a function definition. +A `while` statement at top level results in a static error. + +<b>Implementation note:</b> +The Go implementation of Starlark permits `while` loops only if the `-recursion` flag is enabled. +A `while` statement is permitted at top level if the `-globalreassign` flag is enabled. + + +### For loops + +A `for` loop evaluates its operand, which must be an iterable value. +Then, for each element of the iterable's sequence, the loop assigns +the successive element values to one or more variables and executes a +list of statements, the _loop body_. + +```grammar {.good} +ForStmt = 'for' LoopVariables 'in' Expression ':' Suite . +``` + +Example: + +```python +for x in range(10): + print(10) +``` + +The assignment of each value to the loop variables follows the same +rules as an ordinary assignment. In this example, two-element lists +are repeatedly assigned to the pair of variables (a, i): + +```python +for a, i in [["a", 1], ["b", 2], ["c", 3]]: + print(a, i) # prints "a 1", "b 2", "c 3" +``` + +Because Starlark loops always iterate over a finite sequence, they are +guaranteed to terminate, unlike loops in most languages which can +execute an arbitrary and perhaps unbounded number of iterations. + +Within the body of a `for` loop, `break` and `continue` statements may +be used to stop the execution of the loop or advance to the next +iteration. + +In Starlark, a `for` loop is permitted only within a function definition. +A `for` loop at top level results in a static error. + +<b>Implementation note:</b> +The Go implementation of Starlark permits loops to appear at top level +if the `-globalreassign` flag is enabled. + + +### Break and Continue + +The `break` and `continue` statements terminate the current iteration +of a `for` loop. 
Whereas the `continue` statement resumes the loop at +the next iteration, a `break` statement terminates the entire loop. + +```grammar {.good} +BreakStmt = 'break' . +ContinueStmt = 'continue' . +``` + +Example: + +```python +for x in range(10): + if x%2 == 1: + continue # skip odd numbers + if x > 7: + break # stop at 8 + print(x) # prints "0", "2", "4", "6" +``` + +Both statements affect only the innermost lexically enclosing loop. +It is a static error to use a `break` or `continue` statement outside a +loop. + + +### Load statements + +The `load` statement loads another Starlark module, extracts one or +more values from it, and binds them to names in the current module. + +<!-- +The awkwardness of load statements is a consequence of staying a +strict subset of Python syntax, which allows reuse of existing tools +such as editor support. Python import statements are inadequate for +Starlark because they don't allow arbitrary file names for module names. +--> + +Syntactically, a load statement looks like a function call `load(...)`. + +```grammar {.good} +LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' . +``` + +A load statement requires at least two "arguments". +The first must be a literal string; it identifies the module to load. +Its interpretation is determined by the application into which the +Starlark interpreter is embedded, and is not specified here. + +During execution, the application determines what action to take for a +load statement. +A typical implementation locates and executes a Starlark file, +populating a cache of files executed so far to avoid duplicate work, +to obtain a module, which is a mapping from global names to values. + +The remaining arguments are a mixture of literal strings, such as +`"x"`, or named literal strings, such as `y="x"`. + +The literal string (`"x"`), which must denote a valid identifier not +starting with `_`, specifies the name to extract from the loaded +module. 
In effect, names starting with `_` are not exported. +The name (`y`) specifies the local name; +if no name is given, the local name matches the quoted name. + +```python +load("module.star", "x", "y", "z") # assigns x, y, and z +load("module.star", "x", y2="y", "z") # assigns x, y2, and z +``` + +A load statement may not be nested inside any other statement. + + +## Module execution + +Each Starlark file defines a _module_, which is a mapping from the +names of global variables to their values. +When a Starlark file is executed, whether directly by the application +or indirectly through a `load` statement, a new Starlark thread is +created, and this thread executes all the top-level statements in the +file. +Because if-statements and for-loops cannot appear outside of a function, +control flows from top to bottom. + +If execution reaches the end of the file, module initialization is +successful. +At that point, the value of each of the module's global variables is +frozen, rendering subsequent mutation impossible. +The module is then ready for use by another Starlark thread, such as +one executing a load statement. +Such threads may access values or call functions defined in the loaded +module. + +A Starlark thread may carry state on behalf of the application into +which it is embedded, and application-defined functions may behave +differently depending on this thread state. +Because module initialization always occurs in a new thread, thread +state is never carried from a higher-level module into a lower-level +one. +The initialization behavior of a module is thus independent of +whichever module triggered its initialization. + +If a Starlark thread encounters an error, execution stops and the error +is reported to the application, along with a backtrace showing the +stack of active function calls at the time of the error. 
+If an error occurs during initialization of a Starlark module, any +active `load` statements waiting for initialization of the module also +fail. + +Starlark provides no mechanism by which errors can be handled within +the language. + + +## Built-in constants and functions + +The outermost block of the Starlark environment is known as the "predeclared" block. +It defines a number of fundamental values and functions needed by all Starlark programs, +such as `None`, `True`, `False`, and `len`, and possibly additional +application-specific names. + +These names are not reserved words so Starlark programs are free to +redefine them in a smaller block such as a function body or even at +the top level of a module. However, doing so may be confusing to the +reader. Nonetheless, this rule permits names to be added to the +predeclared block in later versions of the language (or +application-specific dialect) without breaking existing programs. + + +### None + +`None` is the distinguished value of the type `NoneType`. + +### True and False + +`True` and `False` are the two values of type `bool`. + +### any + +`any(x)` returns `True` if any element of the iterable sequence x has a truth value of true. +If the iterable is empty, it returns `False`. + +### all + +`all(x)` returns `False` if any element of the iterable sequence x has a truth value of false. +If the iterable is empty, it returns `True`. + +### bool + +`bool(x)` interprets `x` as a Boolean value---`True` or `False`. +With no argument, `bool()` returns `False`. + + +### chr + +`chr(i)` returns a string that encodes the single Unicode code point +whose value is specified by the integer `i`. `chr` fails unless 0 ≤ +`i` ≤ 0x10FFFF. + +Example: + +```python +chr(65) # "A", +chr(1049) # "Й", CYRILLIC CAPITAL LETTER SHORT I +chr(0x1F63F) # "😿", CRYING CAT FACE +``` + +See also: `ord`. + +<b>Implementation note:</b> `chr` is not provided by the Java implementation. + +### dict + +`dict` creates a dictionary. 
It accepts up to one positional +argument, which is interpreted as an iterable of two-element +sequences (pairs), each specifying a key/value pair in +the resulting dictionary. + +`dict` also accepts any number of keyword arguments, each of which +specifies a key/value pair in the resulting dictionary; +each keyword is treated as a string. + +```python +dict() # {}, empty dictionary +dict([(1, 2), (3, 4)]) # {1: 2, 3: 4} +dict([(1, 2), ["a", "b"]]) # {1: 2, "a": "b"} +dict(one=1, two=2) # {"one": 1, "two": 2} +dict([(1, 2)], x=3) # {1: 2, "x": 3} +``` + +With no arguments, `dict()` returns a new empty dictionary. + +`dict(x)` where x is a dictionary returns a new copy of x. + +### dir + +`dir(x)` returns a new sorted list of the names of the attributes (fields and methods) of its operand. +The attributes of a value `x` are the names `f` such that `x.f` is a valid expression. + +For example, + +```python +dir("hello") # ['capitalize', 'count', ...], the methods of a string +``` + +Several types known to the interpreter, such as list, string, and dict, have methods, but none have fields. +However, an application may define types with fields that may be read or set by statements such as these: + +```text +y = x.f +x.f = y +``` + +### enumerate + +`enumerate(x)` returns a list of (index, value) pairs, each containing +successive values of the iterable sequence x and the index of the value +within the sequence. + +The optional second parameter, `start`, specifies an integer value to +add to each index. + +```python +enumerate(["zero", "one", "two"]) # [(0, "zero"), (1, "one"), (2, "two")] +enumerate(["one", "two"], 1) # [(1, "one"), (2, "two")] +``` + +### fail + +The `fail(*args, sep=" ")` function causes execution to fail +with the specified error message. +Like `print`, arguments are formatted as if by `str(x)` and +separated by a space, unless an alternative separator is +specified by a `sep` named argument.
+ +```python +fail("oops") # "fail: oops" +fail("oops", 1, False, sep='/') # "fail: oops/1/False" +``` + +### float + +`float(x)` interprets its argument as a floating-point number. + +If x is a `float`, the result is x. +If x is an `int`, the result is the nearest floating point value to x. +If x is a string, the string is interpreted as a floating-point literal. +With no arguments, `float()` returns `0.0`. + + +### getattr + +`getattr(x, name)` returns the value of the attribute (field or method) of x named `name`. +It is a dynamic error if x has no such attribute. + +`getattr(x, "f")` is equivalent to `x.f`. + +```python +getattr("banana", "split")("a") # ["b", "n", "n", ""], equivalent to "banana".split("a") +``` + +The three-argument form `getattr(x, name, default)` returns the +provided `default` value instead of failing. + +### hasattr + +`hasattr(x, name)` reports whether x has an attribute (field or method) named `name`. + +### hash + +`hash(x)` returns an integer hash of a string x +such that two equal strings have the same hash. +In other words `x == y` implies `hash(x) == hash(y)`. + +In the interests of reproducibility of Starlark program behavior over time and +across implementations, the specific hash function is the same as that implemented by +[java.lang.String.hashCode](https://docs.oracle.com/javase/7/docs/api/java/lang/String.html#hashCode), +a simple polynomial accumulator over the UTF-16 transcoding of the string: + ``` +s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] +``` + +`hash` fails if given a non-string operand, +even if the value is hashable and thus suitable as the key of a dictionary. + +### int + +`int(x[, base])` interprets its argument as an integer. + +If x is an `int`, the result is x. +If x is a `float`, the result is the integer value nearest to x, +truncating towards zero; it is an error if x is not finite (`NaN`, +`+Inf`, `-Inf`). +If x is a `bool`, the result is 0 for `False` or 1 for `True`.
+ +If x is a string, it is interpreted as a sequence of digits in the +specified base, decimal by default. +If `base` is zero, x is interpreted like an integer literal, the base +being inferred from an optional base prefix such as `0b`, `0o`, or +`0x` preceding the first digit. +When the `base` is provided explicitly, a matching base prefix is +also permitted, and has no effect. +Irrespective of base, the string may start with an optional `+` or `-` +sign indicating the sign of the result. + +```python +int("11") # 11 +int("11", 0) # 11 +int("11", 10) # 11 +int("11", 2) # 3 +int("11", 8) # 9 +int("11", 16) # 17 + +int("0x11", 0) # 17 +int("0x11", 16) # 17 +int("0b1", 16) # 177 (0xb1) +int("0b1", 2) # 1 +int("0b1", 0) # 1 + +int("0x11") # error: invalid literal with base 10 +``` + +### len + +`len(x)` returns the number of elements in its argument. + +It is a dynamic error if its argument is not a sequence. + +### list + +`list` constructs a list. + +`list(x)` returns a new list containing the elements of the +iterable sequence x. + +With no argument, `list()` returns a new empty list. + +### max + +`max(x)` returns the greatest element in the iterable sequence x. + +It is an error if any element does not support ordered comparison, +or if the sequence is empty. + +The optional named parameter `key` specifies a function to be applied +to each element prior to comparison. + +```python +max([3, 1, 4, 1, 5, 9]) # 9 +max("two", "three", "four") # "two", the lexicographically greatest +max("two", "three", "four", key=len) # "three", the longest +``` + +### min + +`min(x)` returns the least element in the iterable sequence x. + +It is an error if any element does not support ordered comparison, +or if the sequence is empty.
+ +```python +min([3, 1, 4, 1, 5, 9]) # 1 +min("two", "three", "four") # "four", the lexicographically least +min("two", "three", "four", key=len) # "two", the shortest +``` + + +### ord + +`ord(s)` returns the integer value of the sole Unicode code point encoded by the string `s`. + +If `s` does not encode exactly one Unicode code point, `ord` fails. +Each invalid code within the string is treated as if it encodes the +Unicode replacement character, U+FFFD. + +Example: + +```python +ord("A") # 65 +ord("Й") # 1049 +ord("😿") # 0x1F63F +ord("Й"[1:]) # 0xFFFD (Unicode replacement character) +``` + +See also: `chr`. + +<b>Implementation note:</b> `ord` is not provided by the Java implementation. + +### print + +`print(*args, sep=" ")` prints its arguments, followed by a newline. +Arguments are formatted as if by `str(x)` and separated with a space, +unless an alternative separator is specified by a `sep` named argument. + +Example: + +```python +print(1, "hi") # "1 hi\n" +print("hello", "world") # "hello world\n" +print("hello", "world", sep=", ") # "hello, world\n" +``` + +Typically the formatted string is printed to the standard error file, +but the exact behavior is a property of the Starlark thread and is +determined by the host application. + +### range + +`range` returns an immutable sequence of integers defined by the specified interval and stride. + +```python +range(stop) # equivalent to range(0, stop) +range(start, stop) # equivalent to range(start, stop, 1) +range(start, stop, step) +``` + +`range` requires between one and three integer arguments. +With one argument, `range(stop)` returns the ascending sequence of non-negative integers less than `stop`. +With two arguments, `range(start, stop)` returns only integers not less than `start`. + +With three arguments, `range(start, stop, step)` returns integers +formed by successively adding `step` to `start` until the value meets or passes `stop`. +A call to `range` fails if the value of `step` is zero. 
+ +A call to `range` does not materialize the entire sequence, but +returns a fixed-size value of type `"range"` that represents the +parameters that define the sequence. +The `range` value is iterable and may be indexed efficiently. + +```python +list(range(10)) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +list(range(3, 10)) # [3, 4, 5, 6, 7, 8, 9] +list(range(3, 10, 2)) # [3, 5, 7, 9] +list(range(10, 3, -2)) # [10, 8, 6, 4] +``` + +The `len` function applied to a `range` value returns its length. +The truth value of a `range` value is `True` if its length is non-zero. + +Range values are comparable: two `range` values compare equal if they +denote the same sequence of integers, even if they were created using +different parameters. + +Range values are not hashable. <!-- should they be? --> + +The `str` function applied to a `range` value yields a string of the +form `range(10)`, `range(1, 10)`, or `range(1, 10, 2)`. + +The `x in y` operator, where `y` is a range, reports whether `x` is equal to +some member of the sequence `y`; the operation fails unless `x` is a +number. + +### repr + +`repr(x)` formats its argument as a string. + +All strings in the result are double-quoted. + +```python +repr(1) # '1' +repr("x") # '"x"' +repr([1, "x"]) # '[1, "x"]' +``` + +### reversed + +`reversed(x)` returns a new list containing the elements of the iterable sequence x in reverse order. + +```python +reversed(range(5)) # [4, 3, 2, 1, 0] +reversed("stressed".codepoints()) # ["d", "e", "s", "s", "e", "r", "t", "s"] +reversed({"one": 1, "two": 2}.keys()) # ["two", "one"] +``` + +### set + +`set(x)` returns a new set containing the elements of the iterable x. +With no argument, `set()` returns a new empty set. + +```python +set([3, 1, 4, 1, 5, 9]) # set([3, 1, 4, 5, 9]) +``` + +<b>Implementation note:</b> +Sets are an optional feature of the Go implementation of Starlark, +enabled by the `-set` flag. 
+ + +### sorted + +`sorted(x)` returns a new list containing the elements of the iterable sequence x, +in sorted order. The sort algorithm is stable. + +The optional named parameter `reverse`, if true, causes `sorted` to +return results in reverse sorted order. + +The optional named parameter `key` specifies a function of one +argument to apply to obtain the value's sort key. +The default behavior is the identity function. + +```python +sorted(set("harbors".codepoints())) # ['a', 'b', 'h', 'o', 'r', 's'] +sorted([3, 1, 4, 1, 5, 9]) # [1, 1, 3, 4, 5, 9] +sorted([3, 1, 4, 1, 5, 9], reverse=True) # [9, 5, 4, 3, 1, 1] + +sorted(["two", "three", "four"], key=len) # ["two", "four", "three"], shortest to longest +sorted(["two", "three", "four"], key=len, reverse=True) # ["three", "four", "two"], longest to shortest +``` + + +### str + +`str(x)` formats its argument as a string. + +If x is a string, the result is x (without quotation). +All other strings, such as elements of a list of strings, are double-quoted. + +```python +str(1) # '1' +str("x") # 'x' +str([1, "x"]) # '[1, "x"]' +``` + +### tuple + +`tuple(x)` returns a tuple containing the elements of the iterable x. + +With no arguments, `tuple()` returns the empty tuple. + +### type + +`type(x)` returns a string describing the type of its operand. + +```python +type(None) # "NoneType" +type(0) # "int" +type(0.0) # "float" +``` + +### zip + +`zip()` returns a new list of n-tuples formed from corresponding +elements of each of the n iterable sequences provided as arguments to +`zip`. That is, the first tuple contains the first element of each of +the sequences, the second tuple contains the second element of each +of the sequences, and so on. The result list is only as long as the +shortest of the input sequences. + +```python +zip() # [] +zip(range(5)) # [(0,), (1,), (2,), (3,), (4,)] +zip(range(5), "abc") # [(0, "a"), (1, "b"), (2, "c")] +``` + +## Built-in methods + +This section lists the methods of built-in types.
Methods are selected +using [dot expressions](#dot-expressions). +For example, strings have a `count` method that counts +occurrences of a substring; `"banana".count("a")` yields `3`. + +As with built-in functions, built-in methods accept only positional +arguments except where noted. +The parameter names serve merely as documentation. + + +<a id='dict·clear'></a> +### dict·clear + +`D.clear()` removes all the entries of dictionary D and returns `None`. +It fails if the dictionary is frozen or if there are active iterators. + +```python +x = {"one": 1, "two": 2} +x.clear() # None +print(x) # {} +``` + +<a id='dict·get'></a> +### dict·get + +`D.get(key[, default])` returns the dictionary value corresponding to the given key. +If the dictionary contains no such value, `get` returns `None`, or the +value of the optional `default` parameter if present. + +`get` fails if `key` is unhashable, or the dictionary is frozen or has active iterators. + +```python +x = {"one": 1, "two": 2} +x.get("one") # 1 +x.get("three") # None +x.get("three", 0) # 0 +``` + +<a id='dict·items'></a> +### dict·items + +`D.items()` returns a new list of key/value pairs, one per element in +dictionary D, in the same order as they would be returned by a `for` loop. + +```python +x = {"one": 1, "two": 2} +x.items() # [("one", 1), ("two", 2)] +``` + +<a id='dict·keys'></a> +### dict·keys + +`D.keys()` returns a new list containing the keys of dictionary D, in the +same order as they would be returned by a `for` loop. + +```python +x = {"one": 1, "two": 2} +x.keys() # ["one", "two"] +``` + +<a id='dict·pop'></a> +### dict·pop + +`D.pop(key[, default])` returns the value corresponding to the specified +key, and removes it from the dictionary. If the dictionary contains no +such value, and the optional `default` parameter is present, `pop` +returns that value; otherwise, it fails. + +`pop` fails if `key` is unhashable, or the dictionary is frozen or has active iterators. 
+ +```python +x = {"one": 1, "two": 2} +x.pop("one") # 1 +x # {"two": 2} +x.pop("three", 0) # 0 +x.pop("four") # error: missing key +``` + +<a id='dict·popitem'></a> +### dict·popitem + +`D.popitem()` returns the first key/value pair, removing it from the dictionary. + +`popitem` fails if the dictionary is empty, frozen, or has active iterators. + +```python +x = {"one": 1, "two": 2} +x.popitem() # ("one", 1) +x.popitem() # ("two", 2) +x.popitem() # error: empty dict +``` + +<a id='dict·setdefault'></a> +### dict·setdefault + +`D.setdefault(key[, default])` returns the dictionary value corresponding to the given key. +If the dictionary contains no such value, `setdefault`, like `get`, +returns `None` or the value of the optional `default` parameter if +present; `setdefault` additionally inserts the new key/value entry into the dictionary. + +`setdefault` fails if the key is unhashable, or if the dictionary is frozen or has active iterators. + +```python +x = {"one": 1, "two": 2} +x.setdefault("one") # 1 +x.setdefault("three", 0) # 0 +x # {"one": 1, "two": 2, "three": 0} +x.setdefault("four") # None +x # {"one": 1, "two": 2, "three": 0, "four": None} +``` + +<a id='dict·update'></a> +### dict·update + +`D.update([pairs][, name=value[, ...]])` makes a sequence of key/value +insertions into dictionary D, then returns `None`. + +If the positional argument `pairs` is present, it must be `None`, +another `dict`, or some other iterable. +If it is another `dict`, then its key/value pairs are inserted into D. +If it is an iterable, it must provide a sequence of pairs (or other iterables of length 2), +each of which is treated as a key/value pair to be inserted into D. + +For each `name=value` argument present, the name is converted to a +string and used as the key for an insertion into D, with its corresponding +value being `value`. + +`update` fails if the dictionary is frozen or has active iterators.
+ +```python +x = {} +x.update([("a", 1), ("b", 2)], c=3) +x.update({"d": 4}) +x.update(e=5) +x # {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5} +``` + +<a id='dict·values'></a> +### dict·values + +`D.values()` returns a new list containing the dictionary's values, in the +same order as they would be returned by a `for` loop over the +dictionary. + +```python +x = {"one": 1, "two": 2} +x.values() # [1, 2] +``` + +<a id='list·append'></a> +### list·append + +`L.append(x)` appends `x` to the list L, and returns `None`. + +`append` fails if the list is frozen or has active iterators. + +```python +x = [] +x.append(1) # None +x.append(2) # None +x.append(3) # None +x # [1, 2, 3] +``` + +<a id='list·clear'></a> +### list·clear + +`L.clear()` removes all the elements of the list L and returns `None`. +It fails if the list is frozen or if there are active iterators. + +```python +x = [1, 2, 3] +x.clear() # None +x # [] +``` + +<a id='list·extend'></a> +### list·extend + +`L.extend(x)` appends the elements of `x`, which must be iterable, to +the list L, and returns `None`. + +`extend` fails if `x` is not iterable, or if the list L is frozen or has active iterators. + +```python +x = [] +x.extend([1, 2, 3]) # None +x.extend(["foo"]) # None +x # [1, 2, 3, "foo"] +``` + +<a id='list·index'></a> +### list·index + +`L.index(x[, start[, end]])` finds `x` within the list L and returns its index. + +The optional `start` and `end` parameters restrict the portion of +list L that is inspected. If provided and not `None`, they must be list +indices of type `int`. If an index is negative, `len(L)` is effectively +added to it, then if the index is outside the range `[0:len(L)]`, the +nearest value within that range is used; see [Indexing](#indexing). + +`index` fails if `x` is not found in L, or if `start` or `end` +is not a valid index (`int` or `None`).
+ +```python +x = list("banana".codepoints()) +x.index("a") # 1 (bAnana) +x.index("a", 2) # 3 (banAna) +x.index("a", -2) # 5 (bananA) +``` + +<a id='list·insert'></a> +### list·insert + +`L.insert(i, x)` inserts the value `x` in the list L at index `i`, moving +higher-numbered elements along by one. It returns `None`. + +As usual, the index `i` must be an `int`. If its value is negative, +the length of the list is added, then its value is clamped to the +nearest value in the range `[0:len(L)]` to yield the effective index. + +`insert` fails if the list is frozen or has active iterators. + +```python +x = ["b", "c", "e"] +x.insert(0, "a") # None +x.insert(-1, "d") # None +x # ["a", "b", "c", "d", "e"] +``` + +<a id='list·pop'></a> +### list·pop + +`L.pop([index])` removes and returns the last element of the list L, or, +if the optional index is provided, at that index. + +`pop` fails if the index is not valid for `L[i]`, +or if the list is frozen or has active iterators. + +```python +x = [1, 2, 3, 4, 5] +x.pop() # 5 +x # [1, 2, 3, 4] +x.pop(-2) # 3 +x # [1, 2, 4] +x.pop(-3) # 1 +x # [2, 4] +x.pop() # 4 +x # [2] +``` + +<a id='list·remove'></a> +### list·remove + +`L.remove(x)` removes the first occurrence of the value `x` from the list L, and returns `None`. + +`remove` fails if the list does not contain `x`, is frozen, or has active iterators. + +```python +x = [1, 2, 3, 2] +x.remove(2) # None (x == [1, 3, 2]) +x.remove(2) # None (x == [1, 3]) +x.remove(2) # error: element not found +``` + +<a id='set·union'></a> +### set·union + +`S.union(iterable)` returns a new set into which have been inserted +all the elements of set S and all the elements of the argument, which +must be iterable. + +`union` fails if any element of the iterable is not hashable. 
+ +```python +x = set([1, 2]) +y = set([2, 3]) +x.union(y) # set([1, 2, 3]) +``` + +<a id='string·elem_ords'></a> +### string·elem_ords + +`S.elem_ords()` returns an iterable value containing the +sequence of numeric byte values in the string S. + +To materialize the entire sequence of bytes, apply `list(...)` to the result. + +Example: + +```python +list("Hello, 世界".elem_ords()) # [72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140] +``` + +See also: `string·elems`. + +<b>Implementation note:</b> `elem_ords` is not provided by the Java implementation. + +<a id='string·capitalize'></a> +### string·capitalize + +`S.capitalize()` returns a copy of string S with its first code point +changed to its title case and all subsequent letters changed to their +lower case. + +```python +"hello, world!".capitalize() # "Hello, world!" +"hElLo, wOrLd!".capitalize() # "Hello, world!" +"¿Por qué?".capitalize() # "¿por qué?" +``` + +<a id='string·codepoint_ords'></a> +### string·codepoint_ords + +`S.codepoint_ords()` returns an iterable value containing the +sequence of integer Unicode code points encoded by the string S. +Each invalid code within the string is treated as if it encodes the +Unicode replacement character, U+FFFD. + +By returning an iterable, not a list, the cost of decoding the string +is deferred until actually needed; apply `list(...)` to the result to +materialize the entire sequence. + +Example: + +```python +list("Hello, 世界".codepoint_ords()) # [72, 101, 108, 108, 111, 44, 32, 19990, 30028] + +for cp in "Hello, 世界".codepoint_ords(): + print(chr(cp)) # prints 'H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界' +``` + +See also: `string·codepoints`. + +<b>Implementation note:</b> `codepoint_ords` is not provided by the Java implementation.
+ +<a id='string·count'></a> +### string·count + +`S.count(sub[, start[, end]])` returns the number of occurrences of +`sub` within the string S, or, if the optional substring indices +`start` and `end` are provided, within the designated substring of S. +They are interpreted according to Starlark's [indexing conventions](#indexing). + +```python +"hello, world!".count("o") # 2 +"hello, world!".count("o", 7, 12) # 1 (in "world") +``` + +<a id='string·endswith'></a> +### string·endswith + +`S.endswith(suffix[, start[, end]])` reports whether the string +`S[start:end]` has the specified suffix. + +```python +"filename.star".endswith(".star") # True +``` + +The `suffix` argument may be a tuple of strings, in which case the +function reports whether any one of them is a suffix. + +```python +'foo.cc'.endswith(('.cc', '.h')) # True +``` + + +<a id='string·find'></a> +### string·find + +`S.find(sub[, start[, end]])` returns the index of the first +occurrence of the substring `sub` within S. + +If either or both of `start` or `end` are specified, +they specify a subrange of S to which the search should be restricted. +They are interpreted according to Starlark's [indexing conventions](#indexing). + +If no occurrence is found, `find` returns -1. + +```python +"bonbon".find("on") # 1 +"bonbon".find("on", 2) # 4 +"bonbon".find("on", 2, 5) # -1 +``` + +<a id='string·format'></a> +### string·format + +`S.format(*args, **kwargs)` returns a version of the format string S +in which bracketed portions `{...}` are replaced +by arguments from `args` and `kwargs`. + +Within the format string, a pair of braces `{{` or `}}` is treated as +a literal open or close brace. +Each unpaired open brace must be matched by a close brace `}`. +The optional text between corresponding open and close braces +specifies which argument to use and how to format it, and consists of +three components, all optional: +a field name, a conversion preceded by '`!`', and a format specifier +preceded by '`:`'.
+ +```text +{field} +{field:spec} +{field!conv} +{field!conv:spec} +``` + +The *field name* may be either a decimal number or a keyword. +A number is interpreted as the index of a positional argument; +a keyword specifies the value of a keyword argument. +If all the numeric field names form the sequence 0, 1, 2, and so on, +they may be omitted and those values will be implied; however, +the explicit and implicit forms may not be mixed. + +The *conversion* specifies how to convert an argument value `x` to a +string. It may be either `!r`, which converts the value using +`repr(x)`, or `!s`, which converts the value using `str(x)` and is +the default. + +The *format specifier*, after a colon, specifies field width, +alignment, padding, and numeric precision. +Currently it must be empty, but it is reserved for future use. + +```python +"a{x}b{y}c{}".format(1, x=2, y=3) # "a2b3c1" +"a{}b{}c".format(1, 2) # "a1b2c" +"({1}, {0})".format("zero", "one") # "(one, zero)" +"Is {0!r} {0!s}?".format('heterological') # 'Is "heterological" heterological?' +``` + +<a id='string·index'></a> +### string·index + +`S.index(sub[, start[, end]])` returns the index of the first +occurrence of the substring `sub` within S, like `S.find`, except +that if the substring is not found, the operation fails. + +```python +"bonbon".index("on") # 1 +"bonbon".index("on", 2) # 4 +"bonbon".index("on", 2, 5) # error: substring not found (in "nbo") +``` + +<a id='string·isalnum'></a> +### string·isalnum + +`S.isalnum()` reports whether the string S is non-empty and consists only of +Unicode letters and digits. + +```python +"base64".isalnum() # True +"Catch-22".isalnum() # False +``` + +<a id='string·isalpha'></a> +### string·isalpha + +`S.isalpha()` reports whether the string S is non-empty and consists only of Unicode letters.
+ +```python +"ABC".isalpha() # True +"Catch-22".isalpha() # False +"".isalpha() # False +``` + +<a id='string·isdigit'></a> +### string·isdigit + +`S.isdigit()` reports whether the string S is non-empty and consists only of Unicode digits. + +```python +"123".isdigit() # True +"Catch-22".isdigit() # False +"".isdigit() # False +``` + +<a id='string·islower'></a> +### string·islower + +`S.islower()` reports whether the string S contains at least one cased Unicode +letter, and all such letters are lowercase. + +```python +"hello, world".islower() # True +"Catch-22".islower() # False +"123".islower() # False +``` + +<a id='string·isspace'></a> +### string·isspace + +`S.isspace()` reports whether the string S is non-empty and consists only of Unicode spaces. + +```python +" ".isspace() # True +"\r\t\n".isspace() # True +"".isspace() # False +``` + +<a id='string·istitle'></a> +### string·istitle + +`S.istitle()` reports whether the string S contains at least one cased Unicode +letter, and all such letters that begin a word are in title case. + +```python +"Hello, World!".istitle() # True +"Catch-22".istitle() # True +"HAL-9000".istitle() # False +"Dženan".istitle() # True +"DŽenan".istitle() # False ("DŽ" is a single Unicode letter) +"123".istitle() # False +``` + +<a id='string·isupper'></a> +### string·isupper + +`S.isupper()` reports whether the string S contains at least one cased Unicode +letter, and all such letters are uppercase. + +```python +"HAL-9000".isupper() # True +"Catch-22".isupper() # False +"123".isupper() # False +``` + +<a id='string·join'></a> +### string·join + +`S.join(iterable)` returns the string formed by concatenating each +element of its argument, with a copy of the string S between +successive elements. The argument must be an iterable whose elements +are strings. 
+ +```python +", ".join(["one", "two", "three"]) # "one, two, three" +"a".join("ctmrn".codepoints()) # "catamaran" +``` + +<a id='string·lower'></a> +### string·lower + +`S.lower()` returns a copy of the string S with letters converted to lowercase. + +```python +"Hello, World!".lower() # "hello, world!" +``` + +<a id='string·lstrip'></a> +### string·lstrip + +`S.lstrip()` returns a copy of the string S with leading whitespace removed. + +Like `strip`, it accepts an optional string parameter that specifies an +alternative set of Unicode code points to remove. + +```python +" hello ".lstrip() # "hello " +" hello ".lstrip("h o") # "ello " +``` + +<a id='string·partition'></a> +### string·partition + +`S.partition(x)` splits string S into three parts and returns them as +a tuple: the portion before the first occurrence of string `x`, `x` itself, +and the portion following it. +If S does not contain `x`, `partition` returns `(S, "", "")`. + +`partition` fails if `x` is not a string, or is the empty string. + +```python +"one/two/three".partition("/") # ("one", "/", "two/three") +``` + +<a id='string·replace'></a> +### string·replace + +`S.replace(old, new[, count])` returns a copy of string S with all +occurrences of substring `old` replaced by `new`. If the optional +argument `count`, which must be an `int`, is non-negative, it +specifies a maximum number of occurrences to replace. + +```python +"banana".replace("a", "o") # "bonono" +"banana".replace("a", "o", 2) # "bonona" +``` + +<a id='string·rfind'></a> +### string·rfind + +`S.rfind(sub[, start[, end]])` returns the index of the substring `sub` within +S, like `S.find`, except that `rfind` returns the index of the substring's +_last_ occurrence. 
+ +```python +"bonbon".rfind("on") # 4 +"bonbon".rfind("on", None, 5) # 1 +"bonbon".rfind("on", 2, 5) # -1 +``` + +<a id='string·rindex'></a> +### string·rindex + +`S.rindex(sub[, start[, end]])` returns the index of the substring `sub` within +S, like `S.index`, except that `rindex` returns the index of the substring's +_last_ occurrence. + +```python +"bonbon".rindex("on") # 4 +"bonbon".rindex("on", None, 5) # 1 (in "bonbo") +"bonbon".rindex("on", 2, 5) # error: substring not found (in "nbo") +``` + +<a id='string·rpartition'></a> +### string·rpartition + +`S.rpartition(x)` is like `partition`, but splits `S` at the last occurrence of `x`. + +```python +"one/two/three".rpartition("/") # ("one/two", "/", "three") +``` + +<a id='string·rsplit'></a> +### string·rsplit + +`S.rsplit([sep[, maxsplit]])` splits a string into substrings like `S.split`, +except that when a maximum number of splits is specified, `rsplit` chooses the +rightmost splits. + +```python +"banana".rsplit("n") # ["ba", "a", "a"] +"banana".rsplit("n", 1) # ["bana", "a"] +"one two three".rsplit(None, 1) # ["one two", "three"] +"".rsplit("n") # [""] +``` + +<a id='string·rstrip'></a> +### string·rstrip + +`S.rstrip()` returns a copy of the string S with trailing whitespace removed. + +Like `strip`, it accepts an optional string parameter that specifies an +alternative set of Unicode code points to remove. + +```python +" hello ".rstrip() # " hello" +" hello ".rstrip("h o") # " hell" +``` + +<a id='string·split'></a> +### string·split + +`S.split([sep [, maxsplit]])` returns the list of substrings of S, +splitting at occurrences of the delimiter string `sep`. + +Consecutive occurrences of `sep` are considered to delimit empty +strings, so `'food'.split('o')` returns `['f', '', 'd']`. +Splitting an empty string with a specified separator returns `['']`. +If `sep` is the empty string, `split` fails.
+ +If `sep` is not specified or is `None`, `split` uses a different +algorithm: it removes all leading spaces from S +(or trailing spaces in the case of `rsplit`), +then splits the string around each consecutive non-empty sequence of +Unicode white space characters. +If S consists only of white space, `S.split()` returns the empty list. + +If `maxsplit` is given and non-negative, it specifies a maximum number of splits. + +```python +"one two three".split() # ["one", "two", "three"] +"one two  three".split(" ") # ["one", "two", "", "three"] +"one two three".split(None, 1) # ["one", "two three"] +"banana".split("n") # ["ba", "a", "a"] +"banana".split("n", 1) # ["ba", "ana"] +"".split("n") # [""] +``` + +<a id='string·elems'></a> +### string·elems + +`S.elems()` returns an iterable value containing successive +1-byte substrings of S. +To materialize the entire sequence, apply `list(...)` to the result. + +Example: + +```python +list('Hello, 世界'.elems()) # ["H", "e", "l", "l", "o", ",", " ", "\xe4", "\xb8", "\x96", "\xe7", "\x95", "\x8c"] +``` + +See also: `string·elem_ords`. + + +<a id='string·codepoints'></a> +### string·codepoints + +`S.codepoints()` returns an iterable value containing the sequence of +substrings of S that each encode a single Unicode code point. +Each invalid code within the string is treated as if it encodes the +Unicode replacement character, U+FFFD. + +By returning an iterable, not a list, the cost of decoding the string +is deferred until actually needed; apply `list(...)` to the result to +materialize the entire sequence. + +Example: + +```python +list('Hello, 世界'.codepoints()) # ['H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界'] + +for cp in 'Hello, 世界'.codepoints(): + print(cp) # prints 'H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界' +``` + +See also: `string·codepoint_ords`. + +<b>Implementation note:</b> `codepoints` is not provided by the Java implementation.
+ +<a id='string·splitlines'></a> +### string·splitlines + +`S.splitlines([keepends])` returns a list whose elements are the +successive lines of S, that is, the strings formed by splitting S at +line terminators (currently assumed to be a single newline, `\n`, +regardless of platform). + +The optional argument, `keepends`, is interpreted as a Boolean. +If true, line terminators are preserved in the result, though +the final element does not necessarily end with a line terminator. + +As a special case, if S is the empty string, +`splitlines` returns the empty list. + +```python +"one\n\ntwo".splitlines() # ["one", "", "two"] +"one\n\ntwo".splitlines(True) # ["one\n", "\n", "two"] +"".splitlines() # [] -- a special case +``` + +<a id='string·startswith'></a> +### string·startswith + +`S.startswith(prefix[, start[, end]])` reports whether the string +`S[start:end]` has the specified prefix. + +```python +"filename.star".startswith("filename") # True +``` + +The `prefix` argument may be a tuple of strings, in which case the +function reports whether any one of them is a prefix. + +```python +'abc'.startswith(('a', 'A')) # True +'ABC'.startswith(('a', 'A')) # True +'def'.startswith(('a', 'A')) # False +``` + +<a id='string·strip'></a> +### string·strip + +`S.strip()` returns a copy of the string S with leading and trailing whitespace removed. + +It accepts an optional string argument: +`S.strip(cutset)` instead removes all leading +and trailing Unicode code points contained in `cutset`. + +```python +" hello ".strip() # "hello" +" hello ".strip("h o") # "ell" +``` + +<a id='string·title'></a> +### string·title + +`S.title()` returns a copy of the string S with letters converted to title case. + +Letters are converted to upper case at the start of words, lower case elsewhere. + +```python +"hElLo, WoRlD!".title() # "Hello, World!" 
+"dženan".title() # "Dženan" ("Dž" is a single Unicode letter) +``` + +<a id='string·upper'></a> +### string·upper + +`S.upper()` returns a copy of the string S with letters converted to uppercase. + +```python +"Hello, World!".upper() # "HELLO, WORLD!" +``` + +## Dialect differences + +The list below summarizes features of the Go implementation that are +known to differ from the Java implementation of Starlark used by Bazel. +Some of these features may be controlled by global options to allow +applications to mimic the Bazel dialect more closely. Our goal is +eventually to eliminate all such differences on a case-by-case basis. +See [Starlark spec issue 20](https://github.com/bazelbuild/starlark/issues/20). + +* String interpolation supports the `[ioxXc]` conversions. +* String elements are bytes. +* Non-ASCII strings are encoded using UTF-8. +* Strings support hex byte escapes. +* Strings have the additional methods `elem_ords`, `codepoint_ords`, and `codepoints`. +* The `chr` and `ord` built-in functions are supported. +* The `set` built-in function is provided (option: `-set`). +* `set & set` and `set | set` compute set intersection and union, respectively. +* `assert` is a valid identifier. +* `if`, `for`, and `while` are permitted at top level (option: `-globalreassign`). +* top-level rebindings are permitted (option: `-globalreassign`). diff --git a/docs/CNAME b/docs/CNAME new file mode 100644 index 0000000..63f0a06 --- /dev/null +++ b/docs/CNAME @@ -0,0 +1 @@ +go.starlark.net
\ No newline at end of file diff --git a/docs/cmd/starlark/index.html b/docs/cmd/starlark/index.html new file mode 100644 index 0000000..29d9c83 --- /dev/null +++ b/docs/cmd/starlark/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/cmd/starlark'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/cmd/starlark... +</body> +</html> diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..ec44a6e --- /dev/null +++ b/docs/index.html @@ -0,0 +1,11 @@ +<html> + <!-- This file will be served at go.starlark.net by GitHub pages. --> + <head> + <!-- This tag causes "go get go.starlark.net" to redirect to GitHub. --> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://github.com/google/starlark-go'" /> + </head> + <body> + Redirecting to GitHub project github.com/google/starlark-go... + </body> +</html> diff --git a/docs/internal/chunkedfile/index.html b/docs/internal/chunkedfile/index.html new file mode 100644 index 0000000..7710919 --- /dev/null +++ b/docs/internal/chunkedfile/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/internal/chunkedfile'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/internal/chunkedfile...
+</body> +</html> diff --git a/docs/internal/compile/index.html b/docs/internal/compile/index.html new file mode 100644 index 0000000..12eb87f --- /dev/null +++ b/docs/internal/compile/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/internal/compile'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/internal/compile... +</body> +</html> diff --git a/docs/repl/index.html b/docs/repl/index.html new file mode 100644 index 0000000..bbcc4b2 --- /dev/null +++ b/docs/repl/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/repl'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/repl... +</body> +</html> diff --git a/docs/resolve/index.html b/docs/resolve/index.html new file mode 100644 index 0000000..6d63ca6 --- /dev/null +++ b/docs/resolve/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/resolve'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/resolve... +</body> +</html> diff --git a/docs/starlark/index.html b/docs/starlark/index.html new file mode 100644 index 0000000..58e38f0 --- /dev/null +++ b/docs/starlark/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/starlark'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/starlark... 
+</body> +</html> diff --git a/docs/starlarkstruct/index.html b/docs/starlarkstruct/index.html new file mode 100644 index 0000000..e187004 --- /dev/null +++ b/docs/starlarkstruct/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/starlarkstruct'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/starlarkstruct... +</body> +</html> diff --git a/docs/starlarktest/index.html b/docs/starlarktest/index.html new file mode 100644 index 0000000..d808e12 --- /dev/null +++ b/docs/starlarktest/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/starlarktest'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/starlarktest... +</body> +</html> diff --git a/docs/syntax/index.html b/docs/syntax/index.html new file mode 100644 index 0000000..a629e81 --- /dev/null +++ b/docs/syntax/index.html @@ -0,0 +1,9 @@ +<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/syntax'" /></meta> +</head> +<body> + Redirecting to godoc.org page for go.starlark.net/syntax... +</body> +</html> diff --git a/docs/update.go b/docs/update.go new file mode 100644 index 0000000..be40427 --- /dev/null +++ b/docs/update.go @@ -0,0 +1,71 @@ +//+build ignore + +// The update command creates/updates the <html><head> elements of +// each subpackage beneath docs so that "go get" requests redirect +// to GitHub and other HTTP requests redirect to godoc.org. 
+// +// Usage: +// +// $ cd $GOPATH/src/go.starlark.net +// $ go run docs/update.go +// +package main + +import ( + "bytes" + "fmt" + "io/ioutil" + "log" + "os" + "os/exec" + "path/filepath" + "strings" +) + +func main() { + log.SetFlags(0) + log.SetPrefix("update: ") + + cwd, err := os.Getwd() + if err != nil { + log.Fatal(err) + } + if filepath.Base(cwd) != "go.starlark.net" { + log.Fatalf("must run from the go.starlark.net directory") + } + + cmd := exec.Command("go", "list", "./...") + cmd.Stdout = new(bytes.Buffer) + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + log.Fatal(err) + } + for _, pkg := range strings.Split(strings.TrimSpace(fmt.Sprint(cmd.Stdout)), "\n") { + rel := strings.TrimPrefix(pkg, "go.starlark.net/") // e.g. "cmd/starlark" + subdir := filepath.Join("docs", rel) + if err := os.MkdirAll(subdir, 0777); err != nil { + log.Fatal(err) + } + + // Create missing docs/$rel/index.html files. + html := filepath.Join(subdir, "index.html") + if _, err := os.Stat(html); os.IsNotExist(err) { + data := strings.Replace(defaultHTML, "$PKG", pkg, -1) + if err := ioutil.WriteFile(html, []byte(data), 0666); err != nil { + log.Fatal(err) + } + log.Printf("created %s", html) + } + } +} + +const defaultHTML = `<html> +<head> + <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta> + <meta http-equiv="refresh" content="0;URL='http://godoc.org/$PKG'" /></meta> +</head> +<body> + Redirecting to godoc.org page for $PKG... 
+</body> +</html> +` @@ -0,0 +1,13 @@ +module go.starlark.net + +go 1.13 + +require ( + github.com/chzyer/logex v1.1.10 // indirect + github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e + github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 // indirect + github.com/google/go-cmp v0.5.1 // indirect + golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect + google.golang.org/protobuf v1.25.0 +) @@ -0,0 +1,74 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.1 h1:JFrFEBb2xKufg6XkJsJr+WbKb4FQlURi5RUcBveYu9k= +github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors 
v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +honnef.co/go/tools 
v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/internal/chunkedfile/chunkedfile.go b/internal/chunkedfile/chunkedfile.go new file mode 100644 index 0000000..a591524 --- /dev/null +++ b/internal/chunkedfile/chunkedfile.go @@ -0,0 +1,124 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package chunkedfile provides utilities for testing that source code +// errors are reported in the appropriate places. +// +// A chunked file consists of several chunks of input text separated by +// "---" lines. Each chunk is an input to the program under test, such +// as an evaluator. Lines containing "###" are interpreted as +// expectations of failure: the following text is a Go string literal +// denoting a regular expression that should match the failure message. +// +// Example: +// +// x = 1 / 0 ### "division by zero" +// --- +// x = 1 +// print(x + "") ### "int + string not supported" +// +// A client test feeds each chunk of text into the program under test, +// then calls chunk.GotError for each error that actually occurred. Any +// discrepancy between the actual and expected errors is reported using +// the client's reporter, which is typically a testing.T. +package chunkedfile // import "go.starlark.net/internal/chunkedfile" + +import ( + "fmt" + "io/ioutil" + "regexp" + "strconv" + "strings" +) + +const debug = false + +// A Chunk is a portion of a source file. +// It contains a set of expected errors. +type Chunk struct { + Source string + filename string + report Reporter + wantErrs map[int]*regexp.Regexp +} + +// Reporter is implemented by *testing.T. +type Reporter interface { + Errorf(format string, args ...interface{}) +} + +// Read parses a chunked file and returns its chunks. 
+// It reports failures using the reporter. +// +// Error messages of the form "file.star:line:col: ..." are prefixed +// by a newline so that the Go source position added by (*testing.T).Errorf +// appears on a separate line so as not to confuse editors. +func Read(filename string, report Reporter) (chunks []Chunk) { + data, err := ioutil.ReadFile(filename) + if err != nil { + report.Errorf("%s", err) + return + } + linenum := 1 + for i, chunk := range strings.Split(string(data), "\n---\n") { + if debug { + fmt.Printf("chunk %d at line %d: %s\n", i, linenum, chunk) + } + // Pad with newlines so the line numbers match the original file. + src := strings.Repeat("\n", linenum-1) + chunk + + wantErrs := make(map[int]*regexp.Regexp) + + // Parse comments of the form: + // ### "expected error". + lines := strings.Split(chunk, "\n") + for j := 0; j < len(lines); j, linenum = j+1, linenum+1 { + line := lines[j] + hashes := strings.Index(line, "###") + if hashes < 0 { + continue + } + rest := strings.TrimSpace(line[hashes+len("###"):]) + pattern, err := strconv.Unquote(rest) + if err != nil { + report.Errorf("\n%s:%d: not a quoted regexp: %s", filename, linenum, rest) + continue + } + rx, err := regexp.Compile(pattern) + if err != nil { + report.Errorf("\n%s:%d: %v", filename, linenum, err) + continue + } + wantErrs[linenum] = rx + if debug { + fmt.Printf("\t%d\t%s\n", linenum, rx) + } + } + linenum++ + + chunks = append(chunks, Chunk{src, filename, report, wantErrs}) + } + return chunks +} + +// GotError should be called by the client to report an error at a particular line. +// GotError reports unexpected errors to the chunk's reporter. 
+func (chunk *Chunk) GotError(linenum int, msg string) { + if rx, ok := chunk.wantErrs[linenum]; ok { + delete(chunk.wantErrs, linenum) + if !rx.MatchString(msg) { + chunk.report.Errorf("\n%s:%d: error %q does not match pattern %q", chunk.filename, linenum, msg, rx) + } + } else { + chunk.report.Errorf("\n%s:%d: unexpected error: %v", chunk.filename, linenum, msg) + } +} + +// Done should be called by the client to indicate that the chunk has no more errors. +// Done reports expected errors that did not occur to the chunk's reporter. +func (chunk *Chunk) Done() { + for linenum, rx := range chunk.wantErrs { + chunk.report.Errorf("\n%s:%d: expected error matching %q", chunk.filename, linenum, rx) + } +} diff --git a/internal/compile/codegen_test.go b/internal/compile/codegen_test.go new file mode 100644 index 0000000..f67204f --- /dev/null +++ b/internal/compile/codegen_test.go @@ -0,0 +1,118 @@ +package compile + +import ( + "bytes" + "fmt" + "testing" + + "go.starlark.net/resolve" + "go.starlark.net/syntax" +) + +// TestPlusFolding ensures that the compiler generates optimized code for +// n-ary addition of strings, lists, and tuples. 
+func TestPlusFolding(t *testing.T) { + isPredeclared := func(name string) bool { return name == "x" } + isUniversal := func(name string) bool { return false } + for i, test := range []struct { + src string // source expression + want string // disassembled code + }{ + { + // string folding + `"a" + "b" + "c" + "d"`, + `constant "abcd"; return`, + }, + { + // string folding with variable: + `"a" + "b" + x + "c" + "d"`, + `constant "ab"; predeclared x; plus; constant "cd"; plus; return`, + }, + { + // list folding + `[1] + [2] + [3]`, + `constant 1; constant 2; constant 3; makelist<3>; return`, + }, + { + // list folding with variable + `[1] + [2] + x + [3]`, + `constant 1; constant 2; makelist<2>; ` + + `predeclared x; plus; ` + + `constant 3; makelist<1>; plus; ` + + `return`, + }, + { + // tuple folding + `() + (1,) + (2, 3)`, + `constant 1; constant 2; constant 3; maketuple<3>; return`, + }, + { + // tuple folding with variable + `() + (1,) + x + (2, 3)`, + `constant 1; maketuple<1>; predeclared x; plus; ` + + `constant 2; constant 3; maketuple<2>; plus; ` + + `return`, + }, + } { + expr, err := syntax.ParseExpr("in.star", test.src, 0) + if err != nil { + t.Errorf("#%d: %v", i, err) + continue + } + locals, err := resolve.Expr(expr, isPredeclared, isUniversal) + if err != nil { + t.Errorf("#%d: %v", i, err) + continue + } + got := disassemble(Expr(expr, "<expr>", locals).Toplevel) + if test.want != got { + t.Errorf("expression <<%s>> generated <<%s>>, want <<%s>>", + test.src, got, test.want) + } + } +} + +// disassemble is a trivial disassembler tailored to the accumulator test. +func disassemble(f *Funcode) string { + out := new(bytes.Buffer) + code := f.Code + for pc := 0; pc < len(code); { + op := Opcode(code[pc]) + pc++ + // TODO(adonovan): factor in common with interpreter. 
+ var arg uint32 + if op >= OpcodeArgMin { + for s := uint(0); ; s += 7 { + b := code[pc] + pc++ + arg |= uint32(b&0x7f) << s + if b < 0x80 { + break + } + } + } + + if out.Len() > 0 { + out.WriteString("; ") + } + fmt.Fprintf(out, "%s", op) + if op >= OpcodeArgMin { + switch op { + case CONSTANT: + switch x := f.Prog.Constants[arg].(type) { + case string: + fmt.Fprintf(out, " %q", x) + default: + fmt.Fprintf(out, " %v", x) + } + case LOCAL: + fmt.Fprintf(out, " %s", f.Locals[arg].Name) + case PREDECLARED: + fmt.Fprintf(out, " %s", f.Prog.Names[arg]) + default: + fmt.Fprintf(out, "<%d>", arg) + } + } + } + return out.String() +} diff --git a/internal/compile/compile.go b/internal/compile/compile.go new file mode 100644 index 0000000..c314e6e --- /dev/null +++ b/internal/compile/compile.go @@ -0,0 +1,1916 @@ +// Package compile defines the Starlark bytecode compiler. +// It is an internal package of the Starlark interpreter and is not directly accessible to clients. +// +// The compiler generates byte code with optional uint32 operands for a +// virtual machine with the following components: +// - a program counter, which is an index into the byte code array. +// - an operand stack, whose maximum size is computed for each function by the compiler. +// - a stack of active iterators. +// - an array of local variables. +// The number of local variables and their indices are computed by the resolver. +// Locals (possibly including parameters) that are shared with nested functions +// are 'cells': their locals array slot will contain a value of type 'cell', +// an indirect value in a box that is explicitly read/updated by instructions. +// - an array of free variables, for nested functions. +// Free variables are a subset of the ancestors' cell variables. +// As with locals and cells, these are computed by the resolver. +// - an array of global variables, shared among all functions in the same module. +// All elements are initially nil. 
+// - two maps of predeclared and universal identifiers. +// +// Each function has a line number table that maps each program counter +// offset to a source position, including the column number. +// +// Operands, logically uint32s, are encoded using little-endian 7-bit +// varints, the top bit indicating that more bytes follow. +// +package compile // import "go.starlark.net/internal/compile" + +import ( + "bytes" + "fmt" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "go.starlark.net/resolve" + "go.starlark.net/syntax" +) + +// Disassemble causes the assembly code for each function +// to be printed to stderr as it is generated. +var Disassemble = false + +const debug = false // make code generation verbose, for debugging the compiler + +// Increment this to force recompilation of saved bytecode files. +const Version = 12 + +type Opcode uint8 + +// "x DUP x x" is a "stack picture" that describes the state of the +// stack before and after execution of the instruction. +// +// OP<index> indicates an immediate operand that is an index into the +// specified table: locals, names, freevars, constants. 
+const ( + NOP Opcode = iota // - NOP - + + // stack operations + DUP // x DUP x x + DUP2 // x y DUP2 x y x y + POP // x POP - + EXCH // x y EXCH y x + + // binary comparisons + // (order must match Token) + LT + GT + GE + LE + EQL + NEQ + + // binary arithmetic + // (order must match Token) + PLUS + MINUS + STAR + SLASH + SLASHSLASH + PERCENT + AMP + PIPE + CIRCUMFLEX + LTLT + GTGT + + IN + + // unary operators + UPLUS // x UPLUS x + UMINUS // x UMINUS -x + TILDE // x TILDE ~x + + NONE // - NONE None + TRUE // - TRUE True + FALSE // - FALSE False + MANDATORY // - MANDATORY Mandatory [sentinel value for required kwonly args] + + ITERPUSH // iterable ITERPUSH - [pushes the iterator stack] + ITERPOP // - ITERPOP - [pops the iterator stack] + NOT // value NOT bool + RETURN // value RETURN - + SETINDEX // a i new SETINDEX - + INDEX // a i INDEX elem + SETDICT // dict key value SETDICT - + SETDICTUNIQ // dict key value SETDICTUNIQ - + APPEND // list elem APPEND - + SLICE // x lo hi step SLICE slice + INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y) + MAKEDICT // - MAKEDICT dict + + // --- opcodes with an argument must go below this line --- + + // control flow + JMP // - JMP<addr> - + CJMP // cond CJMP<addr> - + ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator] + // or: - ITERJMP<addr> - (and jump) + + CONSTANT // - CONSTANT<constant> value + MAKETUPLE // x1 ... xn MAKETUPLE<n> tuple + MAKELIST // x1 ... xn MAKELIST<n> list + MAKEFUNC // defaults+freevars MAKEFUNC<func> fn + LOAD // from1 ... fromN module LOAD<n> v1 ... 
vN + SETLOCAL // value SETLOCAL<local> - + SETGLOBAL // value SETGLOBAL<global> - + LOCAL // - LOCAL<local> value + FREE // - FREE<freevar> cell + FREECELL // - FREECELL<freevar> value (content of FREE cell) + LOCALCELL // - LOCALCELL<local> value (content of LOCAL cell) + SETLOCALCELL // value SETLOCALCELL<local> - (set content of LOCAL cell) + GLOBAL // - GLOBAL<global> value + PREDECLARED // - PREDECLARED<name> value + UNIVERSAL // - UNIVERSAL<name> value + ATTR // x ATTR<name> y y = x.name + SETFIELD // x y SETFIELD<name> - x.name = y + UNPACK // iterable UNPACK<n> vn ... v1 + + // n>>8 is #positional args and n&0xff is #named args (pairs). + CALL // fn positional named CALL<n> result + CALL_VAR // fn positional named *args CALL_VAR<n> result + CALL_KW // fn positional named **kwargs CALL_KW<n> result + CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result + + OpcodeArgMin = JMP + OpcodeMax = CALL_VAR_KW +) + +// TODO(adonovan): add dynamic checks for missing opcodes in the tables below. 
+ +var opcodeNames = [...]string{ + AMP: "amp", + APPEND: "append", + ATTR: "attr", + CALL: "call", + CALL_KW: "call_kw ", + CALL_VAR: "call_var", + CALL_VAR_KW: "call_var_kw", + CIRCUMFLEX: "circumflex", + CJMP: "cjmp", + CONSTANT: "constant", + DUP2: "dup2", + DUP: "dup", + EQL: "eql", + EXCH: "exch", + FALSE: "false", + FREE: "free", + FREECELL: "freecell", + GE: "ge", + GLOBAL: "global", + GT: "gt", + GTGT: "gtgt", + IN: "in", + INDEX: "index", + INPLACE_ADD: "inplace_add", + ITERJMP: "iterjmp", + ITERPOP: "iterpop", + ITERPUSH: "iterpush", + JMP: "jmp", + LE: "le", + LOAD: "load", + LOCAL: "local", + LOCALCELL: "localcell", + LT: "lt", + LTLT: "ltlt", + MAKEDICT: "makedict", + MAKEFUNC: "makefunc", + MAKELIST: "makelist", + MAKETUPLE: "maketuple", + MANDATORY: "mandatory", + MINUS: "minus", + NEQ: "neq", + NONE: "none", + NOP: "nop", + NOT: "not", + PERCENT: "percent", + PIPE: "pipe", + PLUS: "plus", + POP: "pop", + PREDECLARED: "predeclared", + RETURN: "return", + SETDICT: "setdict", + SETDICTUNIQ: "setdictuniq", + SETFIELD: "setfield", + SETGLOBAL: "setglobal", + SETINDEX: "setindex", + SETLOCAL: "setlocal", + SETLOCALCELL: "setlocalcell", + SLASH: "slash", + SLASHSLASH: "slashslash", + SLICE: "slice", + STAR: "star", + TILDE: "tilde", + TRUE: "true", + UMINUS: "uminus", + UNIVERSAL: "universal", + UNPACK: "unpack", + UPLUS: "uplus", +} + +const variableStackEffect = 0x7f + +// stackEffect records the effect on the size of the operand stack of +// each kind of instruction. For some instructions this requires computation. 
+var stackEffect = [...]int8{ + AMP: -1, + APPEND: -2, + ATTR: 0, + CALL: variableStackEffect, + CALL_KW: variableStackEffect, + CALL_VAR: variableStackEffect, + CALL_VAR_KW: variableStackEffect, + CIRCUMFLEX: -1, + CJMP: -1, + CONSTANT: +1, + DUP2: +2, + DUP: +1, + EQL: -1, + FALSE: +1, + FREE: +1, + FREECELL: +1, + GE: -1, + GLOBAL: +1, + GT: -1, + GTGT: -1, + IN: -1, + INDEX: -1, + INPLACE_ADD: -1, + ITERJMP: variableStackEffect, + ITERPOP: 0, + ITERPUSH: -1, + JMP: 0, + LE: -1, + LOAD: -1, + LOCAL: +1, + LOCALCELL: +1, + LT: -1, + LTLT: -1, + MAKEDICT: +1, + MAKEFUNC: 0, + MAKELIST: variableStackEffect, + MAKETUPLE: variableStackEffect, + MANDATORY: +1, + MINUS: -1, + NEQ: -1, + NONE: +1, + NOP: 0, + NOT: 0, + PERCENT: -1, + PIPE: -1, + PLUS: -1, + POP: -1, + PREDECLARED: +1, + RETURN: -1, + SETLOCALCELL: -1, + SETDICT: -3, + SETDICTUNIQ: -3, + SETFIELD: -2, + SETGLOBAL: -1, + SETINDEX: -3, + SETLOCAL: -1, + SLASH: -1, + SLASHSLASH: -1, + SLICE: -3, + STAR: -1, + TRUE: +1, + UMINUS: 0, + UNIVERSAL: +1, + UNPACK: variableStackEffect, + UPLUS: 0, +} + +func (op Opcode) String() string { + if op < OpcodeMax { + if name := opcodeNames[op]; name != "" { + return name + } + } + return fmt.Sprintf("illegal op (%d)", op) +} + +// A Program is a Starlark file in executable form. +// +// Programs are serialized by the Program.Encode method, +// which must be updated whenever this declaration is changed. +type Program struct { + Loads []Binding // name (really, string) and position of each load stmt + Names []string // names of attributes and predeclared variables + Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes + Functions []*Funcode + Globals []Binding // for error messages and tracing + Toplevel *Funcode // module initialization function +} + +// The type of a bytes literal value, to distinguish from text string. +type Bytes string + +// A Funcode is the code of a compiled Starlark function. 
+// +// Funcodes are serialized by the encoder.function method, +// which must be updated whenever this declaration is changed. +type Funcode struct { + Prog *Program + Pos syntax.Position // position of def or lambda token + Name string // name of this function + Doc string // docstring of this function + Code []byte // the byte code + pclinetab []uint16 // mapping from pc to linenum + Locals []Binding // locals, parameters first + Cells []int // indices of Locals that require cells + Freevars []Binding // for tracing + MaxStack int + NumParams int + NumKwonlyParams int + HasVarargs, HasKwargs bool + + // -- transient state -- + + lntOnce sync.Once + lnt []pclinecol // decoded line number table +} + +type pclinecol struct { + pc uint32 + line, col int32 +} + +// A Binding is the name and position of a binding identifier. +type Binding struct { + Name string + Pos syntax.Position +} + +// A pcomp holds the compiler state for a Program. +type pcomp struct { + prog *Program // what we're building + + names map[string]uint32 + constants map[interface{}]uint32 + functions map[*Funcode]uint32 +} + +// An fcomp holds the compiler state for a Funcode. +type fcomp struct { + fn *Funcode // what we're building + + pcomp *pcomp + pos syntax.Position // current position of generated code + loops []loop + block *block +} + +type loop struct { + break_, continue_ *block +} + +type block struct { + insns []insn + + // If the last insn is a RETURN, jmp and cjmp are nil. + // If the last insn is a CJMP or ITERJMP, + // cjmp and jmp are the "true" and "false" successors. + // Otherwise, jmp is the sole successor. + jmp, cjmp *block + + initialstack int // for stack depth computation + + // Used during encoding + index int // -1 => not encoded yet + addr uint32 +} + +type insn struct { + op Opcode + arg uint32 + line, col int32 +} + +// Position returns the source position for program counter pc. 
+func (fn *Funcode) Position(pc uint32) syntax.Position { + fn.lntOnce.Do(fn.decodeLNT) + + // Binary search to find last LNT entry not greater than pc. + // To avoid dynamic dispatch, this is a specialization of + // sort.Search using this predicate: + // !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc) + n := len(fn.lnt) + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) + if !(h >= n-1 || fn.lnt[h+1].pc > pc) { + i = h + 1 + } else { + j = h + } + } + + var line, col int32 + if i < n { + line = fn.lnt[i].line + col = fn.lnt[i].col + } + + pos := fn.Pos // copy the (annoyingly inaccessible) filename + pos.Col = col + pos.Line = line + return pos +} + +// decodeLNT decodes the line number table and populates fn.lnt. +// It is called at most once. +func (fn *Funcode) decodeLNT() { + // Conceptually the table contains rows of the form + // (pc uint32, line int32, col int32), sorted by pc. + // We use a delta encoding, since the differences + // between successive pc, line, and column values + // are typically small and positive (though line and + // especially column differences may be negative). + // The delta encoding starts from + // {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}. + // + // Each entry is packed into one or more 16-bit values: + // Δpc uint4 + // Δline int5 + // Δcol int6 + // incomplete uint1 + // The top 4 bits are the unsigned delta pc. + // The next 5 bits are the signed line number delta. + // The next 6 bits are the signed column number delta. + // The bottom bit indicates that more rows follow because + // one of the deltas was maxed out. + // These field widths were chosen from a sample of real programs, + // and allow >97% of rows to be encoded in a single uint16. 
+ + fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation + entry := pclinecol{ + pc: 0, + line: fn.Pos.Line, + col: fn.Pos.Col, + } + for _, x := range fn.pclinetab { + entry.pc += uint32(x) >> 12 + entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline + entry.col += int32((int16(x) << 9) >> (16 - 6)) // sign extend Δcol + if (x & 1) == 0 { + fn.lnt = append(fn.lnt, entry) + } + } +} + +// bindings converts resolve.Bindings to compiled form. +func bindings(bindings []*resolve.Binding) []Binding { + res := make([]Binding, len(bindings)) + for i, bind := range bindings { + res[i].Name = bind.First.Name + res[i].Pos = bind.First.NamePos + } + return res +} + +// Expr compiles an expression to a program whose toplevel function evaluates it. +func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program { + pos := syntax.Start(expr) + stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} + return File(stmts, pos, name, locals, nil) +} + +// File compiles the statements of a file into a program. +func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program { + pcomp := &pcomp{ + prog: &Program{ + Globals: bindings(globals), + }, + names: make(map[string]uint32), + constants: make(map[interface{}]uint32), + functions: make(map[*Funcode]uint32), + } + pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil) + + return pcomp.prog +} + +func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode { + fcomp := &fcomp{ + pcomp: pcomp, + pos: pos, + fn: &Funcode{ + Prog: pcomp.prog, + Pos: pos, + Name: name, + Doc: docStringFromBody(stmts), + Locals: bindings(locals), + Freevars: bindings(freevars), + }, + } + + // Record indices of locals that require cells. 
+ for i, local := range locals { + if local.Scope == resolve.Cell { + fcomp.fn.Cells = append(fcomp.fn.Cells, i) + } + } + + if debug { + fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) + } + + // Convert AST to a CFG of instructions. + entry := fcomp.newBlock() + fcomp.block = entry + fcomp.stmts(stmts) + if fcomp.block != nil { + fcomp.emit(NONE) + fcomp.emit(RETURN) + } + + var oops bool // something bad happened + + setinitialstack := func(b *block, depth int) { + if b.initialstack == -1 { + b.initialstack = depth + } else if b.initialstack != depth { + fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", + b.index, b.initialstack, depth) + oops = true + } + } + + // Linearize the CFG: + // compute order, address, and initial + // stack depth of each reachable block. + var pc uint32 + var blocks []*block + var maxstack int + var visit func(b *block) + visit = func(b *block) { + if b.index >= 0 { + return // already visited + } + b.index = len(blocks) + b.addr = pc + blocks = append(blocks, b) + + stack := b.initialstack + if debug { + fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) + } + var cjmpAddr *uint32 + var isiterjmp int + for i, insn := range b.insns { + pc++ + + // Compute size of argument. + if insn.op >= OpcodeArgMin { + switch insn.op { + case ITERJMP: + isiterjmp = 1 + fallthrough + case CJMP: + cjmpAddr = &b.insns[i].arg + pc += 4 + default: + pc += uint32(argLen(insn.arg)) + } + } + + // Compute effect on stack. 
+ se := insn.stackeffect() + if debug { + fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) + } + stack += se + if stack < 0 { + fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) + oops = true + } + if stack+isiterjmp > maxstack { + maxstack = stack + isiterjmp + } + } + + if debug { + fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", + b.addr, b.index) + if b.jmp != nil { + fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) + } + if b.cjmp != nil { + fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) + } + } + + // Place the jmp block next. + if b.jmp != nil { + // jump threading (empty cycles are impossible) + for b.jmp.insns == nil { + b.jmp = b.jmp.jmp + } + + setinitialstack(b.jmp, stack+isiterjmp) + if b.jmp.index < 0 { + // Successor is not yet visited: + // place it next and fall through. + visit(b.jmp) + } else { + // Successor already visited; + // explicit backward jump required. + pc += 5 + } + } + + // Then the cjmp block. + if b.cjmp != nil { + // jump threading (empty cycles are impossible) + for b.cjmp.insns == nil { + b.cjmp = b.cjmp.jmp + } + + setinitialstack(b.cjmp, stack) + visit(b.cjmp) + + // Patch the CJMP/ITERJMP, if present. + if cjmpAddr != nil { + *cjmpAddr = b.cjmp.addr + } + } + } + setinitialstack(entry, 0) + visit(entry) + + fn := fcomp.fn + fn.MaxStack = maxstack + + // Emit bytecode (and position table). + if Disassemble { + fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) + } + fcomp.generate(blocks, pc) + + if debug { + fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) + } + + // Don't panic until we've completed printing of the function. 
+ if oops { + panic("internal error") + } + + if debug { + fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) + } + + return fn +} + +func docStringFromBody(body []syntax.Stmt) string { + if len(body) == 0 { + return "" + } + expr, ok := body[0].(*syntax.ExprStmt) + if !ok { + return "" + } + lit, ok := expr.X.(*syntax.Literal) + if !ok { + return "" + } + if lit.Token != syntax.STRING { + return "" + } + return lit.Value.(string) +} + +func (insn *insn) stackeffect() int { + se := int(stackEffect[insn.op]) + if se == variableStackEffect { + arg := int(insn.arg) + switch insn.op { + case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: + se = -int(2*(insn.arg&0xff) + insn.arg>>8) + if insn.op != CALL { + se-- + } + if insn.op == CALL_VAR_KW { + se-- + } + case ITERJMP: + // Stack effect differs by successor: + // +1 for jmp/false/ok + // 0 for cjmp/true/exhausted + // Handled specially in caller. + se = 0 + case MAKELIST, MAKETUPLE: + se = 1 - arg + case UNPACK: + se = arg - 1 + default: + panic(insn.op) + } + } + return se +} + +// generate emits the linear instruction stream from the CFG, +// and builds the PC-to-line number table. +func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { + code := make([]byte, 0, codelen) + var pclinetab []uint16 + prev := pclinecol{ + pc: 0, + line: fcomp.fn.Pos.Line, + col: fcomp.fn.Pos.Col, + } + + for _, b := range blocks { + if Disassemble { + fmt.Fprintf(os.Stderr, "%d:\n", b.index) + } + pc := b.addr + for _, insn := range b.insns { + if insn.line != 0 { + // Instruction has a source position. Delta-encode it. + // See Funcode.Position for the encoding. 
+ for { + var incomplete uint16 + + // Δpc, uint4 + deltapc := pc - prev.pc + if deltapc > 0x0f { + deltapc = 0x0f + incomplete = 1 + } + prev.pc += deltapc + + // Δline, int5 + deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f) + if !ok { + incomplete = 1 + } + prev.line += deltaline + + // Δcol, int6 + deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f) + if !ok { + incomplete = 1 + } + prev.col += deltacol + + entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete + pclinetab = append(pclinetab, entry) + if incomplete == 0 { + break + } + } + + if Disassemble { + fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n", + filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col) + } + } + if Disassemble { + PrintOp(fcomp.fn, pc, insn.op, insn.arg) + } + code = append(code, byte(insn.op)) + pc++ + if insn.op >= OpcodeArgMin { + if insn.op == CJMP || insn.op == ITERJMP { + code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes + } else { + code = addUint32(code, insn.arg, 0) + } + pc = uint32(len(code)) + } + } + + if b.jmp != nil && b.jmp.index != b.index+1 { + addr := b.jmp.addr + if Disassemble { + fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", + pc, addr, b.jmp.index) + } + code = append(code, byte(JMP)) + code = addUint32(code, addr, 4) + } + } + if len(code) != int(codelen) { + panic("internal error: wrong code length") + } + + fcomp.fn.pclinetab = pclinetab + fcomp.fn.Code = code +} + +// clip returns the value nearest x in the range [min...max], +// and whether it equals x. +func clip(x, min, max int32) (int32, bool) { + if x > max { + return max, false + } else if x < min { + return min, false + } else { + return x, true + } +} + +// addUint32 encodes x as 7-bit little-endian varint. +// TODO(adonovan): opt: steal top two bits of opcode +// to encode the number of complete bytes that follow. 
+func addUint32(code []byte, x uint32, min int) []byte { + end := len(code) + min + for x >= 0x80 { + code = append(code, byte(x)|0x80) + x >>= 7 + } + code = append(code, byte(x)) + // Pad the operand with NOPs to exactly min bytes. + for len(code) < end { + code = append(code, byte(NOP)) + } + return code +} + +func argLen(x uint32) int { + n := 0 + for x >= 0x80 { + n++ + x >>= 7 + } + return n + 1 +} + +// PrintOp prints an instruction. +// It is provided for debugging. +func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { + if op < OpcodeArgMin { + fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) + return + } + + var comment string + switch op { + case CONSTANT: + switch x := fn.Prog.Constants[arg].(type) { + case string: + comment = strconv.Quote(x) + case Bytes: + comment = "b" + strconv.Quote(string(x)) + default: + comment = fmt.Sprint(x) + } + case MAKEFUNC: + comment = fn.Prog.Functions[arg].Name + case SETLOCAL, LOCAL: + comment = fn.Locals[arg].Name + case SETGLOBAL, GLOBAL: + comment = fn.Prog.Globals[arg].Name + case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: + comment = fn.Prog.Names[arg] + case FREE: + comment = fn.Freevars[arg].Name + case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: + comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) + default: + // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: + // arg is just a number + } + var buf bytes.Buffer + fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) + if comment != "" { + fmt.Fprint(&buf, "\t; ", comment) + } + fmt.Fprintln(&buf) + os.Stderr.Write(buf.Bytes()) +} + +// newBlock returns a new block. +func (fcomp) newBlock() *block { + return &block{index: -1, initialstack: -1} +} + +// emit emits an instruction to the current block. 
+func (fcomp *fcomp) emit(op Opcode) { + if op >= OpcodeArgMin { + panic("missing arg: " + op.String()) + } + insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col} + fcomp.block.insns = append(fcomp.block.insns, insn) + fcomp.pos.Line = 0 + fcomp.pos.Col = 0 +} + +// emit1 emits an instruction with an immediate operand. +func (fcomp *fcomp) emit1(op Opcode, arg uint32) { + if op < OpcodeArgMin { + panic("unwanted arg: " + op.String()) + } + insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col} + fcomp.block.insns = append(fcomp.block.insns, insn) + fcomp.pos.Line = 0 + fcomp.pos.Col = 0 +} + +// jump emits a jump to the specified block. +// On return, the current block is unset. +func (fcomp *fcomp) jump(b *block) { + if b == fcomp.block { + panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs + } + fcomp.block.jmp = b + fcomp.block = nil +} + +// condjump emits a conditional jump (CJMP or ITERJMP) +// to the specified true/false blocks. +// (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) +// On return, the current block is unset. +func (fcomp *fcomp) condjump(op Opcode, t, f *block) { + if !(op == CJMP || op == ITERJMP) { + panic("not a conditional jump: " + op.String()) + } + fcomp.emit1(op, 0) // fill in address later + fcomp.block.cjmp = t + fcomp.jump(f) +} + +// nameIndex returns the index of the specified name +// within the name pool, adding it if necessary. +func (pcomp *pcomp) nameIndex(name string) uint32 { + index, ok := pcomp.names[name] + if !ok { + index = uint32(len(pcomp.prog.Names)) + pcomp.names[name] = index + pcomp.prog.Names = append(pcomp.prog.Names, name) + } + return index +} + +// constantIndex returns the index of the specified constant +// within the constant pool, adding it if necessary. 
+func (pcomp *pcomp) constantIndex(v interface{}) uint32 { + index, ok := pcomp.constants[v] + if !ok { + index = uint32(len(pcomp.prog.Constants)) + pcomp.constants[v] = index + pcomp.prog.Constants = append(pcomp.prog.Constants, v) + } + return index +} + +// functionIndex returns the index of the specified function +// AST the nestedfun pool, adding it if necessary. +func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { + index, ok := pcomp.functions[fn] + if !ok { + index = uint32(len(pcomp.prog.Functions)) + pcomp.functions[fn] = index + pcomp.prog.Functions = append(pcomp.prog.Functions, fn) + } + return index +} + +// string emits code to push the specified string. +func (fcomp *fcomp) string(s string) { + fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s)) +} + +// setPos sets the current source position. +// It should be called prior to any operation that can fail dynamically. +// All positions are assumed to belong to the same file. +func (fcomp *fcomp) setPos(pos syntax.Position) { + fcomp.pos = pos +} + +// set emits code to store the top-of-stack value +// to the specified local, cell, or global variable. +func (fcomp *fcomp) set(id *syntax.Ident) { + bind := id.Binding.(*resolve.Binding) + switch bind.Scope { + case resolve.Local: + fcomp.emit1(SETLOCAL, uint32(bind.Index)) + case resolve.Cell: + fcomp.emit1(SETLOCALCELL, uint32(bind.Index)) + case resolve.Global: + fcomp.emit1(SETGLOBAL, uint32(bind.Index)) + default: + log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope) + } +} + +// lookup emits code to push the value of the specified variable. 
func (fcomp *fcomp) lookup(id *syntax.Ident) {
	bind := id.Binding.(*resolve.Binding)
	if bind.Scope != resolve.Universal { // (universal lookup can't fail)
		fcomp.setPos(id.NamePos)
	}
	// Locals, free variables, cells and globals are addressed by binding index;
	// predeclared and universal names are addressed through the name pool.
	switch bind.Scope {
	case resolve.Local:
		fcomp.emit1(LOCAL, uint32(bind.Index))
	case resolve.Free:
		fcomp.emit1(FREECELL, uint32(bind.Index))
	case resolve.Cell:
		fcomp.emit1(LOCALCELL, uint32(bind.Index))
	case resolve.Global:
		fcomp.emit1(GLOBAL, uint32(bind.Index))
	case resolve.Predeclared:
		fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name))
	case resolve.Universal:
		fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name))
	default:
		log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope)
	}
}

// stmts emits code for each statement of stmts, in order.
func (fcomp *fcomp) stmts(stmts []syntax.Stmt) {
	for _, stmt := range stmts {
		fcomp.stmt(stmt)
	}
}

// stmt emits code for a single statement into the current block,
// creating new blocks for control flow as needed.
// It panics on a statement type it does not recognize.
func (fcomp *fcomp) stmt(stmt syntax.Stmt) {
	switch stmt := stmt.(type) {
	case *syntax.ExprStmt:
		if _, ok := stmt.X.(*syntax.Literal); ok {
			// Opt: don't compile doc comments only to pop them.
			return
		}
		fcomp.expr(stmt.X)
		fcomp.emit(POP)

	case *syntax.BranchStmt:
		// Resolver invariant: break/continue appear only within loops.
		switch stmt.Token {
		case syntax.PASS:
			// no-op
		case syntax.BREAK:
			// Jump to the break target of the innermost loop.
			b := fcomp.loops[len(fcomp.loops)-1].break_
			fcomp.jump(b)
			fcomp.block = fcomp.newBlock() // dead code
		case syntax.CONTINUE:
			// Jump to the continue target of the innermost loop.
			b := fcomp.loops[len(fcomp.loops)-1].continue_
			fcomp.jump(b)
			fcomp.block = fcomp.newBlock() // dead code
		}

	case *syntax.IfStmt:
		// Keep consistent with CondExpr.
		t := fcomp.newBlock()
		f := fcomp.newBlock()
		done := fcomp.newBlock()

		fcomp.ifelse(stmt.Cond, t, f)

		fcomp.block = t
		fcomp.stmts(stmt.True)
		fcomp.jump(done)

		fcomp.block = f
		fcomp.stmts(stmt.False)
		fcomp.jump(done)

		fcomp.block = done

	case *syntax.AssignStmt:
		switch stmt.Op {
		case syntax.EQ:
			// simple assignment: x = y
			fcomp.expr(stmt.RHS)
			fcomp.assign(stmt.OpPos, stmt.LHS)

		case syntax.PLUS_EQ,
			syntax.MINUS_EQ,
			syntax.STAR_EQ,
			syntax.SLASH_EQ,
			syntax.SLASHSLASH_EQ,
			syntax.PERCENT_EQ,
			syntax.AMP_EQ,
			syntax.PIPE_EQ,
			syntax.CIRCUMFLEX_EQ,
			syntax.LTLT_EQ,
			syntax.GTGT_EQ:
			// augmented assignment: x += y

			// set emits the final store; it is chosen by the form of the LHS.
			var set func()

			// Evaluate "address" of x exactly once to avoid duplicate side-effects.
			switch lhs := unparen(stmt.LHS).(type) {
			case *syntax.Ident:
				// x = ...
				fcomp.lookup(lhs)
				set = func() {
					fcomp.set(lhs)
				}

			case *syntax.IndexExpr:
				// x[y] = ...
				fcomp.expr(lhs.X)
				fcomp.expr(lhs.Y)
				fcomp.emit(DUP2)
				fcomp.setPos(lhs.Lbrack)
				fcomp.emit(INDEX)
				set = func() {
					fcomp.setPos(lhs.Lbrack)
					fcomp.emit(SETINDEX)
				}

			case *syntax.DotExpr:
				// x.f = ...
				fcomp.expr(lhs.X)
				fcomp.emit(DUP)
				name := fcomp.pcomp.nameIndex(lhs.Name.Name)
				fcomp.setPos(lhs.Dot)
				fcomp.emit1(ATTR, name)
				set = func() {
					fcomp.setPos(lhs.Dot)
					fcomp.emit1(SETFIELD, name)
				}

			default:
				panic(lhs)
			}

			fcomp.expr(stmt.RHS)

			if stmt.Op == syntax.PLUS_EQ {
				// Allow the runtime to optimize list += iterable.
				fcomp.setPos(stmt.OpPos)
				fcomp.emit(INPLACE_ADD)
			} else {
				// Map the augmented token (e.g. MINUS_EQ) to its plain
				// binary counterpart (e.g. MINUS) by offset from PLUS_EQ.
				fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS)
			}
			set()
		}

	case *syntax.DefStmt:
		fcomp.function(stmt.Function.(*resolve.Function))
		fcomp.set(stmt.Name)

	case *syntax.ForStmt:
		// Keep consistent with ForClause.
		head := fcomp.newBlock()
		body := fcomp.newBlock()
		tail := fcomp.newBlock()

		fcomp.expr(stmt.X)
		fcomp.setPos(stmt.For)
		fcomp.emit(ITERPUSH)
		fcomp.jump(head)

		fcomp.block = head
		fcomp.condjump(ITERJMP, tail, body)

		fcomp.block = body
		fcomp.assign(stmt.For, stmt.Vars)
		// break leaves via tail; continue returns to head.
		fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head})
		fcomp.stmts(stmt.Body)
		fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
		fcomp.jump(head)

		fcomp.block = tail
		fcomp.emit(ITERPOP)

	case *syntax.WhileStmt:
		head := fcomp.newBlock()
		body := fcomp.newBlock()
		done := fcomp.newBlock()

		fcomp.jump(head)
		fcomp.block = head
		fcomp.ifelse(stmt.Cond, body, done)

		fcomp.block = body
		fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head})
		fcomp.stmts(stmt.Body)
		fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
		fcomp.jump(head)

		fcomp.block = done

	case *syntax.ReturnStmt:
		if stmt.Result != nil {
			fcomp.expr(stmt.Result)
		} else {
			fcomp.emit(NONE)
		}
		fcomp.emit(RETURN)
		fcomp.block = fcomp.newBlock() // dead code

	case *syntax.LoadStmt:
		// Push the names to import, then the module name.
		for i := range stmt.From {
			fcomp.string(stmt.From[i].Name)
		}
		module := stmt.Module.Value.(string)
		fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{
			Name: module,
			Pos:  stmt.Module.TokenPos,
		})
		fcomp.string(module)
		fcomp.setPos(stmt.Load)
		fcomp.emit1(LOAD, uint32(len(stmt.From)))
		// Store to the targets in reverse order.
		for i := range stmt.To {
			fcomp.set(stmt.To[len(stmt.To)-1-i])
		}

	default:
		start, _ := stmt.Span()
		log.Panicf("%s: exec: unexpected statement %T", start, stmt)
	}
}

// assign implements lhs = rhs for arbitrary expressions lhs.
// RHS is on top of stack, consumed.
+func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { + switch lhs := lhs.(type) { + case *syntax.ParenExpr: + // (lhs) = rhs + fcomp.assign(pos, lhs.X) + + case *syntax.Ident: + // x = rhs + fcomp.set(lhs) + + case *syntax.TupleExpr: + // x, y = rhs + fcomp.assignSequence(pos, lhs.List) + + case *syntax.ListExpr: + // [x, y] = rhs + fcomp.assignSequence(pos, lhs.List) + + case *syntax.IndexExpr: + // x[y] = rhs + fcomp.expr(lhs.X) + fcomp.emit(EXCH) + fcomp.expr(lhs.Y) + fcomp.emit(EXCH) + fcomp.setPos(lhs.Lbrack) + fcomp.emit(SETINDEX) + + case *syntax.DotExpr: + // x.f = rhs + fcomp.expr(lhs.X) + fcomp.emit(EXCH) + fcomp.setPos(lhs.Dot) + fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) + + default: + panic(lhs) + } +} + +func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { + fcomp.setPos(pos) + fcomp.emit1(UNPACK, uint32(len(lhs))) + for i := range lhs { + fcomp.assign(pos, lhs[i]) + } +} + +func (fcomp *fcomp) expr(e syntax.Expr) { + switch e := e.(type) { + case *syntax.ParenExpr: + fcomp.expr(e.X) + + case *syntax.Ident: + fcomp.lookup(e) + + case *syntax.Literal: + // e.Value is int64, float64, *bigInt, string + v := e.Value + if e.Token == syntax.BYTES { + v = Bytes(v.(string)) + } + fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v)) + + case *syntax.ListExpr: + for _, x := range e.List { + fcomp.expr(x) + } + fcomp.emit1(MAKELIST, uint32(len(e.List))) + + case *syntax.CondExpr: + // Keep consistent with IfStmt. 
+ t := fcomp.newBlock() + f := fcomp.newBlock() + done := fcomp.newBlock() + + fcomp.ifelse(e.Cond, t, f) + + fcomp.block = t + fcomp.expr(e.True) + fcomp.jump(done) + + fcomp.block = f + fcomp.expr(e.False) + fcomp.jump(done) + + fcomp.block = done + + case *syntax.IndexExpr: + fcomp.expr(e.X) + fcomp.expr(e.Y) + fcomp.setPos(e.Lbrack) + fcomp.emit(INDEX) + + case *syntax.SliceExpr: + fcomp.setPos(e.Lbrack) + fcomp.expr(e.X) + if e.Lo != nil { + fcomp.expr(e.Lo) + } else { + fcomp.emit(NONE) + } + if e.Hi != nil { + fcomp.expr(e.Hi) + } else { + fcomp.emit(NONE) + } + if e.Step != nil { + fcomp.expr(e.Step) + } else { + fcomp.emit(NONE) + } + fcomp.emit(SLICE) + + case *syntax.Comprehension: + if e.Curly { + fcomp.emit(MAKEDICT) + } else { + fcomp.emit1(MAKELIST, 0) + } + fcomp.comprehension(e, 0) + + case *syntax.TupleExpr: + fcomp.tuple(e.List) + + case *syntax.DictExpr: + fcomp.emit(MAKEDICT) + for _, entry := range e.List { + entry := entry.(*syntax.DictEntry) + fcomp.emit(DUP) + fcomp.expr(entry.Key) + fcomp.expr(entry.Value) + fcomp.setPos(entry.Colon) + fcomp.emit(SETDICTUNIQ) + } + + case *syntax.UnaryExpr: + fcomp.expr(e.X) + fcomp.setPos(e.OpPos) + switch e.Op { + case syntax.MINUS: + fcomp.emit(UMINUS) + case syntax.PLUS: + fcomp.emit(UPLUS) + case syntax.NOT: + fcomp.emit(NOT) + case syntax.TILDE: + fcomp.emit(TILDE) + default: + log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op) + } + + case *syntax.BinaryExpr: + switch e.Op { + // short-circuit operators + // TODO(adonovan): use ifelse to simplify conditions. 
+ case syntax.OR: + // x or y => if x then x else y + done := fcomp.newBlock() + y := fcomp.newBlock() + + fcomp.expr(e.X) + fcomp.emit(DUP) + fcomp.condjump(CJMP, done, y) + + fcomp.block = y + fcomp.emit(POP) // discard X + fcomp.expr(e.Y) + fcomp.jump(done) + + fcomp.block = done + + case syntax.AND: + // x and y => if x then y else x + done := fcomp.newBlock() + y := fcomp.newBlock() + + fcomp.expr(e.X) + fcomp.emit(DUP) + fcomp.condjump(CJMP, y, done) + + fcomp.block = y + fcomp.emit(POP) // discard X + fcomp.expr(e.Y) + fcomp.jump(done) + + fcomp.block = done + + case syntax.PLUS: + fcomp.plus(e) + + default: + // all other strict binary operator (includes comparisons) + fcomp.expr(e.X) + fcomp.expr(e.Y) + fcomp.binop(e.OpPos, e.Op) + } + + case *syntax.DotExpr: + fcomp.expr(e.X) + fcomp.setPos(e.Dot) + fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) + + case *syntax.CallExpr: + fcomp.call(e) + + case *syntax.LambdaExpr: + fcomp.function(e.Function.(*resolve.Function)) + + default: + start, _ := e.Span() + log.Panicf("%s: unexpected expr %T", start, e) + } +} + +type summand struct { + x syntax.Expr + plusPos syntax.Position +} + +// plus emits optimized code for ((a+b)+...)+z that avoids naive +// quadratic behavior for strings, tuples, and lists, +// and folds together adjacent literals of the same type. +func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { + // Gather all the right operands of the left tree of plusses. + // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. + args := make([]summand, 0, 2) // common case: 2 operands + for plus := e; ; { + args = append(args, summand{unparen(plus.Y), plus.OpPos}) + left := unparen(plus.X) + x, ok := left.(*syntax.BinaryExpr) + if !ok || x.Op != syntax.PLUS { + args = append(args, summand{x: left}) + break + } + plus = x + } + // Reverse args to syntactic order. 
+ for i, n := 0, len(args)/2; i < n; i++ { + j := len(args) - 1 - i + args[i], args[j] = args[j], args[i] + } + + // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). + out := args[:0] // compact in situ + for i := 0; i < len(args); { + j := i + 1 + if code := addable(args[i].x); code != 0 { + for j < len(args) && addable(args[j].x) == code { + j++ + } + if j > i+1 { + args[i].x = add(code, args[i:j]) + } + } + out = append(out, args[i]) + i = j + } + args = out + + // Emit code for an n-ary sum (n > 0). + fcomp.expr(args[0].x) + for _, summand := range args[1:] { + fcomp.expr(summand.x) + fcomp.setPos(summand.plusPos) + fcomp.emit(PLUS) + } + + // If len(args) > 2, use of an accumulator instead of a chain of + // PLUS operations may be more efficient. + // However, no gain was measured on a workload analogous to Bazel loading; + // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. + // + // We cannot use a single n-ary SUM operation + // a b c SUM<3> + // because we need to report a distinct error for each + // individual '+' operation, so three additional operations are + // needed: + // + // ACCSTART => create buffer and append to it + // ACCUM => append to buffer + // ACCEND => get contents of buffer + // + // For string, list, and tuple values, the interpreter can + // optimize these operations by using a mutable buffer. + // For all other types, ACCSTART and ACCEND would behave like + // the identity function and ACCUM behaves like PLUS. + // ACCUM must correctly support user-defined operations + // such as list+foo. + // + // fcomp.emit(ACCSTART) + // for _, summand := range args[1:] { + // fcomp.expr(summand.x) + // fcomp.setPos(summand.plusPos) + // fcomp.emit(ACCUM) + // } + // fcomp.emit(ACCEND) +} + +// addable reports whether e is a statically addable +// expression: a [s]tring, [b]ytes, [l]ist, or [t]uple. 
func addable(e syntax.Expr) rune {
	switch e := e.(type) {
	case *syntax.Literal:
		// TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding.
		switch e.Token {
		case syntax.STRING:
			return 's'
		case syntax.BYTES:
			return 'b'
		}
	case *syntax.ListExpr:
		return 'l'
	case *syntax.TupleExpr:
		return 't'
	}
	// Zero means "not statically addable".
	return 0
}

// add returns an expression denoting the sum of args,
// which are all addable values of the type indicated by code.
// The resulting syntax is degenerate, lacking position, etc.
func add(code rune, args []summand) syntax.Expr {
	switch code {
	case 's', 'b':
		// Concatenate the literal values into a single literal.
		var buf strings.Builder
		for _, arg := range args {
			buf.WriteString(arg.x.(*syntax.Literal).Value.(string))
		}
		tok := syntax.STRING
		if code == 'b' {
			tok = syntax.BYTES
		}
		return &syntax.Literal{Token: tok, Value: buf.String()}
	case 'l':
		var elems []syntax.Expr
		for _, arg := range args {
			elems = append(elems, arg.x.(*syntax.ListExpr).List...)
		}
		return &syntax.ListExpr{List: elems}
	case 't':
		var elems []syntax.Expr
		for _, arg := range args {
			elems = append(elems, arg.x.(*syntax.TupleExpr).List...)
		}
		return &syntax.TupleExpr{List: elems}
	}
	panic(code)
}

// unparen returns e with any enclosing parentheses removed.
func unparen(e syntax.Expr) syntax.Expr {
	if p, ok := e.(*syntax.ParenExpr); ok {
		return unparen(p.X)
	}
	return e
}

// binop emits the instruction(s) for the strict binary operator op,
// whose operands are already on the stack, recording pos as the
// position of the operation. It panics on an unexpected operator.
func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) {
	// TODO(adonovan): simplify by assuming syntax and compiler constants align.
	fcomp.setPos(pos)
	switch op {
	// arithmetic
	case syntax.PLUS:
		fcomp.emit(PLUS)
	case syntax.MINUS:
		fcomp.emit(MINUS)
	case syntax.STAR:
		fcomp.emit(STAR)
	case syntax.SLASH:
		fcomp.emit(SLASH)
	case syntax.SLASHSLASH:
		fcomp.emit(SLASHSLASH)
	case syntax.PERCENT:
		fcomp.emit(PERCENT)
	case syntax.AMP:
		fcomp.emit(AMP)
	case syntax.PIPE:
		fcomp.emit(PIPE)
	case syntax.CIRCUMFLEX:
		fcomp.emit(CIRCUMFLEX)
	case syntax.LTLT:
		fcomp.emit(LTLT)
	case syntax.GTGT:
		fcomp.emit(GTGT)
	case syntax.IN:
		fcomp.emit(IN)
	case syntax.NOT_IN:
		// "x not in y" is compiled as the negation of "x in y".
		fcomp.emit(IN)
		fcomp.emit(NOT)

		// comparisons
	case syntax.EQL,
		syntax.NEQ,
		syntax.GT,
		syntax.LT,
		syntax.LE,
		syntax.GE:
		// The comparison opcode is found by offset from EQL.
		fcomp.emit(Opcode(op-syntax.EQL) + EQL)

	default:
		log.Panicf("%s: unexpected binary op: %s", pos, op)
	}
}

// call emits code for a function call: the callee, then the arguments,
// then the CALL-family instruction positioned at the left parenthesis.
func (fcomp *fcomp) call(call *syntax.CallExpr) {
	// TODO(adonovan): opt: Use optimized path for calling methods
	// of built-ins: x.f(...) to avoid materializing a closure.
	// if dot, ok := call.Fcomp.(*syntax.DotExpr); ok {
	// 	fcomp.expr(dot.X)
	// 	fcomp.args(call)
	// 	fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name))
	// 	return
	// }

	// usual case
	fcomp.expr(call.Fn)
	op, arg := fcomp.args(call)
	fcomp.setPos(call.Lparen)
	fcomp.emit1(op, arg)
}

// args emits code to push a tuple of positional arguments
// and a tuple of named arguments containing alternating keys and values.
// Either or both tuples may be empty (TODO(adonovan): optimize).
//
// It returns the CALL-family opcode to use and its operand, which packs
// the positional count above the low byte and the named count in the low byte.
func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) {
	// callmode: bit 0 is set if *args is present, bit 1 if **kwargs is present.
	var callmode int
	// Compute the number of each kind of parameter.
	var p, n int // number of positional, named arguments
	var varargs, kwargs syntax.Expr
	for _, arg := range call.Args {
		if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ {

			// named argument (name, value)
			fcomp.string(binary.X.(*syntax.Ident).Name)
			fcomp.expr(binary.Y)
			n++
			continue
		}
		if unary, ok := arg.(*syntax.UnaryExpr); ok {
			if unary.Op == syntax.STAR {
				callmode |= 1
				varargs = unary.X
				continue
			} else if unary.Op == syntax.STARSTAR {
				callmode |= 2
				kwargs = unary.X
				continue
			}
		}

		// positional argument
		fcomp.expr(arg)
		p++
	}

	// Python2 and Python3 both permit named arguments
	// to appear both before and after a *args argument:
	//   f(1, 2, x=3, *[4], y=5, **dict(z=6))
	//
	// They also differ in their evaluation order:
	//  Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last)
	//  Python3: 1 2 4 3 5 6 (positional args evaluated before named args)
	// Starlark-in-Java historically used a third order:
	//  Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right)
	//
	// After discussion in github.com/bazelbuild/starlark#13, the
	// spec now requires Starlark to statically reject named
	// arguments after *args (e.g. y=5), and to use Python2-style
	// evaluation order. This is both easy to implement and
	// consistent with lexical order:
	//
	//   f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6

	// *args
	if varargs != nil {
		fcomp.expr(varargs)
	}

	// **kwargs
	if kwargs != nil {
		fcomp.expr(kwargs)
	}

	// TODO(adonovan): avoid this with a more flexible encoding.
	if p >= 256 || n >= 256 {
		// resolve already checked this; should be unreachable
		panic("too many arguments in call")
	}

	return CALL + Opcode(callmode), uint32(p<<8 | n)
}

// tuple emits code to evaluate each element in order
// and build a tuple of them with MAKETUPLE.
func (fcomp *fcomp) tuple(elems []syntax.Expr) {
	for _, elem := range elems {
		fcomp.expr(elem)
	}
	fcomp.emit1(MAKETUPLE, uint32(len(elems)))
}

// comprehension emits code for the clauses of comp starting at clauseIndex,
// recursing once per clause. When all clauses have been consumed it emits
// the code that adds the body to the accumulator.
// It panics on a clause type it does not recognize.
func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) {
	if clauseIndex == len(comp.Clauses) {
		fcomp.emit(DUP) // accumulator
		if comp.Curly {
			// dict: {k:v for ...}
			// Parser ensures that body is of form k:v.
			// Python-style set comprehensions {body for vars in x}
			// are not supported.
			entry := comp.Body.(*syntax.DictEntry)
			fcomp.expr(entry.Key)
			fcomp.expr(entry.Value)
			fcomp.setPos(entry.Colon)
			fcomp.emit(SETDICT)
		} else {
			// list: [body for vars in x]
			fcomp.expr(comp.Body)
			fcomp.emit(APPEND)
		}
		return
	}

	clause := comp.Clauses[clauseIndex]
	switch clause := clause.(type) {
	case *syntax.IfClause:
		t := fcomp.newBlock()
		done := fcomp.newBlock()
		fcomp.ifelse(clause.Cond, t, done)

		fcomp.block = t
		fcomp.comprehension(comp, clauseIndex+1)
		fcomp.jump(done)

		fcomp.block = done
		return

	case *syntax.ForClause:
		// Keep consistent with ForStmt.
		head := fcomp.newBlock()
		body := fcomp.newBlock()
		tail := fcomp.newBlock()

		fcomp.expr(clause.X)
		fcomp.setPos(clause.For)
		fcomp.emit(ITERPUSH)
		fcomp.jump(head)

		fcomp.block = head
		fcomp.condjump(ITERJMP, tail, body)

		fcomp.block = body
		fcomp.assign(clause.For, clause.Vars)
		fcomp.comprehension(comp, clauseIndex+1)
		fcomp.jump(head)

		fcomp.block = tail
		fcomp.emit(ITERPOP)
		return
	}

	start, _ := clause.Span()
	log.Panicf("%s: unexpected comprehension clause %T", start, clause)
}

// function emits code that pushes a new function value for f.
// It pushes a combined tuple of parameter defaults and captured free
// variables, compiles the body of f into a Funcode, and emits MAKEFUNC.
func (fcomp *fcomp) function(f *resolve.Function) {
	// Evaluation of the defaults may fail, so record the position.
	fcomp.setPos(f.Pos)

	// To reduce allocation, we emit a combined tuple
	// for the defaults and the freevars.
	// The function knows where to split it at run time.

	// Generate tuple of parameter defaults. For:
	//  def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs)
	// the tuple is:
	//  (dp2, dp3, MANDATORY, dk2, MANDATORY).
	ndefaults := 0
	seenStar := false
	for _, param := range f.Params {
		switch param := param.(type) {
		case *syntax.BinaryExpr:
			fcomp.expr(param.Y)
			ndefaults++
		case *syntax.UnaryExpr:
			seenStar = true // * or *args (also **kwargs)
		case *syntax.Ident:
			if seenStar {
				fcomp.emit(MANDATORY)
				ndefaults++
			}
		}
	}

	// Capture the cells of the function's
	// free variables from the lexical environment.
	for _, freevar := range f.FreeVars {
		// Don't call fcomp.lookup because we want
		// the cell itself, not its content.
		switch freevar.Scope {
		case resolve.Free:
			fcomp.emit1(FREE, uint32(freevar.Index))
		case resolve.Cell:
			fcomp.emit1(LOCAL, uint32(freevar.Index))
		}
	}

	fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars)))

	funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars)

	if debug {
		// TODO(adonovan): do compilations sequentially not as a tree,
		// to make the log easier to read.
		// Simplify by identifying Toplevel and functionIndex 0.
		fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos)
	}

	// def f(a, *, b=1) has only 2 parameters.
	numParams := len(f.Params)
	if f.NumKwonlyParams > 0 && !f.HasVarargs {
		numParams--
	}

	funcode.NumParams = numParams
	funcode.NumKwonlyParams = f.NumKwonlyParams
	funcode.HasVarargs = f.HasVarargs
	funcode.HasKwargs = f.HasKwargs
	fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode))
}

// ifelse emits a Boolean control flow decision.
// On return, the current block is unset.
+func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { + switch cond := cond.(type) { + case *syntax.UnaryExpr: + if cond.Op == syntax.NOT { + // if not x then goto t else goto f + // => + // if x then goto f else goto t + fcomp.ifelse(cond.X, f, t) + return + } + + case *syntax.BinaryExpr: + switch cond.Op { + case syntax.AND: + // if x and y then goto t else goto f + // => + // if x then ifelse(y, t, f) else goto f + fcomp.expr(cond.X) + y := fcomp.newBlock() + fcomp.condjump(CJMP, y, f) + + fcomp.block = y + fcomp.ifelse(cond.Y, t, f) + return + + case syntax.OR: + // if x or y then goto t else goto f + // => + // if x then goto t else ifelse(y, t, f) + fcomp.expr(cond.X) + y := fcomp.newBlock() + fcomp.condjump(CJMP, t, y) + + fcomp.block = y + fcomp.ifelse(cond.Y, t, f) + return + case syntax.NOT_IN: + // if x not in y then goto t else goto f + // => + // if x in y then goto f else goto t + copy := *cond + copy.Op = syntax.IN + fcomp.expr(©) + fcomp.condjump(CJMP, f, t) + return + } + } + + // general case + fcomp.expr(cond) + fcomp.condjump(CJMP, t, f) +} diff --git a/internal/compile/compile_test.go b/internal/compile/compile_test.go new file mode 100644 index 0000000..2c9917a --- /dev/null +++ b/internal/compile/compile_test.go @@ -0,0 +1,74 @@ +package compile_test + +import ( + "bytes" + "strings" + "testing" + + "go.starlark.net/starlark" +) + +// TestSerialization verifies that a serialized program can be loaded, +// deserialized, and executed. 
+func TestSerialization(t *testing.T) { + predeclared := starlark.StringDict{ + "x": starlark.String("mur"), + "n": starlark.MakeInt(2), + } + const src = ` +def mul(a, b): + return a * b + +y = mul(x, n) +` + _, oldProg, err := starlark.SourceProgram("mul.star", src, predeclared.Has) + if err != nil { + t.Fatal(err) + } + + buf := new(bytes.Buffer) + if err := oldProg.Write(buf); err != nil { + t.Fatalf("oldProg.WriteTo: %v", err) + } + + newProg, err := starlark.CompiledProgram(buf) + if err != nil { + t.Fatalf("CompiledProgram: %v", err) + } + + thread := new(starlark.Thread) + globals, err := newProg.Init(thread, predeclared) + if err != nil { + t.Fatalf("newProg.Init: %v", err) + } + if got, want := globals["y"], starlark.String("murmur"); got != want { + t.Errorf("Value of global was %s, want %s", got, want) + t.Logf("globals: %v", globals) + } + + // Verify stack frame. + predeclared["n"] = starlark.None + _, err = newProg.Init(thread, predeclared) + evalErr, ok := err.(*starlark.EvalError) + if !ok { + t.Fatalf("newProg.Init call returned err %v, want *EvalError", err) + } + const want = `Traceback (most recent call last): + mul.star:5:8: in <toplevel> + mul.star:3:14: in mul +Error: unknown binary op: string * NoneType` + if got := evalErr.Backtrace(); got != want { + t.Fatalf("got <<%s>>, want <<%s>>", got, want) + } +} + +func TestGarbage(t *testing.T) { + const garbage = "This is not a compiled Starlark program." 
+ _, err := starlark.CompiledProgram(strings.NewReader(garbage)) + if err == nil { + t.Fatalf("CompiledProgram did not report an error when decoding garbage") + } + if !strings.Contains(err.Error(), "not a compiled module") { + t.Fatalf("CompiledProgram reported the wrong error when decoding garbage: %v", err) + } +} diff --git a/internal/compile/serial.go b/internal/compile/serial.go new file mode 100644 index 0000000..adadabf --- /dev/null +++ b/internal/compile/serial.go @@ -0,0 +1,395 @@ +package compile + +// This file defines functions to read and write a compile.Program to a file. +// +// It is the client's responsibility to avoid version skew between the +// compiler used to produce a file and the interpreter that consumes it. +// The version number is provided as a constant. +// Incompatible protocol changes should also increment the version number. +// +// Encoding +// +// Program: +// "sky!" [4]byte # magic number +// str uint32le # offset of <strings> section +// version varint # must match Version +// filename string +// numloads varint +// loads []Ident +// numnames varint +// names []string +// numconsts varint +// consts []Constant +// numglobals varint +// globals []Ident +// toplevel Funcode +// numfuncs varint +// funcs []Funcode +// <strings> []byte # concatenation of all referenced strings +// EOF +// +// Funcode: +// id Ident +// code []byte +// pclinetablen varint +// pclinetab []varint +// numlocals varint +// locals []Ident +// numcells varint +// cells []int +// numfreevars varint +// freevar []Ident +// maxstack varint +// numparams varint +// numkwonlyparams varint +// hasvarargs varint (0 or 1) +// haskwargs varint (0 or 1) +// +// Ident: +// filename string +// line, col varint +// +// Constant: # type data +// type varint # 0=string string +// data ... # 1=bytes string +// # 2=int varint +// # 3=float varint (bits as uint64) +// # 4=bigint string (decimal ASCII text) +// +// The encoding starts with a four-byte magic number. 
+// The next four bytes are a little-endian uint32 +// that provides the offset of the string section +// at the end of the file, which contains the ordered +// concatenation of all strings referenced by the +// program. This design permits the decoder to read +// the first and second parts of the file into different +// memory allocations: the first (the encoded program) +// is transient, but the second (the strings) persists +// for the life of the Program. +// +// Within the encoded program, all strings are referred +// to by their length. As the encoder and decoder process +// the entire file sequentially, they are in lock step, +// so the start offset of each string is implicit. +// +// Program.Code is represented as a []byte slice to permit +// modification when breakpoints are set. All other strings +// are represented as strings. They all (unsafely) share the +// same backing byte slice. +// +// Aside from the str field, all integers are encoded as varints. + +import ( + "encoding/binary" + "fmt" + "math" + "math/big" + debugpkg "runtime/debug" + "unsafe" + + "go.starlark.net/syntax" +) + +const magic = "!sky" + +// Encode encodes a compiled Starlark program. +func (prog *Program) Encode() []byte { + var e encoder + e.p = append(e.p, magic...) + e.p = append(e.p, "????"...) 
// string data offset; filled in later + e.int(Version) + e.string(prog.Toplevel.Pos.Filename()) + e.bindings(prog.Loads) + e.int(len(prog.Names)) + for _, name := range prog.Names { + e.string(name) + } + e.int(len(prog.Constants)) + for _, c := range prog.Constants { + switch c := c.(type) { + case string: + e.int(0) + e.string(c) + case Bytes: + e.int(1) + e.string(string(c)) + case int64: + e.int(2) + e.int64(c) + case float64: + e.int(3) + e.uint64(math.Float64bits(c)) + case *big.Int: + e.int(4) + e.string(c.Text(10)) + } + } + e.bindings(prog.Globals) + e.function(prog.Toplevel) + e.int(len(prog.Functions)) + for _, fn := range prog.Functions { + e.function(fn) + } + + // Patch in the offset of the string data section. + binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p))) + + return append(e.p, e.s...) +} + +type encoder struct { + p []byte // encoded program + s []byte // strings + tmp [binary.MaxVarintLen64]byte +} + +func (e *encoder) int(x int) { + e.int64(int64(x)) +} + +func (e *encoder) int64(x int64) { + n := binary.PutVarint(e.tmp[:], x) + e.p = append(e.p, e.tmp[:n]...) +} + +func (e *encoder) uint64(x uint64) { + n := binary.PutUvarint(e.tmp[:], x) + e.p = append(e.p, e.tmp[:n]...) +} + +func (e *encoder) string(s string) { + e.int(len(s)) + e.s = append(e.s, s...) +} + +func (e *encoder) bytes(b []byte) { + e.int(len(b)) + e.s = append(e.s, b...) 
+} + +func (e *encoder) binding(bind Binding) { + e.string(bind.Name) + e.int(int(bind.Pos.Line)) + e.int(int(bind.Pos.Col)) +} + +func (e *encoder) bindings(binds []Binding) { + e.int(len(binds)) + for _, bind := range binds { + e.binding(bind) + } +} + +func (e *encoder) function(fn *Funcode) { + e.binding(Binding{fn.Name, fn.Pos}) + e.string(fn.Doc) + e.bytes(fn.Code) + e.int(len(fn.pclinetab)) + for _, x := range fn.pclinetab { + e.int64(int64(x)) + } + e.bindings(fn.Locals) + e.int(len(fn.Cells)) + for _, index := range fn.Cells { + e.int(index) + } + e.bindings(fn.Freevars) + e.int(fn.MaxStack) + e.int(fn.NumParams) + e.int(fn.NumKwonlyParams) + e.int(b2i(fn.HasVarargs)) + e.int(b2i(fn.HasKwargs)) +} + +func b2i(b bool) int { + if b { + return 1 + } else { + return 0 + } +} + +// DecodeProgram decodes a compiled Starlark program from data. +func DecodeProgram(data []byte) (_ *Program, err error) { + if len(data) < len(magic) { + return nil, fmt.Errorf("not a compiled module: no magic number") + } + if got := string(data[:4]); got != magic { + return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q", + got, magic) + } + defer func() { + if x := recover(); x != nil { + debugpkg.PrintStack() + err = fmt.Errorf("internal error while decoding program: %v", x) + } + }() + + offset := binary.LittleEndian.Uint32(data[4:8]) + d := decoder{ + p: data[8:offset], + s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist + } + + if v := d.int(); v != Version { + return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version) + } + + filename := d.string() + d.filename = &filename + + loads := d.bindings() + + names := make([]string, d.int()) + for i := range names { + names[i] = d.string() + } + + // constants + constants := make([]interface{}, d.int()) + for i := range constants { + var c interface{} + switch d.int() { + case 0: + c = d.string() + case 1: + c = Bytes(d.string()) + case 2: + c = d.int64() + case 3: + c 
= math.Float64frombits(d.uint64()) + case 4: + c, _ = new(big.Int).SetString(d.string(), 10) + } + constants[i] = c + } + + globals := d.bindings() + toplevel := d.function() + funcs := make([]*Funcode, d.int()) + for i := range funcs { + funcs[i] = d.function() + } + + prog := &Program{ + Loads: loads, + Names: names, + Constants: constants, + Globals: globals, + Functions: funcs, + Toplevel: toplevel, + } + toplevel.Prog = prog + for _, f := range funcs { + f.Prog = prog + } + + if len(d.p)+len(d.s) > 0 { + return nil, fmt.Errorf("internal error: unconsumed data during decoding") + } + + return prog, nil +} + +type decoder struct { + p []byte // encoded program + s []byte // strings + filename *string // (indirect to avoid keeping decoder live) +} + +func (d *decoder) int() int { + return int(d.int64()) +} + +func (d *decoder) int64() int64 { + x, len := binary.Varint(d.p[:]) + d.p = d.p[len:] + return x +} + +func (d *decoder) uint64() uint64 { + x, len := binary.Uvarint(d.p[:]) + d.p = d.p[len:] + return x +} + +func (d *decoder) string() (s string) { + if slice := d.bytes(); len(slice) > 0 { + // Avoid a memory allocation for each string + // by unsafely aliasing slice. 
+ type string struct { + data *byte + len int + } + ptr := (*string)(unsafe.Pointer(&s)) + ptr.data = &slice[0] + ptr.len = len(slice) + } + return s +} + +func (d *decoder) bytes() []byte { + len := d.int() + r := d.s[:len:len] + d.s = d.s[len:] + return r +} + +func (d *decoder) binding() Binding { + name := d.string() + line := int32(d.int()) + col := int32(d.int()) + return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)} +} + +func (d *decoder) bindings() []Binding { + bindings := make([]Binding, d.int()) + for i := range bindings { + bindings[i] = d.binding() + } + return bindings +} + +func (d *decoder) ints() []int { + ints := make([]int, d.int()) + for i := range ints { + ints[i] = d.int() + } + return ints +} + +func (d *decoder) bool() bool { return d.int() != 0 } + +func (d *decoder) function() *Funcode { + id := d.binding() + doc := d.string() + code := d.bytes() + pclinetab := make([]uint16, d.int()) + for i := range pclinetab { + pclinetab[i] = uint16(d.int()) + } + locals := d.bindings() + cells := d.ints() + freevars := d.bindings() + maxStack := d.int() + numParams := d.int() + numKwonlyParams := d.int() + hasVarargs := d.int() != 0 + hasKwargs := d.int() != 0 + return &Funcode{ + // Prog is filled in later. + Pos: id.Pos, + Name: id.Name, + Doc: doc, + Code: code, + pclinetab: pclinetab, + Locals: locals, + Cells: cells, + Freevars: freevars, + MaxStack: maxStack, + NumParams: numParams, + NumKwonlyParams: numKwonlyParams, + HasVarargs: hasVarargs, + HasKwargs: hasKwargs, + } +} diff --git a/internal/spell/spell.go b/internal/spell/spell.go new file mode 100644 index 0000000..7739fab --- /dev/null +++ b/internal/spell/spell.go @@ -0,0 +1,115 @@ +// Package spell file defines a simple spelling checker for use in attribute errors +// such as "no such field .foo; did you mean .food?". 
+package spell + +import ( + "strings" + "unicode" +) + +// Nearest returns the element of candidates +// nearest to x using the Levenshtein metric, +// or "" if none were promising. +func Nearest(x string, candidates []string) string { + // Ignore underscores and case when matching. + fold := func(s string) string { + return strings.Map(func(r rune) rune { + if r == '_' { + return -1 + } + return unicode.ToLower(r) + }, s) + } + + x = fold(x) + + var best string + bestD := (len(x) + 1) / 2 // allow up to 50% typos + for _, c := range candidates { + d := levenshtein(x, fold(c), bestD) + if d < bestD { + bestD = d + best = c + } + } + return best +} + +// levenshtein returns the non-negative Levenshtein edit distance +// between the byte strings x and y. +// +// If the computed distance exceeds max, +// the function may return early with an approximate value > max. +func levenshtein(x, y string, max int) int { + // This implementation is derived from one by Laurent Le Brun in + // Bazel that uses the single-row space efficiency trick + // described at bitbucket.org/clearer/iosifovich. + + // Let x be the shorter string. + if len(x) > len(y) { + x, y = y, x + } + + // Remove common prefix. 
+ for i := 0; i < len(x); i++ { + if x[i] != y[i] { + x = x[i:] + y = y[i:] + break + } + } + if x == "" { + return len(y) + } + + if d := abs(len(x) - len(y)); d > max { + return d // excessive length divergence + } + + row := make([]int, len(y)+1) + for i := range row { + row[i] = i + } + + for i := 1; i <= len(x); i++ { + row[0] = i + best := i + prev := i - 1 + for j := 1; j <= len(y); j++ { + a := prev + b2i(x[i-1] != y[j-1]) // substitution + b := 1 + row[j-1] // deletion + c := 1 + row[j] // insertion + k := min(a, min(b, c)) + prev, row[j] = row[j], k + best = min(best, k) + } + if best > max { + return best + } + } + return row[len(y)] +} + +func b2i(b bool) int { + if b { + return 1 + } else { + return 0 + } +} + +func min(x, y int) int { + if x < y { + return x + } else { + return y + } +} + +func abs(x int) int { + if x >= 0 { + return x + } else { + return -x + } +} diff --git a/lib/proto/cmd/star2proto/star2proto.go b/lib/proto/cmd/star2proto/star2proto.go new file mode 100644 index 0000000..7911723 --- /dev/null +++ b/lib/proto/cmd/star2proto/star2proto.go @@ -0,0 +1,142 @@ +// The star2proto command executes a Starlark file and prints a protocol +// message, which it expects to find in a module-level variable named 'result'. +// +// THIS COMMAND IS EXPERIMENTAL AND ITS INTERFACE MAY CHANGE. +package main + +// TODO(adonovan): add features to make this a useful tool for querying, +// converting, and building messages in proto, JSON, and YAML. +// - define operations for reading and writing files. +// - support (e.g.) querying a proto file given a '-e expr' flag. +// This will need a convenient way to put the relevant descriptors in scope. 
+ +import ( + "flag" + "fmt" + "io/ioutil" + "log" + "os" + "strings" + + starlarkproto "go.starlark.net/lib/proto" + "go.starlark.net/resolve" + "go.starlark.net/starlark" + "go.starlark.net/starlarkjson" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/encoding/prototext" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protodesc" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/reflect/protoregistry" + "google.golang.org/protobuf/types/descriptorpb" +) + +// flags +var ( + outputFlag = flag.String("output", "text", "output format (text, wire, json)") + varFlag = flag.String("var", "result", "the variable to output") + descriptors = flag.String("descriptors", "", "comma-separated list of names of files containing proto.FileDescriptorProto messages") +) + +// Starlark dialect flags +func init() { + flag.BoolVar(&resolve.AllowFloat, "fp", true, "allow floating-point numbers") + flag.BoolVar(&resolve.AllowSet, "set", resolve.AllowSet, "allow set data type") + flag.BoolVar(&resolve.AllowLambda, "lambda", resolve.AllowLambda, "allow lambda expressions") + flag.BoolVar(&resolve.AllowNestedDef, "nesteddef", resolve.AllowNestedDef, "allow nested def statements") +} + +func main() { + log.SetPrefix("star2proto: ") + log.SetFlags(0) + flag.Parse() + if len(flag.Args()) != 1 { + fatalf("requires a single Starlark file name") + } + filename := flag.Args()[0] + + // By default, use the linked-in descriptors + // (very few in star2proto, e.g. descriptorpb itself). 
+ pool := protoregistry.GlobalFiles + + // Load a user-provided FileDescriptorSet produced by a command such as: + // $ protoc --descriptor_set_out=foo.fds foo.proto + if *descriptors != "" { + var fdset descriptorpb.FileDescriptorSet + for i, filename := range strings.Split(*descriptors, ",") { + data, err := ioutil.ReadFile(filename) + if err != nil { + log.Fatalf("--descriptors[%d]: %s", i, err) + } + // Accumulate into the repeated field of FileDescriptors. + if err := (proto.UnmarshalOptions{Merge: true}).Unmarshal(data, &fdset); err != nil { + log.Fatalf("%s does not contain a proto2.FileDescriptorSet: %v", filename, err) + } + } + + files, err := protodesc.NewFiles(&fdset) + if err != nil { + log.Fatalf("protodesc.NewFiles: could not build FileDescriptor index: %v", err) + } + pool = files + } + + // Execute the Starlark file. + thread := &starlark.Thread{ + Print: func(_ *starlark.Thread, msg string) { fmt.Println(msg) }, + } + starlarkproto.SetPool(thread, pool) + predeclared := starlark.StringDict{ + "proto": starlarkproto.Module, + "json": starlarkjson.Module, + } + globals, err := starlark.ExecFile(thread, filename, nil, predeclared) + if err != nil { + if evalErr, ok := err.(*starlark.EvalError); ok { + fatalf("%s", evalErr.Backtrace()) + } else { + fatalf("%s", err) + } + } + + // Print the output variable as a message. + // TODO(adonovan): this is clumsy. + // Let the user call print(), or provide an expression on the command line. 
+ result, ok := globals[*varFlag] + if !ok { + fatalf("%s must define a module-level variable named %q", filename, *varFlag) + } + msgwrap, ok := result.(*starlarkproto.Message) + if !ok { + fatalf("got %s, want proto.Message, for %q", result.Type(), *varFlag) + } + msg := msgwrap.Message() + + // -output + var marshal func(protoreflect.ProtoMessage) ([]byte, error) + switch *outputFlag { + case "wire": + marshal = proto.Marshal + + case "text": + marshal = prototext.MarshalOptions{Multiline: true, Indent: "\t"}.Marshal + + case "json": + marshal = protojson.MarshalOptions{Multiline: true, Indent: "\t"}.Marshal + + default: + fatalf("unsupported -output format: %s", *outputFlag) + } + data, err := marshal(msg) + if err != nil { + fatalf("%s", err) + } + os.Stdout.Write(data) +} + +func fatalf(format string, args ...interface{}) { + fmt.Fprintf(os.Stderr, "star2proto: ") + fmt.Fprintf(os.Stderr, format, args...) + fmt.Fprintln(os.Stderr) + os.Exit(1) +} diff --git a/lib/proto/proto.go b/lib/proto/proto.go new file mode 100644 index 0000000..149162d --- /dev/null +++ b/lib/proto/proto.go @@ -0,0 +1,1232 @@ +// Copyright 2020 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package proto defines a module of utilities for constructing and +// accessing protocol messages within Starlark programs. +// +// THIS PACKAGE IS EXPERIMENTAL AND ITS INTERFACE MAY CHANGE. 
+// +// This package defines several types of Starlark value: +// +// Message -- a protocol message +// RepeatedField -- a repeated field of a message, like a list +// +// FileDescriptor -- information about a .proto file +// FieldDescriptor -- information about a message field (or extension field) +// MessageDescriptor -- information about the type of a message +// EnumDescriptor -- information about an enumerated type +// EnumValueDescriptor -- a value of an enumerated type +// +// A Message value is a wrapper around a protocol message instance. +// Starlark programs may access and update Messages using dot notation: +// +// x = msg.field +// msg.field = x + 1 +// msg.field += 1 +// +// Assignments to message fields perform dynamic checks on the type and +// range of the value to ensure that the message is at all times valid. +// +// The value of a repeated field of a message is represented by the +// list-like data type, RepeatedField. Its elements may be accessed, +// iterated, and updated in the usual ways. As with assignments to +// message fields, an assignment to an element of a RepeatedField +// performs a dynamic check to ensure that the RepeatedField holds +// only elements of the correct type. +// +// type(msg.uint32s) # "proto.repeated<uint32>" +// msg.uint32s[0] = 1 +// msg.uint32s[0] = -1 # error: invalid uint32: -1 +// +// Any iterable may be assigned to a repeated field of a message. If +// the iterable is itself a value of type RepeatedField, the message +// field holds a reference to it. +// +// msg2.uint32s = msg.uint32s # both messages share one RepeatedField +// msg.uint32s[0] = 123 +// print(msg2.uint32s[0]) # "123" +// +// The RepeatedFields' element types must match. 
+// It is not enough for the values to be merely valid: +// +// msg.uint32s = [1, 2, 3] # makes a copy +// msg.uint64s = msg.uint32s # error: repeated field has wrong type +// msg.uint64s = list(msg.uint32s) # ok; makes a copy +// +// For all other iterables, a new RepeatedField is constructed from the +// elements of the iterable. +// +// msg.uint32s = [1, 2, 3] +// print(type(msg.uint32s)) # "proto.repeated<uint32>" +// +// +// To construct a Message from encoded binary or text data, call +// Unmarshal or UnmarshalText. These two functions are exposed to +// Starlark programs as proto.unmarshal{,_text}. +// +// To construct a Message from an existing Go proto.Message instance, +// you must first encode the Go message to binary, then decode it using +// Unmarshal. This ensures that messages visible to Starlark are +// encapsulated and cannot be mutated once their Starlark wrapper values +// are frozen. +// +// TODO(adonovan): document descriptors, enums, message instantiation. +// +// See proto_test.go for an example of how to use the 'proto' +// module in an application that embeds Starlark. +// +package proto + +// TODO(adonovan): Go and Starlark API improvements: +// - Make Message and RepeatedField comparable. +// (NOTE: proto.Equal works only with generated message types.) +// - Support maps, oneof, any. But not messageset if we can avoid it. +// - Support "well-known types". +// - Defend against cycles in object graph. +// - Test missing required fields in marshalling. 

import (
	"bytes"
	"fmt"
	"sort"
	"strings"
	"unsafe"
	_ "unsafe" // for linkname hack

	"google.golang.org/protobuf/encoding/prototext"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/reflect/protoreflect"
	"google.golang.org/protobuf/reflect/protoregistry"
	"google.golang.org/protobuf/types/dynamicpb"

	"go.starlark.net/starlark"
	"go.starlark.net/starlarkstruct"
	"go.starlark.net/syntax"
)

// SetPool associates with the specified Starlark thread the
// descriptor pool used to find descriptors for .proto files and to
// instantiate messages from descriptors. Clients must call SetPool
// for a Starlark thread to use this package.
//
// For example:
//	SetPool(thread, protoregistry.GlobalFiles)
//
func SetPool(thread *starlark.Thread, pool DescriptorPool) {
	thread.SetLocal(contextKey, pool)
}

// Pool returns the descriptor pool previously associated with this thread.
// The result is nil if the thread-local value is not a DescriptorPool,
// for example because SetPool was never called.
func Pool(thread *starlark.Thread) DescriptorPool {
	pool, _ := thread.Local(contextKey).(DescriptorPool)
	return pool
}

// contextKey is the thread-local key under which SetPool stores the pool.
const contextKey = "proto.DescriptorPool"

// A DescriptorPool loads FileDescriptors by path name or package name,
// possibly on demand.
//
// It is a superinterface of protodesc.Resolver, so any Resolver
// implementation is a valid pool. For example,
// protoregistry.GlobalFiles, which loads FileDescriptors from the
// compressed binary information in all the *.pb.go files linked into
// the process; and protodesc.NewFiles, which holds a set of
// FileDescriptorSet messages. See star2proto for example usage.
type DescriptorPool interface {
	FindFileByPath(string) (protoreflect.FileDescriptor, error)
}

// Module is the Starlark "proto" module. Its members are the
// built-in functions proto.file, proto.has, and so on.
var Module = &starlarkstruct.Module{
	Name: "proto",
	Members: starlark.StringDict{
		"file":    starlark.NewBuiltin("proto.file", file),
		"has":     starlark.NewBuiltin("proto.has", has),
		"marshal": starlark.NewBuiltin("proto.marshal", marshal),
		// marshal_text shares marshal's implementation, which
		// selects the encoding based on the builtin's name.
		"marshal_text":   starlark.NewBuiltin("proto.marshal_text", marshal),
		"set_field":      starlark.NewBuiltin("proto.set_field", setFieldStarlark),
		"get_field":      starlark.NewBuiltin("proto.get_field", getFieldStarlark),
		"unmarshal":      starlark.NewBuiltin("proto.unmarshal", unmarshal),
		"unmarshal_text": starlark.NewBuiltin("proto.unmarshal_text", unmarshal_text),

		// TODO(adonovan):
		// - merge(msg, msg) -> msg
		// - equals(msg, msg) -> bool
		// - diff(msg, msg) -> string
		// - clone(msg) -> msg
	},
}

// file(filename) loads the FileDescriptor of the given name, or the
// first if the pool contains more than one.
//
// It's unfortunate that renaming a .proto file in effect breaks the
// interface it presents to Starlark. Ideally one would import
// descriptors by package name, but there may be many FileDescriptors
// for the same package name, and there is no "package descriptor".
// (Technically a pool may also have many FileDescriptors with the same
// file name, but this can't happen with a single consistent snapshot.)
+func file(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var filename string + if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 1, &filename); err != nil { + return nil, err + } + + pool := Pool(thread) + if pool == nil { + return nil, fmt.Errorf("internal error: SetPool was not called") + } + + desc, err := pool.FindFileByPath(filename) + if err != nil { + return nil, err + } + + return FileDescriptor{Desc: desc}, nil +} + +// has(msg, field) reports whether the specified field of the message is present. +// A field may be specified by name (string) or FieldDescriptor. +// has reports an error if the message type has no such field. +func has(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var x, field starlark.Value + if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &x, &field); err != nil { + return nil, err + } + msg, ok := x.(*Message) + if !ok { + return nil, fmt.Errorf("%s: got %s, want proto.Message", fn.Name(), x.Type()) + } + + var fdesc protoreflect.FieldDescriptor + switch field := field.(type) { + case starlark.String: + var err error + fdesc, err = fieldDesc(msg.desc(), string(field)) + if err != nil { + return nil, err + } + + case FieldDescriptor: + if field.Desc.ContainingMessage() != msg.desc() { + return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), msg.desc().FullName(), field) + } + fdesc = field.Desc + + default: + return nil, fmt.Errorf("%s: for field argument, got %s, want string or proto.FieldDescriptor", fn.Name(), field.Type()) + } + + return starlark.Bool(msg.msg.Has(fdesc)), nil +} + +// marshal{,_text}(msg) encodes a Message value to binary or text form. 
+func marshal(_ *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var m *Message + if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 1, &m); err != nil { + return nil, err + } + if fn.Name() == "proto.marshal" { + data, err := proto.Marshal(m.Message()) + if err != nil { + return nil, fmt.Errorf("%s: %v", fn.Name(), err) + } + return starlark.Bytes(data), nil + } else { + text, err := prototext.MarshalOptions{Indent: " "}.Marshal(m.Message()) + if err != nil { + return nil, fmt.Errorf("%s: %v", fn.Name(), err) + } + return starlark.String(text), nil + } +} + +// unmarshal(msg) decodes a binary protocol message to a Message. +func unmarshal(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var desc MessageDescriptor + var data starlark.Bytes + if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &desc, &data); err != nil { + return nil, err + } + return unmarshalData(desc.Desc, []byte(data), true) +} + +// unmarshal_text(msg) decodes a text protocol message to a Message. +func unmarshal_text(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var desc MessageDescriptor + var data string + if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &desc, &data); err != nil { + return nil, err + } + return unmarshalData(desc.Desc, []byte(data), false) +} + +// set_field(msg, field, value) updates the value of a field. +// It is typically used for extensions, which cannot be updated using msg.field = v notation. +func setFieldStarlark(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + // TODO(adonovan): allow field to be specified by name (for non-extension fields), like has? 
+ var m *Message + var field FieldDescriptor + var v starlark.Value + if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 3, &m, &field, &v); err != nil { + return nil, err + } + + if *m.frozen { + return nil, fmt.Errorf("%s: cannot set %v field of frozen %v message", fn.Name(), field, m.desc().FullName()) + } + + if field.Desc.ContainingMessage() != m.desc() { + return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), m.desc().FullName(), field) + } + + return starlark.None, setField(m.msg, field.Desc, v) +} + +// get_field(msg, field) retrieves the value of a field. +// It is typically used for extension fields, which cannot be accessed using msg.field notation. +func getFieldStarlark(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + // TODO(adonovan): allow field to be specified by name (for non-extension fields), like has? + var msg *Message + var field FieldDescriptor + if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &msg, &field); err != nil { + return nil, err + } + + if field.Desc.ContainingMessage() != msg.desc() { + return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), msg.desc().FullName(), field) + } + + return msg.getField(field.Desc), nil +} + +// The Call method implements the starlark.Callable interface. +// When a message descriptor is called, it returns a new instance of the +// protocol message it describes. +// +// Message(msg) -- return a shallow copy of an existing message +// Message(k=v, ...) -- return a new message with the specified fields +// Message(dict(...)) -- return a new message with the specified fields +// +func (d MessageDescriptor) CallInternal(thread *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + dest := &Message{ + msg: newMessage(d.Desc), + frozen: new(bool), + } + + // Single positional argument? 
+ if len(args) > 0 { + if len(kwargs) > 0 { + return nil, fmt.Errorf("%s: got both positional and named arguments", d.Desc.Name()) + } + if len(args) > 1 { + return nil, fmt.Errorf("%s: got %d positional arguments, want at most 1", d.Desc.Name(), len(args)) + } + + // Keep consistent with MessageKind case of toProto. + // (support the same argument types). + switch src := args[0].(type) { + case *Message: + if dest.desc() != src.desc() { + return nil, fmt.Errorf("%s: got message of type %s, want type %s", d.Desc.Name(), src.desc().FullName(), dest.desc().FullName()) + } + + // Make shallow copy of message. + // TODO(adonovan): How does frozen work if we have shallow copy? + src.msg.Range(func(fdesc protoreflect.FieldDescriptor, v protoreflect.Value) bool { + dest.msg.Set(fdesc, v) + return true + }) + return dest, nil + + case *starlark.Dict: + kwargs = src.Items() + // fall through + + default: + return nil, fmt.Errorf("%s: got %s, want dict or message", d.Desc.Name(), src.Type()) + } + } + + // Convert named arguments to field values. + err := setFields(dest.msg, kwargs) + return dest, err +} + +// setFields updates msg as if by msg.name=value for each (name, value) in items. +func setFields(msg protoreflect.Message, items []starlark.Tuple) error { + for _, item := range items { + name, ok := starlark.AsString(item[0]) + if !ok { + return fmt.Errorf("got %s, want string", item[0].Type()) + } + fdesc, err := fieldDesc(msg.Descriptor(), name) + if err != nil { + return err + } + if err := setField(msg, fdesc, item[1]); err != nil { + return err + } + } + return nil +} + +// setField validates a Starlark field value, converts it to canonical form, +// and assigns to the field of msg. If value is None, the field is unset. +func setField(msg protoreflect.Message, fdesc protoreflect.FieldDescriptor, value starlark.Value) error { + // None unsets a field. 
+ if value == starlark.None { + msg.Clear(fdesc) + return nil + } + + // Assigning to a repeated field must make a copy, + // because the fields.Set doesn't specify whether + // it aliases the list or not, so we cannot assume. + // + // This is potentially surprising as + // x = []; msg.x = x; y = msg.x + // causes x and y not to alias. + if fdesc.IsList() { + iter := starlark.Iterate(value) + if iter == nil { + return fmt.Errorf("got %s for .%s field, want iterable", value.Type(), fdesc.Name()) + } + defer iter.Done() + + // TODO(adonovan): handle maps + list := msg.Mutable(fdesc).List() + var x starlark.Value + for i := 0; iter.Next(&x); i++ { + v, err := toProto(fdesc, x) + if err != nil { + return fmt.Errorf("index %d: %v", i, err) + } + list.Append(v) + } + return nil + } + + v, err := toProto(fdesc, value) + if err != nil { + return fmt.Errorf("in field %s: %v", fdesc.Name(), err) + } + + if fdesc.IsExtension() { + // The protoreflect.Message.NewField method must be able + // to return a new instance of the field type. Without + // having the Go type information available for extensions, + // the implementation of NewField won't know what to do. + // + // Thus we must augment the FieldDescriptor to one that + // additional holds Go representation type information + // (based in this case on dynamicpb). + fdesc = dynamicpb.NewExtensionType(fdesc).TypeDescriptor() + _ = fdesc.(protoreflect.ExtensionTypeDescriptor) + } + + msg.Set(fdesc, v) + return nil +} + +// toProto converts a Starlark value for a message field into protoreflect form. +func toProto(fdesc protoreflect.FieldDescriptor, v starlark.Value) (protoreflect.Value, error) { + switch fdesc.Kind() { + case protoreflect.BoolKind: + // To avoid mistakes, we require v be exactly a bool. 
+ if v, ok := v.(starlark.Bool); ok { + return protoreflect.ValueOfBool(bool(v)), nil + } + + case protoreflect.Fixed32Kind, + protoreflect.Uint32Kind: + // uint32 + if i, ok := v.(starlark.Int); ok { + if u, ok := i.Uint64(); ok && uint64(uint32(u)) == u { + return protoreflect.ValueOfUint32(uint32(u)), nil + } + return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i) + } + + case protoreflect.Int32Kind, + protoreflect.Sfixed32Kind, + protoreflect.Sint32Kind: + // int32 + if i, ok := v.(starlark.Int); ok { + if i, ok := i.Int64(); ok && int64(int32(i)) == i { + return protoreflect.ValueOfInt32(int32(i)), nil + } + return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i) + } + + case protoreflect.Uint64Kind, + protoreflect.Fixed64Kind: + // uint64 + if i, ok := v.(starlark.Int); ok { + if u, ok := i.Uint64(); ok { + return protoreflect.ValueOfUint64(u), nil + } + return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i) + } + + case protoreflect.Int64Kind, + protoreflect.Sfixed64Kind, + protoreflect.Sint64Kind: + // int64 + if i, ok := v.(starlark.Int); ok { + if i, ok := i.Int64(); ok { + return protoreflect.ValueOfInt64(i), nil + } + return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i) + } + + case protoreflect.StringKind: + if s, ok := starlark.AsString(v); ok { + return protoreflect.ValueOfString(s), nil + } else if b, ok := v.(starlark.Bytes); ok { + // TODO(adonovan): allow bytes for string? Not friendly to a Java port. + return protoreflect.ValueOfBytes([]byte(b)), nil + } + + case protoreflect.BytesKind: + if s, ok := starlark.AsString(v); ok { + // TODO(adonovan): don't allow string for bytes: it's hostile to a Java port. + // Instead provide b"..." literals in the core + // and a bytes(str) conversion. 
+ return protoreflect.ValueOfBytes([]byte(s)), nil + } else if b, ok := v.(starlark.Bytes); ok { + return protoreflect.ValueOfBytes([]byte(b)), nil + } + + case protoreflect.DoubleKind: + switch v := v.(type) { + case starlark.Float: + return protoreflect.ValueOfFloat64(float64(v)), nil + case starlark.Int: + return protoreflect.ValueOfFloat64(float64(v.Float())), nil + } + + case protoreflect.FloatKind: + switch v := v.(type) { + case starlark.Float: + return protoreflect.ValueOfFloat32(float32(v)), nil + case starlark.Int: + return protoreflect.ValueOfFloat32(float32(v.Float())), nil + } + + case protoreflect.GroupKind, + protoreflect.MessageKind: + // Keep consistent with MessageDescriptor.CallInternal! + desc := fdesc.Message() + switch v := v.(type) { + case *Message: + if desc != v.desc() { + return noValue, fmt.Errorf("got %s, want %s", v.desc().FullName(), desc.FullName()) + } + return protoreflect.ValueOfMessage(v.msg), nil // alias it directly + + case *starlark.Dict: + dest := newMessage(desc) + err := setFields(dest, v.Items()) + return protoreflect.ValueOfMessage(dest), err + } + + case protoreflect.EnumKind: + enumval, err := enumValueOf(fdesc.Enum(), v) + if err != nil { + return noValue, err + } + return protoreflect.ValueOfEnum(enumval.Number()), nil + } + + return noValue, fmt.Errorf("got %s, want %s", v.Type(), typeString(fdesc)) +} + +var noValue protoreflect.Value + +// toStarlark returns a Starlark value for the value x of a message field. +// If the result is a repeated field or message, +// the result aliases the original and has the specified "frozenness" flag. +// +// fdesc is only used for the type, not other properties of the field. 
+func toStarlark(typ protoreflect.FieldDescriptor, x protoreflect.Value, frozen *bool) starlark.Value { + if list, ok := x.Interface().(protoreflect.List); ok { + return &RepeatedField{ + typ: typ, + list: list, + frozen: frozen, + } + } + return toStarlark1(typ, x, frozen) +} + +// toStarlark1, for scalar (non-repeated) values only. +func toStarlark1(typ protoreflect.FieldDescriptor, x protoreflect.Value, frozen *bool) starlark.Value { + + switch typ.Kind() { + case protoreflect.BoolKind: + return starlark.Bool(x.Bool()) + + case protoreflect.Fixed32Kind, + protoreflect.Uint32Kind, + protoreflect.Uint64Kind, + protoreflect.Fixed64Kind: + return starlark.MakeUint64(x.Uint()) + + case protoreflect.Int32Kind, + protoreflect.Sfixed32Kind, + protoreflect.Sint32Kind, + protoreflect.Int64Kind, + protoreflect.Sfixed64Kind, + protoreflect.Sint64Kind: + return starlark.MakeInt64(x.Int()) + + case protoreflect.StringKind: + return starlark.String(x.String()) + + case protoreflect.BytesKind: + return starlark.Bytes(x.Bytes()) + + case protoreflect.DoubleKind, protoreflect.FloatKind: + return starlark.Float(x.Float()) + + case protoreflect.GroupKind, protoreflect.MessageKind: + return &Message{ + msg: x.Message(), + frozen: frozen, + } + + case protoreflect.EnumKind: + // Invariant: only EnumValueDescriptor may appear here. + enumval := typ.Enum().Values().ByNumber(x.Enum()) + return EnumValueDescriptor{Desc: enumval} + } + + panic(fmt.Sprintf("got %T, want %s", x, typeString(typ))) +} + +// A Message is a Starlark value that wraps a protocol message. +// +// Two Messages are equivalent if and only if they are identical. +// +// When a Message value becomes frozen, a Starlark program may +// not modify the underlying protocol message, nor any Message +// or RepeatedField wrapper values derived from it. 
+type Message struct { + msg protoreflect.Message // any concrete type is allowed + frozen *bool // shared by a group of related Message/RepeatedField wrappers +} + +// Message returns the wrapped message. +func (m *Message) Message() protoreflect.ProtoMessage { return m.msg.Interface() } + +func (m *Message) desc() protoreflect.MessageDescriptor { return m.msg.Descriptor() } + +var _ starlark.HasSetField = (*Message)(nil) + +// Unmarshal parses the data as a binary protocol message of the specified type, +// and returns it as a new Starlark message value. +func Unmarshal(desc protoreflect.MessageDescriptor, data []byte) (*Message, error) { + return unmarshalData(desc, data, true) +} + +// UnmarshalText parses the data as a text protocol message of the specified type, +// and returns it as a new Starlark message value. +func UnmarshalText(desc protoreflect.MessageDescriptor, data []byte) (*Message, error) { + return unmarshalData(desc, data, false) +} + +// unmarshalData constructs a Starlark proto.Message by decoding binary or text data. +func unmarshalData(desc protoreflect.MessageDescriptor, data []byte, binary bool) (*Message, error) { + m := &Message{ + msg: newMessage(desc), + frozen: new(bool), + } + var err error + if binary { + err = proto.Unmarshal(data, m.Message()) + } else { + err = prototext.Unmarshal(data, m.Message()) + } + if err != nil { + return nil, fmt.Errorf("unmarshalling %s failed: %v", desc.FullName(), err) + } + return m, nil +} + +func (m *Message) String() string { + buf := new(bytes.Buffer) + buf.WriteString(string(m.desc().FullName())) + buf.WriteByte('(') + + // Sort fields (including extensions) by number. + var fields []protoreflect.FieldDescriptor + m.msg.Range(func(fdesc protoreflect.FieldDescriptor, v protoreflect.Value) bool { + // TODO(adonovan): opt: save v in table too. 
+ fields = append(fields, fdesc) + return true + }) + sort.Slice(fields, func(i, j int) bool { + return fields[i].Number() < fields[j].Number() + }) + + for i, fdesc := range fields { + if i > 0 { + buf.WriteString(", ") + } + if fdesc.IsExtension() { + // extension field: "[pkg.Msg.field]" + buf.WriteString(string(fdesc.FullName())) + } else if fdesc.Kind() != protoreflect.GroupKind { + // ordinary field: "field" + buf.WriteString(string(fdesc.Name())) + } else { + // group field: "MyGroup" + // + // The name of a group is the mangled version, + // while the true name of a group is the message itself. + // For example, for a group called "MyGroup", + // the inlined message will be called "MyGroup", + // but the field will be named "mygroup". + // This rule complicates name logic everywhere. + buf.WriteString(string(fdesc.Message().Name())) + } + buf.WriteString("=") + writeString(buf, fdesc, m.msg.Get(fdesc)) + } + buf.WriteByte(')') + return buf.String() +} + +func (m *Message) Type() string { return "proto.Message" } +func (m *Message) Truth() starlark.Bool { return true } +func (m *Message) Freeze() { *m.frozen = true } +func (m *Message) Hash() (h uint32, err error) { return uint32(uintptr(unsafe.Pointer(m))), nil } // identity hash + +// Attr returns the value of this message's field of the specified name. +// Extension fields are not accessible this way as their names are not unique. +func (m *Message) Attr(name string) (starlark.Value, error) { + // The name 'descriptor' is already effectively reserved + // by the Go API for generated message types. + if name == "descriptor" { + return MessageDescriptor{Desc: m.desc()}, nil + } + + fdesc, err := fieldDesc(m.desc(), name) + if err != nil { + return nil, err + } + return m.getField(fdesc), nil +} + +func (m *Message) getField(fdesc protoreflect.FieldDescriptor) starlark.Value { + if fdesc.IsExtension() { + // See explanation in setField. 
+ fdesc = dynamicpb.NewExtensionType(fdesc).TypeDescriptor() + } + + if m.msg.Has(fdesc) { + return toStarlark(fdesc, m.msg.Get(fdesc), m.frozen) + } + return defaultValue(fdesc) +} + +//go:linkname detrandDisable google.golang.org/protobuf/internal/detrand.Disable +func detrandDisable() + +func init() { + // Nasty hack to disable the randomization of output that occurs in textproto. + // TODO(adonovan): once go/proto-proposals/canonical-serialization + // is resolved the need for the hack should go away. See also go/go-proto-stability. + // If the proposal is rejected, we will need our own text-mode formatter. + detrandDisable() +} + +// defaultValue returns the (frozen) default Starlark value for a given message field. +func defaultValue(fdesc protoreflect.FieldDescriptor) starlark.Value { + frozen := true + + // The default value of a repeated field is an empty list. + if fdesc.IsList() { + return &RepeatedField{typ: fdesc, list: emptyList{}, frozen: &frozen} + } + + // The zero value for a message type is an empty instance of that message. + if desc := fdesc.Message(); desc != nil { + return &Message{msg: newMessage(desc), frozen: &frozen} + } + + // Convert the default value, which is not necessarily zero, to Starlark. + // The frozenness isn't used as the remaining types are all immutable. + return toStarlark1(fdesc, fdesc.Default(), &frozen) +} + +// A frozen empty implementation of protoreflect.List. +type emptyList struct{ protoreflect.List } + +func (emptyList) Len() int { return 0 } + +// newMessage returns a new empty instance of the message type described by desc. +func newMessage(desc protoreflect.MessageDescriptor) protoreflect.Message { + // If desc refers to a built-in message, + // use the more efficient generated type descriptor (a Go struct). + mt, err := protoregistry.GlobalTypes.FindMessageByName(desc.FullName()) + if err == nil && mt.Descriptor() == desc { + return mt.New() + } + + // For all others, use the generic dynamicpb representation. 
+ return dynamicpb.NewMessage(desc).ProtoReflect() +} + +// fieldDesc returns the descriptor for the named non-extension field. +func fieldDesc(desc protoreflect.MessageDescriptor, name string) (protoreflect.FieldDescriptor, error) { + if fdesc := desc.Fields().ByName(protoreflect.Name(name)); fdesc != nil { + return fdesc, nil + } + return nil, starlark.NoSuchAttrError(fmt.Sprintf("%s has no .%s field", desc.FullName(), name)) +} + +// SetField updates a non-extension field of this message. +// It implements the HasSetField interface. +func (m *Message) SetField(name string, v starlark.Value) error { + fdesc, err := fieldDesc(m.desc(), name) + if err != nil { + return err + } + if *m.frozen { + return fmt.Errorf("cannot set .%s field of frozen %s message", + name, m.desc().FullName()) + } + return setField(m.msg, fdesc, v) +} + +// AttrNames returns the set of field names defined for this message. +// It satisfies the starlark.HasAttrs interface. +func (m *Message) AttrNames() []string { + seen := make(map[string]bool) + + // standard fields + seen["descriptor"] = true + + // non-extension fields + fields := m.desc().Fields() + for i := 0; i < fields.Len(); i++ { + fdesc := fields.Get(i) + if !fdesc.IsExtension() { + seen[string(fdesc.Name())] = true + } + } + + names := make([]string, 0, len(seen)) + for name := range seen { + names = append(names, name) + } + sort.Strings(names) + return names +} + +// typeString returns a user-friendly description of the type of a +// protocol message field (or element of a repeated field). 
+func typeString(fdesc protoreflect.FieldDescriptor) string { + switch fdesc.Kind() { + case protoreflect.GroupKind, + protoreflect.MessageKind: + return string(fdesc.Message().FullName()) + + case protoreflect.EnumKind: + return string(fdesc.Enum().FullName()) + + default: + return strings.ToLower(strings.TrimPrefix(fdesc.Kind().String(), "TYPE_")) + } +} + +// A RepeatedField is a Starlark value that wraps a repeated field of a protocol message. +// +// An assignment to an element of a repeated field incurs a dynamic +// check that the new value has (or can be converted to) the correct +// type using conversions similar to those done when calling a +// MessageDescriptor to construct a message. +// +// TODO(adonovan): make RepeatedField implement starlark.Comparable. +// Should the comparison include type, or be defined on the elements alone? +type RepeatedField struct { + typ protoreflect.FieldDescriptor // only for type information, not field name + list protoreflect.List + frozen *bool + itercount int +} + +var _ starlark.HasSetIndex = (*RepeatedField)(nil) + +func (rf *RepeatedField) Type() string { + return fmt.Sprintf("proto.repeated<%s>", typeString(rf.typ)) +} + +func (rf *RepeatedField) SetIndex(i int, v starlark.Value) error { + if *rf.frozen { + return fmt.Errorf("cannot insert value in frozen repeated field") + } + if rf.itercount > 0 { + return fmt.Errorf("cannot insert value in repeated field with active iterators") + } + x, err := toProto(rf.typ, v) + if err != nil { + // The repeated field value cannot know which field it + // belongs to---it might be shared by several of the + // same type---so the error message is suboptimal. 
+ return fmt.Errorf("setting element of repeated field: %v", err) + } + rf.list.Set(i, x) + return nil +} + +func (rf *RepeatedField) Freeze() { *rf.frozen = true } +func (rf *RepeatedField) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", rf.Type()) } +func (rf *RepeatedField) Index(i int) starlark.Value { + return toStarlark1(rf.typ, rf.list.Get(i), rf.frozen) +} +func (rf *RepeatedField) Iterate() starlark.Iterator { + if !*rf.frozen { + rf.itercount++ + } + return &repeatedFieldIterator{rf, 0} +} +func (rf *RepeatedField) Len() int { return rf.list.Len() } +func (rf *RepeatedField) String() string { + // We use list [...] notation even though it not exactly a list. + buf := new(bytes.Buffer) + buf.WriteByte('[') + for i := 0; i < rf.list.Len(); i++ { + if i > 0 { + buf.WriteString(", ") + } + writeString(buf, rf.typ, rf.list.Get(i)) + } + buf.WriteByte(']') + return buf.String() +} +func (rf *RepeatedField) Truth() starlark.Bool { return rf.list.Len() > 0 } + +type repeatedFieldIterator struct { + rf *RepeatedField + i int +} + +func (it *repeatedFieldIterator) Next(p *starlark.Value) bool { + if it.i < it.rf.Len() { + *p = it.rf.Index(it.i) + it.i++ + return true + } + return false +} + +func (it *repeatedFieldIterator) Done() { + if !*it.rf.frozen { + it.rf.itercount-- + } +} + +func writeString(buf *bytes.Buffer, fdesc protoreflect.FieldDescriptor, v protoreflect.Value) { + // TODO(adonovan): opt: don't materialize the Starlark value. + // TODO(adonovan): skip message type when printing submessages? {...}? + var frozen bool // ignored + x := toStarlark(fdesc, v, &frozen) + buf.WriteString(x.String()) +} + +// -------- descriptor values -------- + +// A FileDescriptor is an immutable Starlark value that describes a +// .proto file. It is a reference to a protoreflect.FileDescriptor. +// Two FileDescriptor values compare equal if and only if they refer to +// the same protoreflect.FileDescriptor. 
+// +// Its fields are the names of the message types (MessageDescriptor) and enum +// types (EnumDescriptor). +type FileDescriptor struct { + Desc protoreflect.FileDescriptor // TODO(adonovan): hide field, expose method? +} + +var _ starlark.HasAttrs = FileDescriptor{} + +func (f FileDescriptor) String() string { return string(f.Desc.Path()) } +func (f FileDescriptor) Type() string { return "proto.FileDescriptor" } +func (f FileDescriptor) Truth() starlark.Bool { return true } +func (f FileDescriptor) Freeze() {} // immutable +func (f FileDescriptor) Hash() (h uint32, err error) { return starlark.String(f.Desc.Path()).Hash() } +func (f FileDescriptor) Attr(name string) (starlark.Value, error) { + if desc := f.Desc.Messages().ByName(protoreflect.Name(name)); desc != nil { + return MessageDescriptor{Desc: desc}, nil + } + if desc := f.Desc.Extensions().ByName(protoreflect.Name(name)); desc != nil { + return FieldDescriptor{desc}, nil + } + if enum := f.Desc.Enums().ByName(protoreflect.Name(name)); enum != nil { + return EnumDescriptor{Desc: enum}, nil + } + return nil, nil +} +func (f FileDescriptor) AttrNames() []string { + var names []string + messages := f.Desc.Messages() + for i, n := 0, messages.Len(); i < n; i++ { + names = append(names, string(messages.Get(i).Name())) + } + extensions := f.Desc.Extensions() + for i, n := 0, extensions.Len(); i < n; i++ { + names = append(names, string(extensions.Get(i).Name())) + } + enums := f.Desc.Enums() + for i, n := 0, enums.Len(); i < n; i++ { + names = append(names, string(enums.Get(i).Name())) + } + sort.Strings(names) + return names +} + +// A MessageDescriptor is an immutable Starlark value that describes a protocol +// message type. +// +// A MessageDescriptor value contains a reference to a protoreflect.MessageDescriptor. +// Two MessageDescriptor values compare equal if and only if they refer to the +// same protoreflect.MessageDescriptor. 
+// +// The fields of a MessageDescriptor value are the names of any message types +// (MessageDescriptor), fields or extension fields (FieldDescriptor), +// and enum types (EnumDescriptor) nested within the declaration of this message type. +type MessageDescriptor struct { + Desc protoreflect.MessageDescriptor +} + +var ( + _ starlark.Callable = MessageDescriptor{} + _ starlark.HasAttrs = MessageDescriptor{} +) + +func (d MessageDescriptor) String() string { return string(d.Desc.FullName()) } +func (d MessageDescriptor) Type() string { return "proto.MessageDescriptor" } +func (d MessageDescriptor) Truth() starlark.Bool { return true } +func (d MessageDescriptor) Freeze() {} // immutable +func (d MessageDescriptor) Hash() (h uint32, err error) { + return starlark.String(d.Desc.FullName()).Hash() +} +func (d MessageDescriptor) Attr(name string) (starlark.Value, error) { + if desc := d.Desc.Messages().ByName(protoreflect.Name(name)); desc != nil { + return MessageDescriptor{desc}, nil + } + if desc := d.Desc.Extensions().ByName(protoreflect.Name(name)); desc != nil { + return FieldDescriptor{desc}, nil + } + if desc := d.Desc.Fields().ByName(protoreflect.Name(name)); desc != nil { + return FieldDescriptor{desc}, nil + } + if desc := d.Desc.Enums().ByName(protoreflect.Name(name)); desc != nil { + return EnumDescriptor{desc}, nil + } + return nil, nil +} +func (d MessageDescriptor) AttrNames() []string { + var names []string + messages := d.Desc.Messages() + for i, n := 0, messages.Len(); i < n; i++ { + names = append(names, string(messages.Get(i).Name())) + } + enums := d.Desc.Enums() + for i, n := 0, enums.Len(); i < n; i++ { + names = append(names, string(enums.Get(i).Name())) + } + sort.Strings(names) + return names +} +func (d MessageDescriptor) Name() string { return string(d.Desc.Name()) } // for Callable + +// A FieldDescriptor is an immutable Starlark value that describes +// a field (possibly an extension field) of protocol message. 
+// +// A FieldDescriptor value contains a reference to a protoreflect.FieldDescriptor. +// Two FieldDescriptor values compare equal if and only if they refer to the +// same protoreflect.FieldDescriptor. +// +// The primary use for FieldDescriptors is to access extension fields of a message. +// +// A FieldDescriptor value has not attributes. +// TODO(adonovan): expose metadata fields (e.g. name, type). +type FieldDescriptor struct { + Desc protoreflect.FieldDescriptor +} + +var ( + _ starlark.HasAttrs = FieldDescriptor{} +) + +func (d FieldDescriptor) String() string { return string(d.Desc.FullName()) } +func (d FieldDescriptor) Type() string { return "proto.FieldDescriptor" } +func (d FieldDescriptor) Truth() starlark.Bool { return true } +func (d FieldDescriptor) Freeze() {} // immutable +func (d FieldDescriptor) Hash() (h uint32, err error) { + return starlark.String(d.Desc.FullName()).Hash() +} +func (d FieldDescriptor) Attr(name string) (starlark.Value, error) { + // TODO(adonovan): expose metadata fields of Desc? + return nil, nil +} +func (d FieldDescriptor) AttrNames() []string { + var names []string + // TODO(adonovan): expose metadata fields of Desc? + sort.Strings(names) + return names +} + +// An EnumDescriptor is an immutable Starlark value that describes an +// protocol enum type. +// +// An EnumDescriptor contains a reference to a protoreflect.EnumDescriptor. +// Two EnumDescriptor values compare equal if and only if they +// refer to the same protoreflect.EnumDescriptor. +// +// An EnumDescriptor may be called like a function. It converts its +// sole argument, which must be an int, string, or EnumValueDescriptor, +// to an EnumValueDescriptor. +// +// The fields of an EnumDescriptor value are the values of the +// enumeration, each of type EnumValueDescriptor. 
+type EnumDescriptor struct { + Desc protoreflect.EnumDescriptor +} + +var ( + _ starlark.HasAttrs = EnumDescriptor{} + _ starlark.Callable = EnumDescriptor{} +) + +func (e EnumDescriptor) String() string { return string(e.Desc.FullName()) } +func (e EnumDescriptor) Type() string { return "proto.EnumDescriptor" } +func (e EnumDescriptor) Truth() starlark.Bool { return true } +func (e EnumDescriptor) Freeze() {} // immutable +func (e EnumDescriptor) Hash() (h uint32, err error) { return 0, nil } // TODO(adonovan): number? +func (e EnumDescriptor) Attr(name string) (starlark.Value, error) { + if v := e.Desc.Values().ByName(protoreflect.Name(name)); v != nil { + return EnumValueDescriptor{v}, nil + } + return nil, nil +} +func (e EnumDescriptor) AttrNames() []string { + var names []string + values := e.Desc.Values() + for i, n := 0, values.Len(); i < n; i++ { + names = append(names, string(values.Get(i).Name())) + } + sort.Strings(names) + return names +} +func (e EnumDescriptor) Name() string { return string(e.Desc.Name()) } // for Callable + +// The Call method implements the starlark.Callable interface. +// A call to an enum descriptor converts its argument to a value of that enum type. +func (e EnumDescriptor) CallInternal(_ *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var x starlark.Value + if err := starlark.UnpackPositionalArgs(string(e.Desc.Name()), args, kwargs, 1, &x); err != nil { + return nil, err + } + v, err := enumValueOf(e.Desc, x) + if err != nil { + return nil, fmt.Errorf("%s: %v", e.Desc.Name(), err) + } + return EnumValueDescriptor{Desc: v}, nil +} + +// enumValueOf converts an int, string, or enum value to a value of the specified enum type. 
+func enumValueOf(enum protoreflect.EnumDescriptor, x starlark.Value) (protoreflect.EnumValueDescriptor, error) { + switch x := x.(type) { + case starlark.Int: + i, err := starlark.AsInt32(x) + if err != nil { + return nil, fmt.Errorf("invalid number %s for %s enum", x, enum.Name()) + } + desc := enum.Values().ByNumber(protoreflect.EnumNumber(i)) + if desc == nil { + return nil, fmt.Errorf("invalid number %d for %s enum", i, enum.Name()) + } + return desc, nil + + case starlark.String: + name := protoreflect.Name(x) + desc := enum.Values().ByName(name) + if desc == nil { + return nil, fmt.Errorf("invalid name %q for %s enum", name, enum.Name()) + } + return desc, nil + + case EnumValueDescriptor: + if parent := x.Desc.Parent(); parent != enum { + return nil, fmt.Errorf("invalid value %s.%s for %s enum", + parent.Name(), x.Desc.Name(), enum.Name()) + } + return x.Desc, nil + } + + return nil, fmt.Errorf("cannot convert %s to %s enum", x.Type(), enum.Name()) +} + +// An EnumValueDescriptor is an immutable Starlark value that represents one value of an enumeration. +// +// An EnumValueDescriptor contains a reference to a protoreflect.EnumValueDescriptor. +// Two EnumValueDescriptor values compare equal if and only if they +// refer to the same protoreflect.EnumValueDescriptor. +// +// An EnumValueDescriptor has the following fields: +// +// index -- int, index of this value within the enum sequence +// name -- string, name of this enum value +// number -- int, numeric value of this enum value +// type -- EnumDescriptor, the enum type to which this value belongs +// +type EnumValueDescriptor struct { + Desc protoreflect.EnumValueDescriptor +} + +var ( + _ starlark.HasAttrs = EnumValueDescriptor{} + _ starlark.Comparable = EnumValueDescriptor{} +) + +func (e EnumValueDescriptor) String() string { + enum := e.Desc.Parent() + return string(enum.Name() + "." 
+ e.Desc.Name()) // "Enum.EnumValue" +} +func (e EnumValueDescriptor) Type() string { return "proto.EnumValueDescriptor" } +func (e EnumValueDescriptor) Truth() starlark.Bool { return true } +func (e EnumValueDescriptor) Freeze() {} // immutable +func (e EnumValueDescriptor) Hash() (h uint32, err error) { return uint32(e.Desc.Number()), nil } +func (e EnumValueDescriptor) AttrNames() []string { + return []string{"index", "name", "number", "type"} +} +func (e EnumValueDescriptor) Attr(name string) (starlark.Value, error) { + switch name { + case "index": + return starlark.MakeInt(e.Desc.Index()), nil + case "name": + return starlark.String(e.Desc.Name()), nil + case "number": + return starlark.MakeInt(int(e.Desc.Number())), nil + case "type": + enum := e.Desc.Parent() + return EnumDescriptor{Desc: enum.(protoreflect.EnumDescriptor)}, nil + } + return nil, nil +} +func (x EnumValueDescriptor) CompareSameType(op syntax.Token, y_ starlark.Value, depth int) (bool, error) { + y := y_.(EnumValueDescriptor) + switch op { + case syntax.EQL: + return x.Desc == y.Desc, nil + case syntax.NEQ: + return x.Desc != y.Desc, nil + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y_.Type()) + } +} diff --git a/repl/repl.go b/repl/repl.go new file mode 100644 index 0000000..97109c6 --- /dev/null +++ b/repl/repl.go @@ -0,0 +1,185 @@ +// Package repl provides a read/eval/print loop for Starlark. +// +// It supports readline-style command editing, +// and interrupts through Control-C. +// +// If an input line can be parsed as an expression, +// the REPL parses and evaluates it and prints its result. +// Otherwise the REPL reads lines until a blank line, +// then tries again to parse the multi-line input as an +// expression. If the input still cannot be parsed as an expression, +// the REPL parses and executes it as a file (a list of statements), +// for side effects. 
+package repl // import "go.starlark.net/repl" + +import ( + "context" + "fmt" + "io" + "os" + "os/signal" + + "github.com/chzyer/readline" + "go.starlark.net/resolve" + "go.starlark.net/starlark" + "go.starlark.net/syntax" +) + +var interrupted = make(chan os.Signal, 1) + +// REPL executes a read, eval, print loop. +// +// Before evaluating each expression, it sets the Starlark thread local +// variable named "context" to a context.Context that is cancelled by a +// SIGINT (Control-C). Client-supplied global functions may use this +// context to make long-running operations interruptable. +// +func REPL(thread *starlark.Thread, globals starlark.StringDict) { + signal.Notify(interrupted, os.Interrupt) + defer signal.Stop(interrupted) + + rl, err := readline.New(">>> ") + if err != nil { + PrintError(err) + return + } + defer rl.Close() + for { + if err := rep(rl, thread, globals); err != nil { + if err == readline.ErrInterrupt { + fmt.Println(err) + continue + } + break + } + } + fmt.Println() +} + +// rep reads, evaluates, and prints one item. +// +// It returns an error (possibly readline.ErrInterrupt) +// only if readline failed. Starlark errors are printed. +func rep(rl *readline.Instance, thread *starlark.Thread, globals starlark.StringDict) error { + // Each item gets its own context, + // which is cancelled by a SIGINT. + // + // Note: during Readline calls, Control-C causes Readline to return + // ErrInterrupt but does not generate a SIGINT. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + select { + case <-interrupted: + cancel() + case <-ctx.Done(): + } + }() + + thread.SetLocal("context", ctx) + + eof := false + + // readline returns EOF, ErrInterrupted, or a line including "\n". + rl.SetPrompt(">>> ") + readline := func() ([]byte, error) { + line, err := rl.Readline() + rl.SetPrompt("... 
") + if err != nil { + if err == io.EOF { + eof = true + } + return nil, err + } + return []byte(line + "\n"), nil + } + + // parse + f, err := syntax.ParseCompoundStmt("<stdin>", readline) + if err != nil { + if eof { + return io.EOF + } + PrintError(err) + return nil + } + + // Treat load bindings as global (like they used to be) in the REPL. + // This is a workaround for github.com/google/starlark-go/issues/224. + // TODO(adonovan): not safe wrt concurrent interpreters. + // Come up with a more principled solution (or plumb options everywhere). + defer func(prev bool) { resolve.LoadBindsGlobally = prev }(resolve.LoadBindsGlobally) + resolve.LoadBindsGlobally = true + + if expr := soleExpr(f); expr != nil { + // eval + v, err := starlark.EvalExpr(thread, expr, globals) + if err != nil { + PrintError(err) + return nil + } + + // print + if v != starlark.None { + fmt.Println(v) + } + } else if err := starlark.ExecREPLChunk(f, thread, globals); err != nil { + PrintError(err) + return nil + } + + return nil +} + +func soleExpr(f *syntax.File) syntax.Expr { + if len(f.Stmts) == 1 { + if stmt, ok := f.Stmts[0].(*syntax.ExprStmt); ok { + return stmt.X + } + } + return nil +} + +// PrintError prints the error to stderr, +// or its backtrace if it is a Starlark evaluation error. +func PrintError(err error) { + if evalErr, ok := err.(*starlark.EvalError); ok { + fmt.Fprintln(os.Stderr, evalErr.Backtrace()) + } else { + fmt.Fprintln(os.Stderr, err) + } +} + +// MakeLoad returns a simple sequential implementation of module loading +// suitable for use in the REPL. +// Each function returned by MakeLoad accesses a distinct private cache. 
+func MakeLoad() func(thread *starlark.Thread, module string) (starlark.StringDict, error) { + type entry struct { + globals starlark.StringDict + err error + } + + var cache = make(map[string]*entry) + + return func(thread *starlark.Thread, module string) (starlark.StringDict, error) { + e, ok := cache[module] + if e == nil { + if ok { + // request for package whose loading is in progress + return nil, fmt.Errorf("cycle in load graph") + } + + // Add a placeholder to indicate "load in progress". + cache[module] = nil + + // Load it. + thread := &starlark.Thread{Name: "exec " + module, Load: thread.Load} + globals, err := starlark.ExecFile(thread, module, nil, nil) + e = &entry{globals, err} + + // Update the cache. + cache[module] = e + } + return e.globals, e.err + } +} diff --git a/resolve/binding.go b/resolve/binding.go new file mode 100644 index 0000000..6b99f4b --- /dev/null +++ b/resolve/binding.go @@ -0,0 +1,74 @@ +// Copyright 2019 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package resolve + +import "go.starlark.net/syntax" + +// This file defines resolver data types saved in the syntax tree. +// We cannot guarantee API stability for these types +// as they are closely tied to the implementation. + +// A Binding contains resolver information about an identifer. +// The resolver populates the Binding field of each syntax.Identifier. +// The Binding ties together all identifiers that denote the same variable. +type Binding struct { + Scope Scope + + // Index records the index into the enclosing + // - {DefStmt,File}.Locals, if Scope==Local + // - DefStmt.FreeVars, if Scope==Free + // - File.Globals, if Scope==Global. + // It is zero if Scope is Predeclared, Universal, or Undefined. + Index int + + First *syntax.Ident // first binding use (iff Scope==Local/Free/Global) +} + +// The Scope of Binding indicates what kind of scope it has. 
+type Scope uint8 + +const ( + Undefined Scope = iota // name is not defined + Local // name is local to its function or file + Cell // name is function-local but shared with a nested function + Free // name is cell of some enclosing function + Global // name is global to module + Predeclared // name is predeclared for this module (e.g. glob) + Universal // name is universal (e.g. len) +) + +var scopeNames = [...]string{ + Undefined: "undefined", + Local: "local", + Cell: "cell", + Free: "free", + Global: "global", + Predeclared: "predeclared", + Universal: "universal", +} + +func (scope Scope) String() string { return scopeNames[scope] } + +// A Module contains resolver information about a file. +// The resolver populates the Module field of each syntax.File. +type Module struct { + Locals []*Binding // the file's (comprehension-)local variables + Globals []*Binding // the file's global variables +} + +// A Function contains resolver information about a named or anonymous function. +// The resolver populates the Function field of each syntax.DefStmt and syntax.LambdaExpr. +type Function struct { + Pos syntax.Position // of DEF or LAMBDA + Name string // name of def, or "lambda" + Params []syntax.Expr // param = ident | ident=expr | * | *ident | **ident + Body []syntax.Stmt // contains synthetic 'return expr' for lambda + + HasVarargs bool // whether params includes *args (convenience) + HasKwargs bool // whether params includes **kwargs (convenience) + NumKwonlyParams int // number of keyword-only optional parameters + Locals []*Binding // this function's local/cell variables, parameters first + FreeVars []*Binding // enclosing cells to capture in closure +} diff --git a/resolve/resolve.go b/resolve/resolve.go new file mode 100644 index 0000000..56e33ba --- /dev/null +++ b/resolve/resolve.go @@ -0,0 +1,969 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package resolve defines a name-resolution pass for Starlark abstract
// syntax trees.
//
// The resolver sets the Locals and FreeVars arrays of each function and
// the Binding field of each syntax.Ident, which records the scope of
// the name and, for local, free, and global variables, its index.
// It also sets the Locals array of a File for locals
// bound by top-level comprehensions and load statements.
package resolve // import "go.starlark.net/resolve"

// All references to names are statically resolved. Names may be
// predeclared, global, or local to a function or file.
// File-local variables include those bound by top-level comprehensions
// and by load statements. ("Top-level" means "outside of any function".)
// The resolver maps each global name to a small integer and each local
// name to a small integer; these integers enable a fast and compact
// representation of globals and locals in the evaluator.
//
// As an optimization, the resolver classifies each predeclared name as
// either universal (e.g. None, len) or per-module (e.g. glob in Bazel's
// build language), enabling the evaluator to share the representation
// of the universal environment across all modules.
//
// The lexical environment is a tree of blocks with the file block at
// its root. The file's child blocks may be of two kinds: functions
// and comprehensions, and these may have further children of either
// kind.
//
// Python-style resolution requires multiple passes because a name is
// determined to be local to a function only if the function contains a
// "binding" use of it; similarly, a name is determined to be global (as
// opposed to predeclared) if the module contains a top-level binding use.
// Unlike ordinary top-level assignments, the bindings created by load
// statements are local to the file block.
// A non-binding use may lexically precede the binding to which it is resolved.
// In the first pass, we inspect each function, recording in
// 'uses' each identifier and the environment block in which it occurs.
// If a use of a name is binding, such as a function parameter or
// assignment, we add the name to the block's bindings mapping and add a
// local variable to the enclosing function.
//
// As we finish resolving each function, we inspect all the uses within
// that function and discard ones that were found to be function-local. The
// remaining ones must be either free (local to some lexically enclosing
// function), or top-level (global, predeclared, or file-local), but we cannot tell
// which until we have finished inspecting the outermost enclosing
// function. At that point, we can distinguish local from top-level names
// (and this is when Python would compute free variables).
//
// However, Starlark additionally requires that all references to global
// names are satisfied by some declaration in the current module;
// Starlark permits a function to forward-reference a global or file-local
// that has not been declared yet so long as it is declared before the
// end of the module. So, instead of re-resolving the unresolved references
// after each top-level function, we defer this until the end of the module
// and ensure that all such references are satisfied by some definition.
//
// At the end of the module, we visit each of the nested function blocks
// in bottom-up order, doing a recursive lexical lookup for each
// unresolved name. If the name is found to be local to some enclosing
// function, we must create a DefStmt.FreeVar (capture) parameter for
// each intervening function. We enter these synthetic bindings into
// the bindings map so that we create at most one freevar per name. If
// the name was not local, we check that it was defined at module level.
//
// We resolve all uses of locals in the module (due to load statements
// and comprehensions) in a similar way and compute the file's set of
// local variables.
//
// Starlark enforces that all global names are assigned at most once on
// all control flow paths by forbidding if/else statements and loops at
// top level. A global may be used before it is defined, leading to a
// dynamic error. However, the AllowGlobalReassign flag (really: allow
// top-level reassign) makes the resolver allow multiple assignments to
// a variable at top-level. It also allows if statements and for and
// while loops at top-level, which in turn may make the evaluator
// dynamically assign multiple values to a variable at top-level.
// (These two roles should be separated.)

import (
	"fmt"
	"log"
	"sort"
	"strings"

	"go.starlark.net/internal/spell"
	"go.starlark.net/syntax"
)

// debug enables tracing output from the lexical lookup.
const debug = false

// doesnt is the shared prefix of "unsupported feature" error messages.
const doesnt = "this Starlark dialect does not "

// global options
// These features are either not standard Starlark (yet), or deprecated
// features of the BUILD language, so we put them behind flags.
var (
	AllowSet            = false // allow the 'set' built-in
	AllowGlobalReassign = false // allow reassignment to top-level names; also, allow if/for/while at top-level
	AllowRecursion      = false // allow while statements and recursive functions
	LoadBindsGlobally   = false // load creates global not file-local bindings (deprecated)

	// obsolete flags for features that are now standard. No effect.
	AllowNestedDef = true
	AllowLambda    = true
	AllowFloat     = true
	AllowBitwise   = true
)

// File resolves the specified file and records information about the
// module in file.Module.
//
// The isPredeclared and isUniversal predicates report whether a name is
// a pre-declared identifier (visible in the current module) or a
// universal identifier (visible in every module).
+// Clients should typically pass predeclared.Has for the first and +// starlark.Universe.Has for the second, where predeclared is the +// module's StringDict of predeclared names and starlark.Universe is the +// standard set of built-ins. +// The isUniverse predicate is supplied a parameter to avoid a cyclic +// dependency upon starlark.Universe, not because users should ever need +// to redefine it. +func File(file *syntax.File, isPredeclared, isUniversal func(name string) bool) error { + return REPLChunk(file, nil, isPredeclared, isUniversal) +} + +// REPLChunk is a generalization of the File function that supports a +// non-empty initial global block, as occurs in a REPL. +func REPLChunk(file *syntax.File, isGlobal, isPredeclared, isUniversal func(name string) bool) error { + r := newResolver(isGlobal, isPredeclared, isUniversal) + r.stmts(file.Stmts) + + r.env.resolveLocalUses() + + // At the end of the module, resolve all non-local variable references, + // computing closures. + // Function bodies may contain forward references to later global declarations. + r.resolveNonLocalUses(r.env) + + file.Module = &Module{ + Locals: r.moduleLocals, + Globals: r.moduleGlobals, + } + + if len(r.errors) > 0 { + return r.errors + } + return nil +} + +// Expr resolves the specified expression. +// It returns the local variables bound within the expression. +// +// The isPredeclared and isUniversal predicates behave as for the File function. +func Expr(expr syntax.Expr, isPredeclared, isUniversal func(name string) bool) ([]*Binding, error) { + r := newResolver(nil, isPredeclared, isUniversal) + r.expr(expr) + r.env.resolveLocalUses() + r.resolveNonLocalUses(r.env) // globals & universals + if len(r.errors) > 0 { + return nil, r.errors + } + return r.moduleLocals, nil +} + +// An ErrorList is a non-empty list of resolver error messages. 
+type ErrorList []Error // len > 0 + +func (e ErrorList) Error() string { return e[0].Error() } + +// An Error describes the nature and position of a resolver error. +type Error struct { + Pos syntax.Position + Msg string +} + +func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg } + +func newResolver(isGlobal, isPredeclared, isUniversal func(name string) bool) *resolver { + file := new(block) + return &resolver{ + file: file, + env: file, + isGlobal: isGlobal, + isPredeclared: isPredeclared, + isUniversal: isUniversal, + globals: make(map[string]*Binding), + predeclared: make(map[string]*Binding), + } +} + +type resolver struct { + // env is the current local environment: + // a linked list of blocks, innermost first. + // The tail of the list is the file block. + env *block + file *block // file block (contains load bindings) + + // moduleLocals contains the local variables of the module + // (due to load statements and comprehensions outside any function). + // moduleGlobals contains the global variables of the module. + moduleLocals []*Binding + moduleGlobals []*Binding + + // globals maps each global name in the module to its binding. + // predeclared does the same for predeclared and universal names. + globals map[string]*Binding + predeclared map[string]*Binding + + // These predicates report whether a name is + // pre-declared, either in this module or universally, + // or already declared in the module globals (as in a REPL). + // isGlobal may be nil. + isGlobal, isPredeclared, isUniversal func(name string) bool + + loops int // number of enclosing for/while loops + ifstmts int // number of enclosing if statements loops + + errors ErrorList +} + +// container returns the innermost enclosing "container" block: +// a function (function != nil) or file (function == nil). +// Container blocks accumulate local variable bindings. 
+func (r *resolver) container() *block { + for b := r.env; ; b = b.parent { + if b.function != nil || b == r.file { + return b + } + } +} + +func (r *resolver) push(b *block) { + r.env.children = append(r.env.children, b) + b.parent = r.env + r.env = b +} + +func (r *resolver) pop() { r.env = r.env.parent } + +type block struct { + parent *block // nil for file block + + // In the file (root) block, both these fields are nil. + function *Function // only for function blocks + comp *syntax.Comprehension // only for comprehension blocks + + // bindings maps a name to its binding. + // A local binding has an index into its innermost enclosing container's locals array. + // A free binding has an index into its innermost enclosing function's freevars array. + bindings map[string]*Binding + + // children records the child blocks of the current one. + children []*block + + // uses records all identifiers seen in this container (function or file), + // and a reference to the environment in which they appear. + // As we leave each container block, we resolve them, + // so that only free and global ones remain. + // At the end of each top-level function we compute closures. + uses []use +} + +func (b *block) bind(name string, bind *Binding) { + if b.bindings == nil { + b.bindings = make(map[string]*Binding) + } + b.bindings[name] = bind +} + +func (b *block) String() string { + if b.function != nil { + return "function block at " + fmt.Sprint(b.function.Pos) + } + if b.comp != nil { + return "comprehension block at " + fmt.Sprint(b.comp.Span()) + } + return "file block" +} + +func (r *resolver) errorf(posn syntax.Position, format string, args ...interface{}) { + r.errors = append(r.errors, Error{posn, fmt.Sprintf(format, args...)}) +} + +// A use records an identifier and the environment in which it appears. +type use struct { + id *syntax.Ident + env *block +} + +// bind creates a binding for id: a global (not file-local) +// binding at top-level, a local binding otherwise. 
+// At top-level, it reports an error if a global or file-local +// binding already exists, unless AllowGlobalReassign. +// It sets id.Binding to the binding (whether old or new), +// and returns whether a binding already existed. +func (r *resolver) bind(id *syntax.Ident) bool { + // Binding outside any local (comprehension/function) block? + if r.env == r.file { + bind, ok := r.file.bindings[id.Name] + if !ok { + bind, ok = r.globals[id.Name] + if !ok { + // first global binding of this name + bind = &Binding{ + First: id, + Scope: Global, + Index: len(r.moduleGlobals), + } + r.globals[id.Name] = bind + r.moduleGlobals = append(r.moduleGlobals, bind) + } + } + if ok && !AllowGlobalReassign { + r.errorf(id.NamePos, "cannot reassign %s %s declared at %s", + bind.Scope, id.Name, bind.First.NamePos) + } + id.Binding = bind + return ok + } + + return r.bindLocal(id) +} + +func (r *resolver) bindLocal(id *syntax.Ident) bool { + // Mark this name as local to current block. + // Assign it a new local (positive) index in the current container. + _, ok := r.env.bindings[id.Name] + if !ok { + var locals *[]*Binding + if fn := r.container().function; fn != nil { + locals = &fn.Locals + } else { + locals = &r.moduleLocals + } + bind := &Binding{ + First: id, + Scope: Local, + Index: len(*locals), + } + r.env.bind(id.Name, bind) + *locals = append(*locals, bind) + } + + r.use(id) + return ok +} + +func (r *resolver) use(id *syntax.Ident) { + use := use{id, r.env} + + // The spec says that if there is a global binding of a name + // then all references to that name in that block refer to the + // global, even if the use precedes the def---just as for locals. + // For example, in this code, + // + // print(len); len=1; print(len) + // + // both occurrences of len refer to the len=1 binding, which + // completely shadows the predeclared len function. 
+ // + // The rationale for these semantics, which differ from Python, + // is that the static meaning of len (a reference to a global) + // does not change depending on where it appears in the file. + // Of course, its dynamic meaning does change, from an error + // into a valid reference, so it's not clear these semantics + // have any practical advantage. + // + // In any case, the Bazel implementation lags behind the spec + // and follows Python behavior, so the first use of len refers + // to the predeclared function. This typically used in a BUILD + // file that redefines a predeclared name half way through, + // for example: + // + // proto_library(...) # built-in rule + // load("myproto.bzl", "proto_library") + // proto_library(...) # user-defined rule + // + // We will piggyback support for the legacy semantics on the + // AllowGlobalReassign flag, which is loosely related and also + // required for Bazel. + if AllowGlobalReassign && r.env == r.file { + r.useToplevel(use) + return + } + + b := r.container() + b.uses = append(b.uses, use) +} + +// useToplevel resolves use.id as a reference to a name visible at top-level. +// The use.env field captures the original environment for error reporting. 
+func (r *resolver) useToplevel(use use) (bind *Binding) { + id := use.id + + if prev, ok := r.file.bindings[id.Name]; ok { + // use of load-defined name in file block + bind = prev + } else if prev, ok := r.globals[id.Name]; ok { + // use of global declared by module + bind = prev + } else if r.isGlobal != nil && r.isGlobal(id.Name) { + // use of global defined in a previous REPL chunk + bind = &Binding{ + First: id, // wrong: this is not even a binding use + Scope: Global, + Index: len(r.moduleGlobals), + } + r.globals[id.Name] = bind + r.moduleGlobals = append(r.moduleGlobals, bind) + } else if prev, ok := r.predeclared[id.Name]; ok { + // repeated use of predeclared or universal + bind = prev + } else if r.isPredeclared(id.Name) { + // use of pre-declared name + bind = &Binding{Scope: Predeclared} + r.predeclared[id.Name] = bind // save it + } else if r.isUniversal(id.Name) { + // use of universal name + if !AllowSet && id.Name == "set" { + r.errorf(id.NamePos, doesnt+"support sets") + } + bind = &Binding{Scope: Universal} + r.predeclared[id.Name] = bind // save it + } else { + bind = &Binding{Scope: Undefined} + var hint string + if n := r.spellcheck(use); n != "" { + hint = fmt.Sprintf(" (did you mean %s?)", n) + } + r.errorf(id.NamePos, "undefined: %s%s", id.Name, hint) + } + id.Binding = bind + return bind +} + +// spellcheck returns the most likely misspelling of +// the name use.id in the environment use.env. +func (r *resolver) spellcheck(use use) string { + var names []string + + // locals + for b := use.env; b != nil; b = b.parent { + for name := range b.bindings { + names = append(names, name) + } + } + + // globals + // + // We have no way to enumerate the sets whose membership + // tests are isPredeclared, isUniverse, and isGlobal, + // which includes prior names in the REPL session. 
+ for _, bind := range r.moduleGlobals { + names = append(names, bind.First.Name) + } + + sort.Strings(names) + return spell.Nearest(use.id.Name, names) +} + +// resolveLocalUses is called when leaving a container (function/module) +// block. It resolves all uses of locals/cells within that block. +func (b *block) resolveLocalUses() { + unresolved := b.uses[:0] + for _, use := range b.uses { + if bind := lookupLocal(use); bind != nil && (bind.Scope == Local || bind.Scope == Cell) { + use.id.Binding = bind + } else { + unresolved = append(unresolved, use) + } + } + b.uses = unresolved +} + +func (r *resolver) stmts(stmts []syntax.Stmt) { + for _, stmt := range stmts { + r.stmt(stmt) + } +} + +func (r *resolver) stmt(stmt syntax.Stmt) { + switch stmt := stmt.(type) { + case *syntax.ExprStmt: + r.expr(stmt.X) + + case *syntax.BranchStmt: + if r.loops == 0 && (stmt.Token == syntax.BREAK || stmt.Token == syntax.CONTINUE) { + r.errorf(stmt.TokenPos, "%s not in a loop", stmt.Token) + } + + case *syntax.IfStmt: + if !AllowGlobalReassign && r.container().function == nil { + r.errorf(stmt.If, "if statement not within a function") + } + r.expr(stmt.Cond) + r.ifstmts++ + r.stmts(stmt.True) + r.stmts(stmt.False) + r.ifstmts-- + + case *syntax.AssignStmt: + r.expr(stmt.RHS) + isAugmented := stmt.Op != syntax.EQ + r.assign(stmt.LHS, isAugmented) + + case *syntax.DefStmt: + r.bind(stmt.Name) + fn := &Function{ + Name: stmt.Name.Name, + Pos: stmt.Def, + Params: stmt.Params, + Body: stmt.Body, + } + stmt.Function = fn + r.function(fn, stmt.Def) + + case *syntax.ForStmt: + if !AllowGlobalReassign && r.container().function == nil { + r.errorf(stmt.For, "for loop not within a function") + } + r.expr(stmt.X) + const isAugmented = false + r.assign(stmt.Vars, isAugmented) + r.loops++ + r.stmts(stmt.Body) + r.loops-- + + case *syntax.WhileStmt: + if !AllowRecursion { + r.errorf(stmt.While, doesnt+"support while loops") + } + if !AllowGlobalReassign && r.container().function == nil { + 
r.errorf(stmt.While, "while loop not within a function") + } + r.expr(stmt.Cond) + r.loops++ + r.stmts(stmt.Body) + r.loops-- + + case *syntax.ReturnStmt: + if r.container().function == nil { + r.errorf(stmt.Return, "return statement not within a function") + } + if stmt.Result != nil { + r.expr(stmt.Result) + } + + case *syntax.LoadStmt: + // A load statement may not be nested in any other statement. + if r.container().function != nil { + r.errorf(stmt.Load, "load statement within a function") + } else if r.loops > 0 { + r.errorf(stmt.Load, "load statement within a loop") + } else if r.ifstmts > 0 { + r.errorf(stmt.Load, "load statement within a conditional") + } + + for i, from := range stmt.From { + if from.Name == "" { + r.errorf(from.NamePos, "load: empty identifier") + continue + } + if from.Name[0] == '_' { + r.errorf(from.NamePos, "load: names with leading underscores are not exported: %s", from.Name) + } + + id := stmt.To[i] + if LoadBindsGlobally { + r.bind(id) + } else if r.bindLocal(id) && !AllowGlobalReassign { + // "Global" in AllowGlobalReassign is a misnomer for "toplevel". + // Sadly we can't report the previous declaration + // as id.Binding may not be set yet. + r.errorf(id.NamePos, "cannot reassign top-level %s", id.Name) + } + } + + default: + log.Panicf("unexpected stmt %T", stmt) + } +} + +func (r *resolver) assign(lhs syntax.Expr, isAugmented bool) { + switch lhs := lhs.(type) { + case *syntax.Ident: + // x = ... + r.bind(lhs) + + case *syntax.IndexExpr: + // x[i] = ... + r.expr(lhs.X) + r.expr(lhs.Y) + + case *syntax.DotExpr: + // x.f = ... + r.expr(lhs.X) + + case *syntax.TupleExpr: + // (x, y) = ... + if isAugmented { + r.errorf(syntax.Start(lhs), "can't use tuple expression in augmented assignment") + } + for _, elem := range lhs.List { + r.assign(elem, isAugmented) + } + + case *syntax.ListExpr: + // [x, y, z] = ... 
+ if isAugmented { + r.errorf(syntax.Start(lhs), "can't use list expression in augmented assignment") + } + for _, elem := range lhs.List { + r.assign(elem, isAugmented) + } + + case *syntax.ParenExpr: + r.assign(lhs.X, isAugmented) + + default: + name := strings.ToLower(strings.TrimPrefix(fmt.Sprintf("%T", lhs), "*syntax.")) + r.errorf(syntax.Start(lhs), "can't assign to %s", name) + } +} + +func (r *resolver) expr(e syntax.Expr) { + switch e := e.(type) { + case *syntax.Ident: + r.use(e) + + case *syntax.Literal: + + case *syntax.ListExpr: + for _, x := range e.List { + r.expr(x) + } + + case *syntax.CondExpr: + r.expr(e.Cond) + r.expr(e.True) + r.expr(e.False) + + case *syntax.IndexExpr: + r.expr(e.X) + r.expr(e.Y) + + case *syntax.DictEntry: + r.expr(e.Key) + r.expr(e.Value) + + case *syntax.SliceExpr: + r.expr(e.X) + if e.Lo != nil { + r.expr(e.Lo) + } + if e.Hi != nil { + r.expr(e.Hi) + } + if e.Step != nil { + r.expr(e.Step) + } + + case *syntax.Comprehension: + // The 'in' operand of the first clause (always a ForClause) + // is resolved in the outer block; consider: [x for x in x]. + clause := e.Clauses[0].(*syntax.ForClause) + r.expr(clause.X) + + // A list/dict comprehension defines a new lexical block. + // Locals defined within the block will be allotted + // distinct slots in the locals array of the innermost + // enclosing container (function/module) block. 
+ r.push(&block{comp: e}) + + const isAugmented = false + r.assign(clause.Vars, isAugmented) + + for _, clause := range e.Clauses[1:] { + switch clause := clause.(type) { + case *syntax.IfClause: + r.expr(clause.Cond) + case *syntax.ForClause: + r.assign(clause.Vars, isAugmented) + r.expr(clause.X) + } + } + r.expr(e.Body) // body may be *DictEntry + r.pop() + + case *syntax.TupleExpr: + for _, x := range e.List { + r.expr(x) + } + + case *syntax.DictExpr: + for _, entry := range e.List { + entry := entry.(*syntax.DictEntry) + r.expr(entry.Key) + r.expr(entry.Value) + } + + case *syntax.UnaryExpr: + r.expr(e.X) + + case *syntax.BinaryExpr: + r.expr(e.X) + r.expr(e.Y) + + case *syntax.DotExpr: + r.expr(e.X) + // ignore e.Name + + case *syntax.CallExpr: + r.expr(e.Fn) + var seenVarargs, seenKwargs bool + var seenName map[string]bool + var n, p int + for _, arg := range e.Args { + pos, _ := arg.Span() + if unop, ok := arg.(*syntax.UnaryExpr); ok && unop.Op == syntax.STARSTAR { + // **kwargs + if seenKwargs { + r.errorf(pos, "multiple **kwargs not allowed") + } + seenKwargs = true + r.expr(arg) + } else if ok && unop.Op == syntax.STAR { + // *args + if seenKwargs { + r.errorf(pos, "*args may not follow **kwargs") + } else if seenVarargs { + r.errorf(pos, "multiple *args not allowed") + } + seenVarargs = true + r.expr(arg) + } else if binop, ok := arg.(*syntax.BinaryExpr); ok && binop.Op == syntax.EQ { + // k=v + n++ + if seenKwargs { + r.errorf(pos, "keyword argument may not follow **kwargs") + } else if seenVarargs { + r.errorf(pos, "keyword argument may not follow *args") + } + x := binop.X.(*syntax.Ident) + if seenName[x.Name] { + r.errorf(x.NamePos, "keyword argument %s repeated", x.Name) + } else { + if seenName == nil { + seenName = make(map[string]bool) + } + seenName[x.Name] = true + } + r.expr(binop.Y) + } else { + // positional argument + p++ + if seenVarargs { + r.errorf(pos, "positional argument may not follow *args") + } else if seenKwargs { + 
r.errorf(pos, "positional argument may not follow **kwargs") + } else if len(seenName) > 0 { + r.errorf(pos, "positional argument may not follow named") + } + r.expr(arg) + } + } + + // Fail gracefully if compiler-imposed limit is exceeded. + if p >= 256 { + pos, _ := e.Span() + r.errorf(pos, "%v positional arguments in call, limit is 255", p) + } + if n >= 256 { + pos, _ := e.Span() + r.errorf(pos, "%v keyword arguments in call, limit is 255", n) + } + + case *syntax.LambdaExpr: + fn := &Function{ + Name: "lambda", + Pos: e.Lambda, + Params: e.Params, + Body: []syntax.Stmt{&syntax.ReturnStmt{Result: e.Body}}, + } + e.Function = fn + r.function(fn, e.Lambda) + + case *syntax.ParenExpr: + r.expr(e.X) + + default: + log.Panicf("unexpected expr %T", e) + } +} + +func (r *resolver) function(function *Function, pos syntax.Position) { + // Resolve defaults in enclosing environment. + for _, param := range function.Params { + if binary, ok := param.(*syntax.BinaryExpr); ok { + r.expr(binary.Y) + } + } + + // Enter function block. + b := &block{function: function} + r.push(b) + + var seenOptional bool + var star *syntax.UnaryExpr // * or *args param + var starStar *syntax.Ident // **kwargs ident + var numKwonlyParams int + for _, param := range function.Params { + switch param := param.(type) { + case *syntax.Ident: + // e.g. x + if starStar != nil { + r.errorf(param.NamePos, "required parameter may not follow **%s", starStar.Name) + } else if star != nil { + numKwonlyParams++ + } else if seenOptional { + r.errorf(param.NamePos, "required parameter may not follow optional") + } + if r.bind(param) { + r.errorf(param.NamePos, "duplicate parameter: %s", param.Name) + } + + case *syntax.BinaryExpr: + // e.g. 
y=dflt + if starStar != nil { + r.errorf(param.OpPos, "optional parameter may not follow **%s", starStar.Name) + } else if star != nil { + numKwonlyParams++ + } + if id := param.X.(*syntax.Ident); r.bind(id) { + r.errorf(param.OpPos, "duplicate parameter: %s", id.Name) + } + seenOptional = true + + case *syntax.UnaryExpr: + // * or *args or **kwargs + if param.Op == syntax.STAR { + if starStar != nil { + r.errorf(param.OpPos, "* parameter may not follow **%s", starStar.Name) + } else if star != nil { + r.errorf(param.OpPos, "multiple * parameters not allowed") + } else { + star = param + } + } else { + if starStar != nil { + r.errorf(param.OpPos, "multiple ** parameters not allowed") + } + starStar = param.X.(*syntax.Ident) + } + } + } + + // Bind the *args and **kwargs parameters at the end, + // so that regular parameters a/b/c are contiguous and + // there is no hole for the "*": + // def f(a, b, *args, c=0, **kwargs) + // def f(a, b, *, c=0, **kwargs) + if star != nil { + if id, _ := star.X.(*syntax.Ident); id != nil { + // *args + if r.bind(id) { + r.errorf(id.NamePos, "duplicate parameter: %s", id.Name) + } + function.HasVarargs = true + } else if numKwonlyParams == 0 { + r.errorf(star.OpPos, "bare * must be followed by keyword-only parameters") + } + } + if starStar != nil { + if r.bind(starStar) { + r.errorf(starStar.NamePos, "duplicate parameter: %s", starStar.Name) + } + function.HasKwargs = true + } + + function.NumKwonlyParams = numKwonlyParams + r.stmts(function.Body) + + // Resolve all uses of this function's local vars, + // and keep just the remaining uses of free/global vars. + b.resolveLocalUses() + + // Leave function block. + r.pop() + + // References within the function body to globals are not + // resolved until the end of the module. +} + +func (r *resolver) resolveNonLocalUses(b *block) { + // First resolve inner blocks. 
+ for _, child := range b.children { + r.resolveNonLocalUses(child) + } + for _, use := range b.uses { + use.id.Binding = r.lookupLexical(use, use.env) + } +} + +// lookupLocal looks up an identifier within its immediately enclosing function. +func lookupLocal(use use) *Binding { + for env := use.env; env != nil; env = env.parent { + if bind, ok := env.bindings[use.id.Name]; ok { + if bind.Scope == Free { + // shouldn't exist till later + log.Panicf("%s: internal error: %s, %v", use.id.NamePos, use.id.Name, bind) + } + return bind // found + } + if env.function != nil { + break + } + } + return nil // not found in this function +} + +// lookupLexical looks up an identifier use.id within its lexically enclosing environment. +// The use.env field captures the original environment for error reporting. +func (r *resolver) lookupLexical(use use, env *block) (bind *Binding) { + if debug { + fmt.Printf("lookupLexical %s in %s = ...\n", use.id.Name, env) + defer func() { fmt.Printf("= %v\n", bind) }() + } + + // Is this the file block? + if env == r.file { + return r.useToplevel(use) // file-local, global, predeclared, or not found + } + + // Defined in this block? + bind, ok := env.bindings[use.id.Name] + if !ok { + // Defined in parent block? + bind = r.lookupLexical(use, env.parent) + if env.function != nil && (bind.Scope == Local || bind.Scope == Free || bind.Scope == Cell) { + // Found in parent block, which belongs to enclosing function. + // Add the parent's binding to the function's freevars, + // and add a new 'free' binding to the inner function's block, + // and turn the parent's local into cell. 
+ if bind.Scope == Local { + bind.Scope = Cell + } + index := len(env.function.FreeVars) + env.function.FreeVars = append(env.function.FreeVars, bind) + bind = &Binding{ + First: bind.First, + Scope: Free, + Index: index, + } + if debug { + fmt.Printf("creating freevar %v in function at %s: %s\n", + len(env.function.FreeVars), env.function.Pos, use.id.Name) + } + } + + // Memoize, to avoid duplicate free vars + // and redundant global (failing) lookups. + env.bind(use.id.Name, bind) + } + return bind +} diff --git a/resolve/resolve_test.go b/resolve/resolve_test.go new file mode 100644 index 0000000..50d1cc5 --- /dev/null +++ b/resolve/resolve_test.go @@ -0,0 +1,89 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package resolve_test + +import ( + "strings" + "testing" + + "go.starlark.net/internal/chunkedfile" + "go.starlark.net/resolve" + "go.starlark.net/starlarktest" + "go.starlark.net/syntax" +) + +func setOptions(src string) { + resolve.AllowGlobalReassign = option(src, "globalreassign") + resolve.AllowRecursion = option(src, "recursion") + resolve.AllowSet = option(src, "set") + resolve.LoadBindsGlobally = option(src, "loadbindsglobally") +} + +func option(chunk, name string) bool { + return strings.Contains(chunk, "option:"+name) +} + +func TestResolve(t *testing.T) { + defer setOptions("") + filename := starlarktest.DataFile("resolve", "testdata/resolve.star") + for _, chunk := range chunkedfile.Read(filename, t) { + f, err := syntax.Parse(filename, chunk.Source, 0) + if err != nil { + t.Error(err) + continue + } + + // A chunk may set options by containing e.g. "option:recursion". 
+ setOptions(chunk.Source) + + if err := resolve.File(f, isPredeclared, isUniversal); err != nil { + for _, err := range err.(resolve.ErrorList) { + chunk.GotError(int(err.Pos.Line), err.Msg) + } + } + chunk.Done() + } +} + +func TestDefVarargsAndKwargsSet(t *testing.T) { + source := "def f(*args, **kwargs): pass\n" + file, err := syntax.Parse("foo.star", source, 0) + if err != nil { + t.Fatal(err) + } + if err := resolve.File(file, isPredeclared, isUniversal); err != nil { + t.Fatal(err) + } + fn := file.Stmts[0].(*syntax.DefStmt).Function.(*resolve.Function) + if !fn.HasVarargs { + t.Error("HasVarargs not set") + } + if !fn.HasKwargs { + t.Error("HasKwargs not set") + } +} + +func TestLambdaVarargsAndKwargsSet(t *testing.T) { + resolve.AllowLambda = true + source := "f = lambda *args, **kwargs: 0\n" + file, err := syntax.Parse("foo.star", source, 0) + if err != nil { + t.Fatal(err) + } + if err := resolve.File(file, isPredeclared, isUniversal); err != nil { + t.Fatal(err) + } + lam := file.Stmts[0].(*syntax.AssignStmt).RHS.(*syntax.LambdaExpr).Function.(*resolve.Function) + if !lam.HasVarargs { + t.Error("HasVarargs not set") + } + if !lam.HasKwargs { + t.Error("HasKwargs not set") + } +} + +func isPredeclared(name string) bool { return name == "M" } + +func isUniversal(name string) bool { return name == "U" || name == "float" } diff --git a/resolve/testdata/resolve.star b/resolve/testdata/resolve.star new file mode 100644 index 0000000..ce67110 --- /dev/null +++ b/resolve/testdata/resolve.star @@ -0,0 +1,383 @@ +# Tests of resolver errors. +# +# The initial environment contains the predeclared names "M" +# (module-specific) and "U" (universal). This distinction +# should be unobservable to the Starlark program. + +# use of declared global +x = 1 +_ = x + +--- +# premature use of global is not a static error; +# see github.com/google/skylark/issues/116. 
+_ = x +x = 1 + +--- +# use of undefined global +_ = x ### "undefined: x" + +--- +# redeclaration of global +x = 1 +x = 2 ### "cannot reassign global x declared at .*resolve.star:23:1" + +--- +# Redeclaration of predeclared names is allowed. +# +# This rule permits tool maintainers to add members to the predeclared +# environment without breaking existing programs. + +# module-specific predeclared name +M = 1 # ok +M = 2 ### "cannot reassign global M declared at .*/resolve.star" + +# universal predeclared name +U = 1 # ok +U = 1 ### "cannot reassign global U declared at .*/resolve.star" + +--- +# A global declaration shadows all references to a predeclared; +# see github.com/google/skylark/issues/116. + +a = U # ok: U is a reference to the global defined on the next line. +U = 1 + +--- +# reference to predeclared name +M() + +--- +# locals may be referenced before they are defined + +def f(): + M(x) # dynamic error + x = 1 + +--- +# Various forms of assignment: + +def f(x): # parameter + M(x) + M(y) ### "undefined: y" + +(a, b) = 1, 2 +M(a) +M(b) +M(c) ### "undefined: c" + +[p, q] = 1, 2 +M(p) +M(q) +M(r) ### "undefined: r" + +--- +# a comprehension introduces a separate lexical block + +_ = [x for x in "abc"] +M(x) ### "undefined: x" + +--- +# Functions may have forward refs. +def f(): + g() + h() ### "undefined: h" + def inner(): + i() + i = lambda: 0 + +def g(): + f() + +--- +# It is not permitted to rebind a global using a += assignment. + +x = [1] +x.extend([2]) # ok +x += [3] ### `cannot reassign global x` + +def f(): + x += [4] # x is local to f + +y = 1 +y += 2 ### `cannot reassign global y` +z += 3 # ok (but fails dynamically because z is undefined) + +--- +def f(a): + if 1==1: + b = 1 + c = 1 + M(a) # ok: param + M(b) # ok: maybe bound local + M(c) # ok: bound local + M(d) # NB: we don't do a use-before-def check on local vars! + M(e) # ok: global + M(f) # ok: global + d = 1 + +e = 1 + +--- +# This program should resolve successfully but fail dynamically. 
+x = 1 + +def f(): + M(x) # dynamic error: reference to undefined local + x = 2 + +f() + +--- +load("module", "name") # ok + +def f(): + load("foo", "bar") ### "load statement within a function" + +load("foo", + "", ### "load: empty identifier" + "_a", ### "load: names with leading underscores are not exported: _a" + b="", ### "load: empty identifier" + c="_d", ### "load: names with leading underscores are not exported: _d" + _e="f") # ok + +--- +# option:globalreassign +if M: + load("foo", "bar") ### "load statement within a conditional" + +--- +# option:globalreassign +for x in M: + load("foo", "bar") ### "load statement within a loop" + +--- +# option:recursion option:globalreassign +while M: + load("foo", "bar") ### "load statement within a loop" + +--- +# return statements must be within a function + +return ### "return statement not within a function" + +--- +# if-statements and for-loops at top-level are forbidden +# (without globalreassign option) + +for x in "abc": ### "for loop not within a function" + pass + +if x: ### "if statement not within a function" + pass + +--- +# option:globalreassign + +for x in "abc": # ok + pass + +if x: # ok + pass + +--- +# while loops are forbidden (without -recursion option) + +def f(): + while U: ### "dialect does not support while loops" + pass + +--- +# option:recursion + +def f(): + while U: # ok + pass + +while U: ### "while loop not within a function" + pass + +--- +# option:globalreassign option:recursion + +while U: # ok + pass + +--- +# The parser allows any expression on the LHS of an assignment. 
+ +1 = 0 ### "can't assign to literal" +1+2 = 0 ### "can't assign to binaryexpr" +f() = 0 ### "can't assign to callexpr" + +[a, b] = 0 +[c, d] += 0 ### "can't use list expression in augmented assignment" +(e, f) += 0 ### "can't use tuple expression in augmented assignment" + +[] = 0 # ok +() = 0 # ok + +--- +# break and continue statements must appear within a loop + +break ### "break not in a loop" + +continue ### "continue not in a loop" + +pass + +--- +# Positional arguments (and required parameters) +# must appear before named arguments (and optional parameters). + +M(x=1, 2) ### `positional argument may not follow named` + +def f(x=1, y): pass ### `required parameter may not follow optional` +--- +# No parameters may follow **kwargs in a declaration. + +def f(**kwargs, x): ### `parameter may not follow \*\*kwargs` + pass + +def g(**kwargs, *args): ### `\* parameter may not follow \*\*kwargs` + pass + +def h(**kwargs1, **kwargs2): ### `multiple \*\* parameters not allowed` + pass + +--- +# Only keyword-only params and **kwargs may follow *args in a declaration. + +def f(*args, x): # ok + pass + +def g(*args1, *args2): ### `multiple \* parameters not allowed` + pass + +def h(*, ### `bare \* must be followed by keyword-only parameters` + *): ### `multiple \* parameters not allowed` + pass + +def i(*args, *): ### `multiple \* parameters not allowed` + pass + +def j(*, ### `bare \* must be followed by keyword-only parameters` + *args): ### `multiple \* parameters not allowed` + pass + +def k(*, **kwargs): ### `bare \* must be followed by keyword-only parameters` + pass + +def l(*): ### `bare \* must be followed by keyword-only parameters` + pass + +def m(*args, a=1, **kwargs): # ok + pass + +def n(*, a=1, **kwargs): # ok + pass + +--- +# No arguments may follow **kwargs in a call. 
+def f(*args, **kwargs): + pass + +f(**{}, 1) ### `argument may not follow \*\*kwargs` +f(**{}, x=1) ### `argument may not follow \*\*kwargs` +f(**{}, *[]) ### `\*args may not follow \*\*kwargs` +f(**{}, **{}) ### `multiple \*\*kwargs not allowed` + +--- +# Only **kwargs may follow *args in a call. +def f(*args, **kwargs): + pass + +f(*[], 1) ### `positional argument may not follow \*args` +f(*[], a=1) ### `keyword argument may not follow \*args` +f(*[], *[]) ### `multiple \*args not allowed` +f(*[], **{}) # ok + +--- +# Parameter names must be unique. + +def f(a, b, a): pass ### "duplicate parameter: a" +def g(args, b, *args): pass ### "duplicate parameter: args" +def h(kwargs, a, **kwargs): pass ### "duplicate parameter: kwargs" +def i(*x, **x): pass ### "duplicate parameter: x" + +--- +# Floating-point support is now standard. +a = float("3.141") +b = 1 / 2 +c = 3.141 + +--- +# option:globalreassign +# Legacy Bazel (and Python) semantics: def must precede use even for globals. + +_ = x ### `undefined: x` +x = 1 + +--- +# option:globalreassign +# Legacy Bazel (and Python) semantics: reassignment of globals is allowed. +x = 1 +x = 2 # ok + +--- +# option:globalreassign +# Redeclaration of predeclared names is allowed. 
+ +# module-specific predeclared name +M = 1 # ok +M = 2 # ok (legacy) + +# universal predeclared name +U = 1 # ok +U = 1 # ok (legacy) + +--- +# https://github.com/bazelbuild/starlark/starlark/issues/21 +def f(**kwargs): pass +f(a=1, a=1) ### `keyword argument a repeated` + + +--- +# spelling + +print = U + +hello = 1 +print(hollo) ### `undefined: hollo \(did you mean hello\?\)` + +def f(abc): + print(abd) ### `undefined: abd \(did you mean abc\?\)` + print(goodbye) ### `undefined: goodbye$` + +--- +load("module", "x") # ok +x = 1 ### `cannot reassign local x` +load("module", "x") ### `cannot reassign top-level x` + +--- +# option:loadbindsglobally +load("module", "x") # ok +x = 1 ### `cannot reassign global x` +load("module", "x") ### `cannot reassign global x` + +--- +# option:globalreassign +load("module", "x") # ok +x = 1 # ok +load("module", "x") # ok + +--- +# option:globalreassign option:loadbindsglobally +load("module", "x") # ok +x = 1 +load("module", "x") # ok + +--- +_ = x # forward ref to file-local +load("module", "x") # ok diff --git a/starlark/bench_test.go b/starlark/bench_test.go new file mode 100644 index 0000000..7cfefe0 --- /dev/null +++ b/starlark/bench_test.go @@ -0,0 +1,169 @@ +// Copyright 2018 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark_test + +import ( + "bytes" + "fmt" + "io/ioutil" + "path/filepath" + "strings" + "testing" + + "go.starlark.net/starlark" + "go.starlark.net/starlarktest" +) + +func Benchmark(b *testing.B) { + defer setOptions("") + + testdata := starlarktest.DataFile("starlark", ".") + thread := new(starlark.Thread) + for _, file := range []string{ + "testdata/benchmark.star", + // ... + } { + + filename := filepath.Join(testdata, file) + + src, err := ioutil.ReadFile(filename) + if err != nil { + b.Error(err) + continue + } + setOptions(string(src)) + + // Evaluate the file once. 
+ globals, err := starlark.ExecFile(thread, filename, src, nil) + if err != nil { + reportEvalError(b, err) + } + + // Repeatedly call each global function named bench_* as a benchmark. + for _, name := range globals.Keys() { + value := globals[name] + if fn, ok := value.(*starlark.Function); ok && strings.HasPrefix(name, "bench_") { + b.Run(name, func(b *testing.B) { + _, err := starlark.Call(thread, fn, starlark.Tuple{benchmark{b}}, nil) + if err != nil { + reportEvalError(b, err) + } + }) + } + } + } +} + +// A benchmark is passed to each bench_xyz(b) function in a bench_*.star file. +// It provides b.n, the number of iterations that must be executed by the function, +// which is typically of the form: +// +// def bench_foo(b): +// for _ in range(b.n): +// ...work... +// +// It also provides stop, start, and restart methods to stop the clock in case +// there is significant set-up work that should not count against the measured +// operation. +// +// (This interface is inspired by Go's testing.B, and is also implemented +// by the java.starlark.net implementation; see +// https://github.com/bazelbuild/starlark/pull/75#pullrequestreview-275604129.) 
+type benchmark struct { + b *testing.B +} + +func (benchmark) Freeze() {} +func (benchmark) Truth() starlark.Bool { return true } +func (benchmark) Type() string { return "benchmark" } +func (benchmark) String() string { return "<benchmark>" } +func (benchmark) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: benchmark") } +func (benchmark) AttrNames() []string { return []string{"n", "restart", "start", "stop"} } +func (b benchmark) Attr(name string) (starlark.Value, error) { + switch name { + case "n": + return starlark.MakeInt(b.b.N), nil + case "restart": + return benchmarkRestart.BindReceiver(b), nil + case "start": + return benchmarkStart.BindReceiver(b), nil + case "stop": + return benchmarkStop.BindReceiver(b), nil + } + return nil, nil +} + +var ( + benchmarkRestart = starlark.NewBuiltin("restart", benchmarkRestartImpl) + benchmarkStart = starlark.NewBuiltin("start", benchmarkStartImpl) + benchmarkStop = starlark.NewBuiltin("stop", benchmarkStopImpl) +) + +func benchmarkRestartImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + b.Receiver().(benchmark).b.ResetTimer() + return starlark.None, nil +} + +func benchmarkStartImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + b.Receiver().(benchmark).b.StartTimer() + return starlark.None, nil +} + +func benchmarkStopImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + b.Receiver().(benchmark).b.StopTimer() + return starlark.None, nil +} + +// BenchmarkProgram measures operations relevant to compiled programs. +// TODO(adonovan): use a bigger testdata program. +func BenchmarkProgram(b *testing.B) { + // Measure time to read a source file (approx 600us but depends on hardware and file system). 
+ filename := starlarktest.DataFile("starlark", "testdata/paths.star") + var src []byte + b.Run("read", func(b *testing.B) { + for i := 0; i < b.N; i++ { + var err error + src, err = ioutil.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + } + }) + + // Measure time to turn a source filename into a compiled program (approx 450us). + var prog *starlark.Program + b.Run("compile", func(b *testing.B) { + for i := 0; i < b.N; i++ { + var err error + _, prog, err = starlark.SourceProgram(filename, src, starlark.StringDict(nil).Has) + if err != nil { + b.Fatal(err) + } + } + }) + + // Measure time to encode a compiled program to a memory buffer + // (approx 20us; was 75-120us with gob encoding). + var out bytes.Buffer + b.Run("encode", func(b *testing.B) { + for i := 0; i < b.N; i++ { + out.Reset() + if err := prog.Write(&out); err != nil { + b.Fatal(err) + } + } + }) + + // Measure time to decode a compiled program from a memory buffer + // (approx 20us; was 135-250us with gob encoding) + b.Run("decode", func(b *testing.B) { + for i := 0; i < b.N; i++ { + in := bytes.NewReader(out.Bytes()) + if _, err := starlark.CompiledProgram(in); err != nil { + b.Fatal(err) + } + } + }) +} diff --git a/starlark/debug.go b/starlark/debug.go new file mode 100644 index 0000000..22a2124 --- /dev/null +++ b/starlark/debug.go @@ -0,0 +1,42 @@ +package starlark + +import "go.starlark.net/syntax" + +// This file defines an experimental API for the debugging tools. +// Some of these declarations expose details of internal packages. +// (The debugger makes liberal use of exported fields of unexported types.) +// Breaking changes may occur without notice. + +// Local returns the value of the i'th local variable. +// It may be nil if not yet assigned. +// +// Local may be called only for frames whose Callable is a *Function (a +// function defined by Starlark source code), and only while the frame +// is active; it will panic otherwise. 
+// +// This function is provided only for debugging tools. +// +// THIS API IS EXPERIMENTAL AND MAY CHANGE WITHOUT NOTICE. +func (fr *frame) Local(i int) Value { return fr.locals[i] } + +// DebugFrame is the debugger API for a frame of the interpreter's call stack. +// +// Most applications have no need for this API; use CallFrame instead. +// +// Clients must not retain a DebugFrame nor call any of its methods once +// the current built-in call has returned or execution has resumed +// after a breakpoint as this may have unpredictable effects, including +// but not limited to retention of objects that would otherwise be garbage. +type DebugFrame interface { + Callable() Callable // returns the frame's function + Local(i int) Value // returns the value of the (Starlark) frame's ith local variable + Position() syntax.Position // returns the current position of execution in this frame +} + +// DebugFrame returns the debugger interface for +// the specified frame of the interpreter's call stack. +// Frame numbering is as for Thread.CallFrame. +// +// This function is intended for use in debugging tools. +// Most applications should have no need for it; use CallFrame instead. +func (thread *Thread) DebugFrame(depth int) DebugFrame { return thread.frameAt(depth) } diff --git a/starlark/empty.s b/starlark/empty.s new file mode 100644 index 0000000..3b82169 --- /dev/null +++ b/starlark/empty.s @@ -0,0 +1,3 @@ +// The presence of this file allows the package to use the +// "go:linkname" hack to call non-exported functions in the +// Go runtime, such as hardware-accelerated string hashing. diff --git a/starlark/eval.go b/starlark/eval.go new file mode 100644 index 0000000..d0ad91f --- /dev/null +++ b/starlark/eval.go @@ -0,0 +1,1618 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package starlark + +import ( + "fmt" + "io" + "io/ioutil" + "log" + "math/big" + "sort" + "strings" + "sync/atomic" + "time" + "unicode" + "unicode/utf8" + "unsafe" + + "go.starlark.net/internal/compile" + "go.starlark.net/internal/spell" + "go.starlark.net/resolve" + "go.starlark.net/syntax" +) + +// A Thread contains the state of a Starlark thread, +// such as its call stack and thread-local storage. +// The Thread is threaded throughout the evaluator. +type Thread struct { + // Name is an optional name that describes the thread, for debugging. + Name string + + // stack is the stack of (internal) call frames. + stack []*frame + + // Print is the client-supplied implementation of the Starlark + // 'print' function. If nil, fmt.Fprintln(os.Stderr, msg) is + // used instead. + Print func(thread *Thread, msg string) + + // Load is the client-supplied implementation of module loading. + // Repeated calls with the same module name must return the same + // module environment or error. + // The error message need not include the module name. + // + // See example_test.go for some example implementations of Load. + Load func(thread *Thread, module string) (StringDict, error) + + // steps counts abstract computation steps executed by this thread. + steps, maxSteps uint64 + + // cancelReason records the reason from the first call to Cancel. + cancelReason *string + + // locals holds arbitrary "thread-local" Go values belonging to the client. + // They are accessible to the client but not to any Starlark program. + locals map[string]interface{} + + // proftime holds the accumulated execution time since the last profile event. + proftime time.Duration +} + +// ExecutionSteps returns a count of abstract computation steps executed +// by this thread. It is incremented by the interpreter. It may be used +// as a measure of the approximate cost of Starlark execution, by +// computing the difference in its value before and after a computation. 
+// +// The precise meaning of "step" is not specified and may change. +func (thread *Thread) ExecutionSteps() uint64 { + return thread.steps +} + +// SetMaxExecutionSteps sets a limit on the number of Starlark +// computation steps that may be executed by this thread. If the +// thread's step counter exceeds this limit, the interpreter calls +// thread.Cancel("too many steps"). +func (thread *Thread) SetMaxExecutionSteps(max uint64) { + thread.maxSteps = max +} + +// Cancel causes execution of Starlark code in the specified thread to +// promptly fail with an EvalError that includes the specified reason. +// There may be a delay before the interpreter observes the cancellation +// if the thread is currently in a call to a built-in function. +// +// Cancellation cannot be undone. +// +// Unlike most methods of Thread, it is safe to call Cancel from any +// goroutine, even if the thread is actively executing. +func (thread *Thread) Cancel(reason string) { + // Atomically set cancelReason, preserving earlier reason if any. + atomic.CompareAndSwapPointer((*unsafe.Pointer)(unsafe.Pointer(&thread.cancelReason)), nil, unsafe.Pointer(&reason)) +} + +// SetLocal sets the thread-local value associated with the specified key. +// It must not be called after execution begins. +func (thread *Thread) SetLocal(key string, value interface{}) { + if thread.locals == nil { + thread.locals = make(map[string]interface{}) + } + thread.locals[key] = value +} + +// Local returns the thread-local value associated with the specified key. +func (thread *Thread) Local(key string) interface{} { + return thread.locals[key] +} + +// CallFrame returns a copy of the specified frame of the callstack. +// It should only be used in built-ins called from Starlark code. +// Depth 0 means the frame of the built-in itself, 1 is its caller, and so on. +// +// It is equivalent to CallStack().At(depth), but more efficient. 
+func (thread *Thread) CallFrame(depth int) CallFrame { + return thread.frameAt(depth).asCallFrame() +} + +func (thread *Thread) frameAt(depth int) *frame { + return thread.stack[len(thread.stack)-1-depth] +} + +// CallStack returns a new slice containing the thread's stack of call frames. +func (thread *Thread) CallStack() CallStack { + frames := make([]CallFrame, len(thread.stack)) + for i, fr := range thread.stack { + frames[i] = fr.asCallFrame() + } + return frames +} + +// CallStackDepth returns the number of frames in the current call stack. +func (thread *Thread) CallStackDepth() int { return len(thread.stack) } + +// A StringDict is a mapping from names to values, and represents +// an environment such as the global variables of a module. +// It is not a true starlark.Value. +type StringDict map[string]Value + +// Keys returns a new sorted slice of d's keys. +func (d StringDict) Keys() []string { + names := make([]string, 0, len(d)) + for name := range d { + names = append(names, name) + } + sort.Strings(names) + return names +} + +func (d StringDict) String() string { + buf := new(strings.Builder) + buf.WriteByte('{') + sep := "" + for _, name := range d.Keys() { + buf.WriteString(sep) + buf.WriteString(name) + buf.WriteString(": ") + writeValue(buf, d[name], nil) + sep = ", " + } + buf.WriteByte('}') + return buf.String() +} + +func (d StringDict) Freeze() { + for _, v := range d { + v.Freeze() + } +} + +// Has reports whether the dictionary contains the specified key. +func (d StringDict) Has(key string) bool { _, ok := d[key]; return ok } + +// A frame records a call to a Starlark function (including module toplevel) +// or a built-in function or method. 
+type frame struct { + callable Callable // current function (or toplevel) or built-in + pc uint32 // program counter (Starlark frames only) + locals []Value // local variables (Starlark frames only) + spanStart int64 // start time of current profiler span +} + +// Position returns the source position of the current point of execution in this frame. +func (fr *frame) Position() syntax.Position { + switch c := fr.callable.(type) { + case *Function: + // Starlark function + return c.funcode.Position(fr.pc) + case callableWithPosition: + // If a built-in Callable defines + // a Position method, use it. + return c.Position() + } + return syntax.MakePosition(&builtinFilename, 0, 0) +} + +var builtinFilename = "<builtin>" + +// Callable returns the frame's function or built-in. +func (fr *frame) Callable() Callable { return fr.callable } + +// A CallStack is a stack of call frames, outermost first. +type CallStack []CallFrame + +// At returns a copy of the frame at depth i. +// At(0) returns the topmost frame. +func (stack CallStack) At(i int) CallFrame { return stack[len(stack)-1-i] } + +// Pop removes and returns the topmost frame. +func (stack *CallStack) Pop() CallFrame { + last := len(*stack) - 1 + top := (*stack)[last] + *stack = (*stack)[:last] + return top +} + +// String returns a user-friendly description of the stack. +func (stack CallStack) String() string { + out := new(strings.Builder) + if len(stack) > 0 { + fmt.Fprintf(out, "Traceback (most recent call last):\n") + } + for _, fr := range stack { + fmt.Fprintf(out, " %s: in %s\n", fr.Pos, fr.Name) + } + return out.String() +} + +// An EvalError is a Starlark evaluation error and +// a copy of the thread's stack at the moment of the error. +type EvalError struct { + Msg string + CallStack CallStack + cause error +} + +// A CallFrame represents the function name and current +// position of execution of an enclosing call frame. 
+type CallFrame struct { + Name string + Pos syntax.Position +} + +func (fr *frame) asCallFrame() CallFrame { + return CallFrame{ + Name: fr.Callable().Name(), + Pos: fr.Position(), + } +} + +func (thread *Thread) evalError(err error) *EvalError { + return &EvalError{ + Msg: err.Error(), + CallStack: thread.CallStack(), + cause: err, + } +} + +func (e *EvalError) Error() string { return e.Msg } + +// Backtrace returns a user-friendly error message describing the stack +// of calls that led to this error. +func (e *EvalError) Backtrace() string { + // If the topmost stack frame is a built-in function, + // remove it from the stack and print "Error in fn:". + stack := e.CallStack + suffix := "" + if last := len(stack) - 1; last >= 0 && stack[last].Pos.Filename() == builtinFilename { + suffix = " in " + stack[last].Name + stack = stack[:last] + } + return fmt.Sprintf("%sError%s: %s", stack, suffix, e.Msg) +} + +func (e *EvalError) Unwrap() error { return e.cause } + +// A Program is a compiled Starlark program. +// +// Programs are immutable, and contain no Values. +// A Program may be created by parsing a source file (see SourceProgram) +// or by loading a previously saved compiled program (see CompiledProgram). +type Program struct { + compiled *compile.Program +} + +// CompilerVersion is the version number of the protocol for compiled +// files. Applications must not run programs compiled by one version +// with an interpreter at another version, and should thus incorporate +// the compiler version into the cache key when reusing compiled code. +const CompilerVersion = compile.Version + +// Filename returns the name of the file from which this program was loaded. +func (prog *Program) Filename() string { return prog.compiled.Toplevel.Pos.Filename() } + +func (prog *Program) String() string { return prog.Filename() } + +// NumLoads returns the number of load statements in the compiled program. 
+func (prog *Program) NumLoads() int { return len(prog.compiled.Loads) } + +// Load(i) returns the name and position of the i'th module directly +// loaded by this one, where 0 <= i < NumLoads(). +// The name is unresolved---exactly as it appears in the source. +func (prog *Program) Load(i int) (string, syntax.Position) { + id := prog.compiled.Loads[i] + return id.Name, id.Pos +} + +// Write writes the compiled module to the specified output stream. +func (prog *Program) Write(out io.Writer) error { + data := prog.compiled.Encode() + _, err := out.Write(data) + return err +} + +// ExecFile parses, resolves, and executes a Starlark file in the +// specified global environment, which may be modified during execution. +// +// Thread is the state associated with the Starlark thread. +// +// The filename and src parameters are as for syntax.Parse: +// filename is the name of the file to execute, +// and the name that appears in error messages; +// src is an optional source of bytes to use +// instead of filename. +// +// predeclared defines the predeclared names specific to this module. +// Execution does not modify this dictionary, though it may mutate +// its values. +// +// If ExecFile fails during evaluation, it returns an *EvalError +// containing a backtrace. +func ExecFile(thread *Thread, filename string, src interface{}, predeclared StringDict) (StringDict, error) { + // Parse, resolve, and compile a Starlark source file. + _, mod, err := SourceProgram(filename, src, predeclared.Has) + if err != nil { + return nil, err + } + + g, err := mod.Init(thread, predeclared) + g.Freeze() + return g, err +} + +// SourceProgram produces a new program by parsing, resolving, +// and compiling a Starlark source file. +// On success, it returns the parsed file and the compiled program. +// The filename and src parameters are as for syntax.Parse. +// +// The isPredeclared predicate reports whether a name is +// a pre-declared identifier of the current module. 
+// Its typical value is predeclared.Has, +// where predeclared is a StringDict of pre-declared values. +func SourceProgram(filename string, src interface{}, isPredeclared func(string) bool) (*syntax.File, *Program, error) { + f, err := syntax.Parse(filename, src, 0) + if err != nil { + return nil, nil, err + } + prog, err := FileProgram(f, isPredeclared) + return f, prog, err +} + +// FileProgram produces a new program by resolving, +// and compiling the Starlark source file syntax tree. +// On success, it returns the compiled program. +// +// Resolving a syntax tree mutates it. +// Do not call FileProgram more than once on the same file. +// +// The isPredeclared predicate reports whether a name is +// a pre-declared identifier of the current module. +// Its typical value is predeclared.Has, +// where predeclared is a StringDict of pre-declared values. +func FileProgram(f *syntax.File, isPredeclared func(string) bool) (*Program, error) { + if err := resolve.File(f, isPredeclared, Universe.Has); err != nil { + return nil, err + } + + var pos syntax.Position + if len(f.Stmts) > 0 { + pos = syntax.Start(f.Stmts[0]) + } else { + pos = syntax.MakePosition(&f.Path, 1, 1) + } + + module := f.Module.(*resolve.Module) + compiled := compile.File(f.Stmts, pos, "<toplevel>", module.Locals, module.Globals) + + return &Program{compiled}, nil +} + +// CompiledProgram produces a new program from the representation +// of a compiled program previously saved by Program.Write. +func CompiledProgram(in io.Reader) (*Program, error) { + data, err := ioutil.ReadAll(in) + if err != nil { + return nil, err + } + compiled, err := compile.DecodeProgram(data) + if err != nil { + return nil, err + } + return &Program{compiled}, nil +} + +// Init creates a set of global variables for the program, +// executes the toplevel code of the specified program, +// and returns a new, unfrozen dictionary of the globals. 
+func (prog *Program) Init(thread *Thread, predeclared StringDict) (StringDict, error) { + toplevel := makeToplevelFunction(prog.compiled, predeclared) + + _, err := Call(thread, toplevel, nil, nil) + + // Convert the global environment to a map. + // We return a (partial) map even in case of error. + return toplevel.Globals(), err +} + +// ExecREPLChunk compiles and executes file f in the specified thread +// and global environment. This is a variant of ExecFile specialized to +// the needs of a REPL, in which a sequence of input chunks, each +// syntactically a File, manipulates the same set of module globals, +// which are not frozen after execution. +// +// This function is intended to support only go.starlark.net/repl. +// Its API stability is not guaranteed. +func ExecREPLChunk(f *syntax.File, thread *Thread, globals StringDict) error { + var predeclared StringDict + + // -- variant of FileProgram -- + + if err := resolve.REPLChunk(f, globals.Has, predeclared.Has, Universe.Has); err != nil { + return err + } + + var pos syntax.Position + if len(f.Stmts) > 0 { + pos = syntax.Start(f.Stmts[0]) + } else { + pos = syntax.MakePosition(&f.Path, 1, 1) + } + + module := f.Module.(*resolve.Module) + compiled := compile.File(f.Stmts, pos, "<toplevel>", module.Locals, module.Globals) + prog := &Program{compiled} + + // -- variant of Program.Init -- + + toplevel := makeToplevelFunction(prog.compiled, predeclared) + + // Initialize module globals from parameter. + for i, id := range prog.compiled.Globals { + if v := globals[id.Name]; v != nil { + toplevel.module.globals[i] = v + } + } + + _, err := Call(thread, toplevel, nil, nil) + + // Reflect changes to globals back to parameter, even after an error. 
+ for i, id := range prog.compiled.Globals { + if v := toplevel.module.globals[i]; v != nil { + globals[id.Name] = v + } + } + + return err +} + +func makeToplevelFunction(prog *compile.Program, predeclared StringDict) *Function { + // Create the Starlark value denoted by each program constant c. + constants := make([]Value, len(prog.Constants)) + for i, c := range prog.Constants { + var v Value + switch c := c.(type) { + case int64: + v = MakeInt64(c) + case *big.Int: + v = MakeBigInt(c) + case string: + v = String(c) + case compile.Bytes: + v = Bytes(c) + case float64: + v = Float(c) + default: + log.Panicf("unexpected constant %T: %v", c, c) + } + constants[i] = v + } + + return &Function{ + funcode: prog.Toplevel, + module: &module{ + program: prog, + predeclared: predeclared, + globals: make([]Value, len(prog.Globals)), + constants: constants, + }, + } +} + +// Eval parses, resolves, and evaluates an expression within the +// specified (predeclared) environment. +// +// Evaluation cannot mutate the environment dictionary itself, +// though it may modify variables reachable from the dictionary. +// +// The filename and src parameters are as for syntax.Parse. +// +// If Eval fails during evaluation, it returns an *EvalError +// containing a backtrace. +func Eval(thread *Thread, filename string, src interface{}, env StringDict) (Value, error) { + expr, err := syntax.ParseExpr(filename, src, 0) + if err != nil { + return nil, err + } + f, err := makeExprFunc(expr, env) + if err != nil { + return nil, err + } + return Call(thread, f, nil, nil) +} + +// EvalExpr resolves and evaluates an expression within the +// specified (predeclared) environment. +// Evaluating a comma-separated list of expressions yields a tuple value. +// +// Resolving an expression mutates it. +// Do not call EvalExpr more than once for the same expression. +// +// Evaluation cannot mutate the environment dictionary itself, +// though it may modify variables reachable from the dictionary. 
+// +// If Eval fails during evaluation, it returns an *EvalError +// containing a backtrace. +func EvalExpr(thread *Thread, expr syntax.Expr, env StringDict) (Value, error) { + fn, err := makeExprFunc(expr, env) + if err != nil { + return nil, err + } + return Call(thread, fn, nil, nil) +} + +// ExprFunc returns a no-argument function +// that evaluates the expression whose source is src. +func ExprFunc(filename string, src interface{}, env StringDict) (*Function, error) { + expr, err := syntax.ParseExpr(filename, src, 0) + if err != nil { + return nil, err + } + return makeExprFunc(expr, env) +} + +// makeExprFunc returns a no-argument function whose body is expr. +func makeExprFunc(expr syntax.Expr, env StringDict) (*Function, error) { + locals, err := resolve.Expr(expr, env.Has, Universe.Has) + if err != nil { + return nil, err + } + + return makeToplevelFunction(compile.Expr(expr, "<expr>", locals), env), nil +} + +// The following functions are primitive operations of the byte code interpreter. + +// list += iterable +func listExtend(x *List, y Iterable) { + if ylist, ok := y.(*List); ok { + // fast path: list += list + x.elems = append(x.elems, ylist.elems...) + } else { + iter := y.Iterate() + defer iter.Done() + var z Value + for iter.Next(&z) { + x.elems = append(x.elems, z) + } + } +} + +// getAttr implements x.dot. 
+func getAttr(x Value, name string) (Value, error) { + hasAttr, ok := x.(HasAttrs) + if !ok { + return nil, fmt.Errorf("%s has no .%s field or method", x.Type(), name) + } + + var errmsg string + v, err := hasAttr.Attr(name) + if err == nil { + if v != nil { + return v, nil // success + } + // (nil, nil) => generic error + errmsg = fmt.Sprintf("%s has no .%s field or method", x.Type(), name) + } else if nsa, ok := err.(NoSuchAttrError); ok { + errmsg = string(nsa) + } else { + return nil, err // return error as is + } + + // add spelling hint + if n := spell.Nearest(name, hasAttr.AttrNames()); n != "" { + errmsg = fmt.Sprintf("%s (did you mean .%s?)", errmsg, n) + } + + return nil, fmt.Errorf("%s", errmsg) +} + +// setField implements x.name = y. +func setField(x Value, name string, y Value) error { + if x, ok := x.(HasSetField); ok { + err := x.SetField(name, y) + if _, ok := err.(NoSuchAttrError); ok { + // No such field: check spelling. + if n := spell.Nearest(name, x.AttrNames()); n != "" { + err = fmt.Errorf("%s (did you mean .%s?)", err, n) + } + } + return err + } + + return fmt.Errorf("can't assign to .%s field of %s", name, x.Type()) +} + +// getIndex implements x[y]. 
+func getIndex(x, y Value) (Value, error) { + switch x := x.(type) { + case Mapping: // dict + z, found, err := x.Get(y) + if err != nil { + return nil, err + } + if !found { + return nil, fmt.Errorf("key %v not in %s", y, x.Type()) + } + return z, nil + + case Indexable: // string, list, tuple + n := x.Len() + i, err := AsInt32(y) + if err != nil { + return nil, fmt.Errorf("%s index: %s", x.Type(), err) + } + origI := i + if i < 0 { + i += n + } + if i < 0 || i >= n { + return nil, outOfRange(origI, n, x) + } + return x.Index(i), nil + } + return nil, fmt.Errorf("unhandled index operation %s[%s]", x.Type(), y.Type()) +} + +func outOfRange(i, n int, x Value) error { + if n == 0 { + return fmt.Errorf("index %d out of range: empty %s", i, x.Type()) + } else { + return fmt.Errorf("%s index %d out of range [%d:%d]", x.Type(), i, -n, n-1) + } +} + +// setIndex implements x[y] = z. +func setIndex(x, y, z Value) error { + switch x := x.(type) { + case HasSetKey: + if err := x.SetKey(y, z); err != nil { + return err + } + + case HasSetIndex: + n := x.Len() + i, err := AsInt32(y) + if err != nil { + return err + } + origI := i + if i < 0 { + i += n + } + if i < 0 || i >= n { + return outOfRange(origI, n, x) + } + return x.SetIndex(i, z) + + default: + return fmt.Errorf("%s value does not support item assignment", x.Type()) + } + return nil +} + +// Unary applies a unary operator (+, -, ~, not) to its operand. +func Unary(op syntax.Token, x Value) (Value, error) { + // The NOT operator is not customizable. + if op == syntax.NOT { + return !x.Truth(), nil + } + + // Int, Float, and user-defined types + if x, ok := x.(HasUnary); ok { + // (nil, nil) => unhandled + y, err := x.Unary(op) + if y != nil || err != nil { + return y, err + } + } + + return nil, fmt.Errorf("unknown unary op: %s %s", op, x.Type()) +} + +// Binary applies a strict binary operator (not AND or OR) to its operands. +// For equality tests or ordered comparisons, use Compare instead. 
+func Binary(op syntax.Token, x, y Value) (Value, error) { + switch op { + case syntax.PLUS: + switch x := x.(type) { + case String: + if y, ok := y.(String); ok { + return x + y, nil + } + case Int: + switch y := y.(type) { + case Int: + return x.Add(y), nil + case Float: + xf, err := x.finiteFloat() + if err != nil { + return nil, err + } + return xf + y, nil + } + case Float: + switch y := y.(type) { + case Float: + return x + y, nil + case Int: + yf, err := y.finiteFloat() + if err != nil { + return nil, err + } + return x + yf, nil + } + case *List: + if y, ok := y.(*List); ok { + z := make([]Value, 0, x.Len()+y.Len()) + z = append(z, x.elems...) + z = append(z, y.elems...) + return NewList(z), nil + } + case Tuple: + if y, ok := y.(Tuple); ok { + z := make(Tuple, 0, len(x)+len(y)) + z = append(z, x...) + z = append(z, y...) + return z, nil + } + } + + case syntax.MINUS: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + return x.Sub(y), nil + case Float: + xf, err := x.finiteFloat() + if err != nil { + return nil, err + } + return xf - y, nil + } + case Float: + switch y := y.(type) { + case Float: + return x - y, nil + case Int: + yf, err := y.finiteFloat() + if err != nil { + return nil, err + } + return x - yf, nil + } + } + + case syntax.STAR: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + return x.Mul(y), nil + case Float: + xf, err := x.finiteFloat() + if err != nil { + return nil, err + } + return xf * y, nil + case String: + return stringRepeat(y, x) + case Bytes: + return bytesRepeat(y, x) + case *List: + elems, err := tupleRepeat(Tuple(y.elems), x) + if err != nil { + return nil, err + } + return NewList(elems), nil + case Tuple: + return tupleRepeat(y, x) + } + case Float: + switch y := y.(type) { + case Float: + return x * y, nil + case Int: + yf, err := y.finiteFloat() + if err != nil { + return nil, err + } + return x * yf, nil + } + case String: + if y, ok := y.(Int); ok { + return 
stringRepeat(x, y) + } + case Bytes: + if y, ok := y.(Int); ok { + return bytesRepeat(x, y) + } + case *List: + if y, ok := y.(Int); ok { + elems, err := tupleRepeat(Tuple(x.elems), y) + if err != nil { + return nil, err + } + return NewList(elems), nil + } + case Tuple: + if y, ok := y.(Int); ok { + return tupleRepeat(x, y) + } + + } + + case syntax.SLASH: + switch x := x.(type) { + case Int: + xf, err := x.finiteFloat() + if err != nil { + return nil, err + } + switch y := y.(type) { + case Int: + yf, err := y.finiteFloat() + if err != nil { + return nil, err + } + if yf == 0.0 { + return nil, fmt.Errorf("floating-point division by zero") + } + return xf / yf, nil + case Float: + if y == 0.0 { + return nil, fmt.Errorf("floating-point division by zero") + } + return xf / y, nil + } + case Float: + switch y := y.(type) { + case Float: + if y == 0.0 { + return nil, fmt.Errorf("floating-point division by zero") + } + return x / y, nil + case Int: + yf, err := y.finiteFloat() + if err != nil { + return nil, err + } + if yf == 0.0 { + return nil, fmt.Errorf("floating-point division by zero") + } + return x / yf, nil + } + } + + case syntax.SLASHSLASH: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + if y.Sign() == 0 { + return nil, fmt.Errorf("floored division by zero") + } + return x.Div(y), nil + case Float: + xf, err := x.finiteFloat() + if err != nil { + return nil, err + } + if y == 0.0 { + return nil, fmt.Errorf("floored division by zero") + } + return floor(xf / y), nil + } + case Float: + switch y := y.(type) { + case Float: + if y == 0.0 { + return nil, fmt.Errorf("floored division by zero") + } + return floor(x / y), nil + case Int: + yf, err := y.finiteFloat() + if err != nil { + return nil, err + } + if yf == 0.0 { + return nil, fmt.Errorf("floored division by zero") + } + return floor(x / yf), nil + } + } + + case syntax.PERCENT: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + if y.Sign() == 0 { + 
return nil, fmt.Errorf("integer modulo by zero") + } + return x.Mod(y), nil + case Float: + xf, err := x.finiteFloat() + if err != nil { + return nil, err + } + if y == 0 { + return nil, fmt.Errorf("floating-point modulo by zero") + } + return xf.Mod(y), nil + } + case Float: + switch y := y.(type) { + case Float: + if y == 0.0 { + return nil, fmt.Errorf("floating-point modulo by zero") + } + return x.Mod(y), nil + case Int: + if y.Sign() == 0 { + return nil, fmt.Errorf("floating-point modulo by zero") + } + yf, err := y.finiteFloat() + if err != nil { + return nil, err + } + return x.Mod(yf), nil + } + case String: + return interpolate(string(x), y) + } + + case syntax.NOT_IN: + z, err := Binary(syntax.IN, x, y) + if err != nil { + return nil, err + } + return !z.Truth(), nil + + case syntax.IN: + switch y := y.(type) { + case *List: + for _, elem := range y.elems { + if eq, err := Equal(elem, x); err != nil { + return nil, err + } else if eq { + return True, nil + } + } + return False, nil + case Tuple: + for _, elem := range y { + if eq, err := Equal(elem, x); err != nil { + return nil, err + } else if eq { + return True, nil + } + } + return False, nil + case Mapping: // e.g. dict + // Ignore error from Get as we cannot distinguish true + // errors (value cycle, type error) from "key not found". 
+ _, found, _ := y.Get(x) + return Bool(found), nil + case *Set: + ok, err := y.Has(x) + return Bool(ok), err + case String: + needle, ok := x.(String) + if !ok { + return nil, fmt.Errorf("'in <string>' requires string as left operand, not %s", x.Type()) + } + return Bool(strings.Contains(string(y), string(needle))), nil + case Bytes: + switch needle := x.(type) { + case Bytes: + return Bool(strings.Contains(string(y), string(needle))), nil + case Int: + var b byte + if err := AsInt(needle, &b); err != nil { + return nil, fmt.Errorf("int in bytes: %s", err) + } + return Bool(strings.IndexByte(string(y), b) >= 0), nil + default: + return nil, fmt.Errorf("'in bytes' requires bytes or int as left operand, not %s", x.Type()) + } + case rangeValue: + i, err := NumberToInt(x) + if err != nil { + return nil, fmt.Errorf("'in <range>' requires integer as left operand, not %s", x.Type()) + } + return Bool(y.contains(i)), nil + } + + case syntax.PIPE: + switch x := x.(type) { + case Int: + if y, ok := y.(Int); ok { + return x.Or(y), nil + } + case *Set: // union + if y, ok := y.(*Set); ok { + iter := Iterate(y) + defer iter.Done() + return x.Union(iter) + } + } + + case syntax.AMP: + switch x := x.(type) { + case Int: + if y, ok := y.(Int); ok { + return x.And(y), nil + } + case *Set: // intersection + if y, ok := y.(*Set); ok { + set := new(Set) + if x.Len() > y.Len() { + x, y = y, x // opt: range over smaller set + } + for _, xelem := range x.elems() { + // Has, Insert cannot fail here. 
+ if found, _ := y.Has(xelem); found { + set.Insert(xelem) + } + } + return set, nil + } + } + + case syntax.CIRCUMFLEX: + switch x := x.(type) { + case Int: + if y, ok := y.(Int); ok { + return x.Xor(y), nil + } + case *Set: // symmetric difference + if y, ok := y.(*Set); ok { + set := new(Set) + for _, xelem := range x.elems() { + if found, _ := y.Has(xelem); !found { + set.Insert(xelem) + } + } + for _, yelem := range y.elems() { + if found, _ := x.Has(yelem); !found { + set.Insert(yelem) + } + } + return set, nil + } + } + + case syntax.LTLT, syntax.GTGT: + if x, ok := x.(Int); ok { + y, err := AsInt32(y) + if err != nil { + return nil, err + } + if y < 0 { + return nil, fmt.Errorf("negative shift count: %v", y) + } + if op == syntax.LTLT { + if y >= 512 { + return nil, fmt.Errorf("shift count too large: %v", y) + } + return x.Lsh(uint(y)), nil + } else { + return x.Rsh(uint(y)), nil + } + } + + default: + // unknown operator + goto unknown + } + + // user-defined types + // (nil, nil) => unhandled + if x, ok := x.(HasBinary); ok { + z, err := x.Binary(op, y, Left) + if z != nil || err != nil { + return z, err + } + } + if y, ok := y.(HasBinary); ok { + z, err := y.Binary(op, x, Right) + if z != nil || err != nil { + return z, err + } + } + + // unsupported operand types +unknown: + return nil, fmt.Errorf("unknown binary op: %s %s %s", x.Type(), op, y.Type()) +} + +// It's always possible to overeat in small bites but we'll +// try to stop someone swallowing the world in one gulp. +const maxAlloc = 1 << 30 + +func tupleRepeat(elems Tuple, n Int) (Tuple, error) { + if len(elems) == 0 { + return nil, nil + } + i, err := AsInt32(n) + if err != nil { + return nil, fmt.Errorf("repeat count %s too large", n) + } + if i < 1 { + return nil, nil + } + // Inv: i > 0, len > 0 + sz := len(elems) * i + if sz < 0 || sz >= maxAlloc { // sz < 0 => overflow + // Don't print sz. 
+ return nil, fmt.Errorf("excessive repeat (%d * %d elements)", len(elems), i) + } + res := make([]Value, sz) + // copy elems into res, doubling each time + x := copy(res, elems) + for x < len(res) { + copy(res[x:], res[:x]) + x *= 2 + } + return res, nil +} + +func bytesRepeat(b Bytes, n Int) (Bytes, error) { + res, err := stringRepeat(String(b), n) + return Bytes(res), err +} + +func stringRepeat(s String, n Int) (String, error) { + if s == "" { + return "", nil + } + i, err := AsInt32(n) + if err != nil { + return "", fmt.Errorf("repeat count %s too large", n) + } + if i < 1 { + return "", nil + } + // Inv: i > 0, len > 0 + sz := len(s) * i + if sz < 0 || sz >= maxAlloc { // sz < 0 => overflow + // Don't print sz. + return "", fmt.Errorf("excessive repeat (%d * %d elements)", len(s), i) + } + return String(strings.Repeat(string(s), i)), nil +} + +// Call calls the function fn with the specified positional and keyword arguments. +func Call(thread *Thread, fn Value, args Tuple, kwargs []Tuple) (Value, error) { + c, ok := fn.(Callable) + if !ok { + return nil, fmt.Errorf("invalid call of non-function (%s)", fn.Type()) + } + + // Allocate and push a new frame. + var fr *frame + // Optimization: use slack portion of thread.stack + // slice as a freelist of empty frames. + if n := len(thread.stack); n < cap(thread.stack) { + fr = thread.stack[n : n+1][0] + } + if fr == nil { + fr = new(frame) + } + + if thread.stack == nil { + // one-time initialization of thread + if thread.maxSteps == 0 { + thread.maxSteps-- // (MaxUint64) + } + } + + thread.stack = append(thread.stack, fr) // push + + fr.callable = c + + thread.beginProfSpan() + result, err := c.CallInternal(thread, args, kwargs) + thread.endProfSpan() + + // Sanity check: nil is not a valid Starlark value. + if result == nil && err == nil { + err = fmt.Errorf("internal error: nil (not None) returned from %s", fn) + } + + // Always return an EvalError with an accurate frame. 
+ if err != nil { + if _, ok := err.(*EvalError); !ok { + err = thread.evalError(err) + } + } + + *fr = frame{} // clear out any references + thread.stack = thread.stack[:len(thread.stack)-1] // pop + + return result, err +} + +func slice(x, lo, hi, step_ Value) (Value, error) { + sliceable, ok := x.(Sliceable) + if !ok { + return nil, fmt.Errorf("invalid slice operand %s", x.Type()) + } + + n := sliceable.Len() + step := 1 + if step_ != None { + var err error + step, err = AsInt32(step_) + if err != nil { + return nil, fmt.Errorf("invalid slice step: %s", err) + } + if step == 0 { + return nil, fmt.Errorf("zero is not a valid slice step") + } + } + + // TODO(adonovan): opt: preallocate result array. + + var start, end int + if step > 0 { + // positive stride + // default indices are [0:n]. + var err error + start, end, err = indices(lo, hi, n) + if err != nil { + return nil, err + } + + if end < start { + end = start // => empty result + } + } else { + // negative stride + // default indices are effectively [n-1:-1], though to + // get this effect using explicit indices requires + // [n-1:-1-n:-1] because of the treatment of -ve values. + start = n - 1 + if err := asIndex(lo, n, &start); err != nil { + return nil, fmt.Errorf("invalid start index: %s", err) + } + if start >= n { + start = n - 1 + } + + end = -1 + if err := asIndex(hi, n, &end); err != nil { + return nil, fmt.Errorf("invalid end index: %s", err) + } + if end < -1 { + end = -1 + } + + if start < end { + start = end // => empty result + } + } + + return sliceable.Slice(start, end, step), nil +} + +// From Hacker's Delight, section 2.8. +func signum64(x int64) int { return int(uint64(x>>63) | uint64(-x)>>63) } +func signum(x int) int { return signum64(int64(x)) } + +// indices converts start_ and end_ to indices in the range [0:len]. +// The start index defaults to 0 and the end index defaults to len. +// An index -len < i < 0 is treated like i+len. 
+// All other indices outside the range are clamped to the nearest value in the range. +// Beware: start may be greater than end. +// This function is suitable only for slices with positive strides. +func indices(start_, end_ Value, len int) (start, end int, err error) { + start = 0 + if err := asIndex(start_, len, &start); err != nil { + return 0, 0, fmt.Errorf("invalid start index: %s", err) + } + // Clamp to [0:len]. + if start < 0 { + start = 0 + } else if start > len { + start = len + } + + end = len + if err := asIndex(end_, len, &end); err != nil { + return 0, 0, fmt.Errorf("invalid end index: %s", err) + } + // Clamp to [0:len]. + if end < 0 { + end = 0 + } else if end > len { + end = len + } + + return start, end, nil +} + +// asIndex sets *result to the integer value of v, adding len to it +// if it is negative. If v is nil or None, *result is unchanged. +func asIndex(v Value, len int, result *int) error { + if v != nil && v != None { + var err error + *result, err = AsInt32(v) + if err != nil { + return err + } + if *result < 0 { + *result += len + } + } + return nil +} + +// setArgs sets the values of the formal parameters of function fn in +// based on the actual parameter values in args and kwargs. +func setArgs(locals []Value, fn *Function, args Tuple, kwargs []Tuple) error { + + // This is the general schema of a function: + // + // def f(p1, p2=dp2, p3=dp3, *args, k1, k2=dk2, k3, **kwargs) + // + // The p parameters are non-kwonly, and may be specified positionally. + // The k parameters are kwonly, and must be specified by name. + // The defaults tuple is (dp2, dp3, mandatory, dk2, mandatory). + // + // Arguments are processed as follows: + // - positional arguments are bound to a prefix of [p1, p2, p3]. + // - surplus positional arguments are bound to *args. + // - keyword arguments are bound to any of {p1, p2, p3, k1, k2, k3}; + // duplicate bindings are rejected. + // - surplus keyword arguments are bound to **kwargs. 
+ // - defaults are bound to each parameter from p2 to k3 if no value was set. + // default values come from the tuple above. + // It is an error if the tuple entry for an unset parameter is 'mandatory'. + + // Nullary function? + if fn.NumParams() == 0 { + if nactual := len(args) + len(kwargs); nactual > 0 { + return fmt.Errorf("function %s accepts no arguments (%d given)", fn.Name(), nactual) + } + return nil + } + + cond := func(x bool, y, z interface{}) interface{} { + if x { + return y + } + return z + } + + // nparams is the number of ordinary parameters (sans *args and **kwargs). + nparams := fn.NumParams() + var kwdict *Dict + if fn.HasKwargs() { + nparams-- + kwdict = new(Dict) + locals[nparams] = kwdict + } + if fn.HasVarargs() { + nparams-- + } + + // nonkwonly is the number of non-kwonly parameters. + nonkwonly := nparams - fn.NumKwonlyParams() + + // Too many positional args? + n := len(args) + if len(args) > nonkwonly { + if !fn.HasVarargs() { + return fmt.Errorf("function %s accepts %s%d positional argument%s (%d given)", + fn.Name(), + cond(len(fn.defaults) > fn.NumKwonlyParams(), "at most ", ""), + nonkwonly, + cond(nonkwonly == 1, "", "s"), + len(args)) + } + n = nonkwonly + } + + // Bind positional arguments to non-kwonly parameters. + for i := 0; i < n; i++ { + locals[i] = args[i] + } + + // Bind surplus positional arguments to *args parameter. + if fn.HasVarargs() { + tuple := make(Tuple, len(args)-n) + for i := n; i < len(args); i++ { + tuple[i-n] = args[i] + } + locals[nparams] = tuple + } + + // Bind keyword arguments to parameters. 
+ paramIdents := fn.funcode.Locals[:nparams] + for _, pair := range kwargs { + k, v := pair[0].(String), pair[1] + if i := findParam(paramIdents, string(k)); i >= 0 { + if locals[i] != nil { + return fmt.Errorf("function %s got multiple values for parameter %s", fn.Name(), k) + } + locals[i] = v + continue + } + if kwdict == nil { + return fmt.Errorf("function %s got an unexpected keyword argument %s", fn.Name(), k) + } + oldlen := kwdict.Len() + kwdict.SetKey(k, v) + if kwdict.Len() == oldlen { + return fmt.Errorf("function %s got multiple values for parameter %s", fn.Name(), k) + } + } + + // Are defaults required? + if n < nparams || fn.NumKwonlyParams() > 0 { + m := nparams - len(fn.defaults) // first default + + // Report errors for missing required arguments. + var missing []string + var i int + for i = n; i < m; i++ { + if locals[i] == nil { + missing = append(missing, paramIdents[i].Name) + } + } + + // Bind default values to parameters. + for ; i < nparams; i++ { + if locals[i] == nil { + dflt := fn.defaults[i-m] + if _, ok := dflt.(mandatory); ok { + missing = append(missing, paramIdents[i].Name) + continue + } + locals[i] = dflt + } + } + + if missing != nil { + return fmt.Errorf("function %s missing %d argument%s (%s)", + fn.Name(), len(missing), cond(len(missing) > 1, "s", ""), strings.Join(missing, ", ")) + } + } + return nil +} + +func findParam(params []compile.Binding, name string) int { + for i, param := range params { + if param.Name == name { + return i + } + } + return -1 +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string-interpolation +func interpolate(format string, x Value) (Value, error) { + buf := new(strings.Builder) + index := 0 + nargs := 1 + if tuple, ok := x.(Tuple); ok { + nargs = len(tuple) + } + for { + i := strings.IndexByte(format, '%') + if i < 0 { + buf.WriteString(format) + break + } + buf.WriteString(format[:i]) + format = format[i+1:] + + if format != "" && format[0] == '%' { + buf.WriteByte('%') + 
format = format[1:] + continue + } + + var arg Value + if format != "" && format[0] == '(' { + // keyword argument: %(name)s. + format = format[1:] + j := strings.IndexByte(format, ')') + if j < 0 { + return nil, fmt.Errorf("incomplete format key") + } + key := format[:j] + if dict, ok := x.(Mapping); !ok { + return nil, fmt.Errorf("format requires a mapping") + } else if v, found, _ := dict.Get(String(key)); found { + arg = v + } else { + return nil, fmt.Errorf("key not found: %s", key) + } + format = format[j+1:] + } else { + // positional argument: %s. + if index >= nargs { + return nil, fmt.Errorf("not enough arguments for format string") + } + if tuple, ok := x.(Tuple); ok { + arg = tuple[index] + } else { + arg = x + } + } + + // NOTE: Starlark does not support any of these optional Python features: + // - optional conversion flags: [#0- +], etc. + // - optional minimum field width (number or *). + // - optional precision (.123 or *) + // - optional length modifier + + // conversion type + if format == "" { + return nil, fmt.Errorf("incomplete format") + } + switch c := format[0]; c { + case 's', 'r': + if str, ok := AsString(arg); ok && c == 's' { + buf.WriteString(str) + } else { + writeValue(buf, arg, nil) + } + case 'd', 'i', 'o', 'x', 'X': + i, err := NumberToInt(arg) + if err != nil { + return nil, fmt.Errorf("%%%c format requires integer: %v", c, err) + } + switch c { + case 'd', 'i': + fmt.Fprintf(buf, "%d", i) + case 'o': + fmt.Fprintf(buf, "%o", i) + case 'x': + fmt.Fprintf(buf, "%x", i) + case 'X': + fmt.Fprintf(buf, "%X", i) + } + case 'e', 'f', 'g', 'E', 'F', 'G': + f, ok := AsFloat(arg) + if !ok { + return nil, fmt.Errorf("%%%c format requires float, not %s", c, arg.Type()) + } + Float(f).format(buf, c) + case 'c': + switch arg := arg.(type) { + case Int: + // chr(int) + r, err := AsInt32(arg) + if err != nil || r < 0 || r > unicode.MaxRune { + return nil, fmt.Errorf("%%c format requires a valid Unicode code point, got %s", arg) + } + 
buf.WriteRune(rune(r)) + case String: + r, size := utf8.DecodeRuneInString(string(arg)) + if size != len(arg) || len(arg) == 0 { + return nil, fmt.Errorf("%%c format requires a single-character string") + } + buf.WriteRune(r) + default: + return nil, fmt.Errorf("%%c format requires int or single-character string, not %s", arg.Type()) + } + case '%': + buf.WriteByte('%') + default: + return nil, fmt.Errorf("unknown conversion %%%c", c) + } + format = format[1:] + index++ + } + + if index < nargs { + return nil, fmt.Errorf("too many arguments for format string") + } + + return String(buf.String()), nil +} diff --git a/starlark/eval_test.go b/starlark/eval_test.go new file mode 100644 index 0000000..9752fe8 --- /dev/null +++ b/starlark/eval_test.go @@ -0,0 +1,945 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark_test + +import ( + "bytes" + "fmt" + "math" + "os/exec" + "path/filepath" + "reflect" + "sort" + "strings" + "testing" + + "go.starlark.net/internal/chunkedfile" + "go.starlark.net/resolve" + "go.starlark.net/starlark" + "go.starlark.net/starlarkjson" + "go.starlark.net/starlarkstruct" + "go.starlark.net/starlarktest" + "go.starlark.net/syntax" +) + +// A test may enable non-standard options by containing (e.g.) "option:recursion". +func setOptions(src string) { + resolve.AllowGlobalReassign = option(src, "globalreassign") + resolve.LoadBindsGlobally = option(src, "loadbindsglobally") + resolve.AllowRecursion = option(src, "recursion") + resolve.AllowSet = option(src, "set") +} + +func option(chunk, name string) bool { + return strings.Contains(chunk, "option:"+name) +} + +// Wrapper is the type of errors with an Unwrap method; see https://golang.org/pkg/errors. +type Wrapper interface { + Unwrap() error +} + +func TestEvalExpr(t *testing.T) { + // This is mostly redundant with the new *.star tests. 
+ // TODO(adonovan): move checks into *.star files and + // reduce this to a mere unit test of starlark.Eval. + thread := new(starlark.Thread) + for _, test := range []struct{ src, want string }{ + {`123`, `123`}, + {`-1`, `-1`}, + {`"a"+"b"`, `"ab"`}, + {`1+2`, `3`}, + + // lists + {`[]`, `[]`}, + {`[1]`, `[1]`}, + {`[1,]`, `[1]`}, + {`[1, 2]`, `[1, 2]`}, + {`[2 * x for x in [1, 2, 3]]`, `[2, 4, 6]`}, + {`[2 * x for x in [1, 2, 3] if x > 1]`, `[4, 6]`}, + {`[(x, y) for x in [1, 2] for y in [3, 4]]`, + `[(1, 3), (1, 4), (2, 3), (2, 4)]`}, + {`[(x, y) for x in [1, 2] if x == 2 for y in [3, 4]]`, + `[(2, 3), (2, 4)]`}, + // tuples + {`()`, `()`}, + {`(1)`, `1`}, + {`(1,)`, `(1,)`}, + {`(1, 2)`, `(1, 2)`}, + {`(1, 2, 3, 4, 5)`, `(1, 2, 3, 4, 5)`}, + {`1, 2`, `(1, 2)`}, + // dicts + {`{}`, `{}`}, + {`{"a": 1}`, `{"a": 1}`}, + {`{"a": 1,}`, `{"a": 1}`}, + + // conditional + {`1 if 3 > 2 else 0`, `1`}, + {`1 if "foo" else 0`, `1`}, + {`1 if "" else 0`, `0`}, + + // indexing + {`["a", "b"][0]`, `"a"`}, + {`["a", "b"][1]`, `"b"`}, + {`("a", "b")[0]`, `"a"`}, + {`("a", "b")[1]`, `"b"`}, + {`"aΩb"[0]`, `"a"`}, + {`"aΩb"[1]`, `"\xce"`}, + {`"aΩb"[3]`, `"b"`}, + {`{"a": 1}["a"]`, `1`}, + {`{"a": 1}["b"]`, `key "b" not in dict`}, + {`{}[[]]`, `unhashable type: list`}, + {`{"a": 1}[[]]`, `unhashable type: list`}, + {`[x for x in range(3)]`, "[0, 1, 2]"}, + } { + var got string + if v, err := starlark.Eval(thread, "<expr>", test.src, nil); err != nil { + got = err.Error() + } else { + got = v.String() + } + if got != test.want { + t.Errorf("eval %s = %s, want %s", test.src, got, test.want) + } + } +} + +func TestExecFile(t *testing.T) { + defer setOptions("") + testdata := starlarktest.DataFile("starlark", ".") + thread := &starlark.Thread{Load: load} + starlarktest.SetReporter(thread, t) + for _, file := range []string{ + "testdata/assign.star", + "testdata/bool.star", + "testdata/builtins.star", + "testdata/bytes.star", + "testdata/control.star", + "testdata/dict.star", + 
"testdata/float.star", + "testdata/function.star", + "testdata/int.star", + "testdata/json.star", + "testdata/list.star", + "testdata/misc.star", + "testdata/set.star", + "testdata/string.star", + "testdata/tuple.star", + "testdata/recursion.star", + "testdata/module.star", + } { + filename := filepath.Join(testdata, file) + for _, chunk := range chunkedfile.Read(filename, t) { + predeclared := starlark.StringDict{ + "hasfields": starlark.NewBuiltin("hasfields", newHasFields), + "fibonacci": fib{}, + "struct": starlark.NewBuiltin("struct", starlarkstruct.Make), + } + + setOptions(chunk.Source) + resolve.AllowLambda = true // used extensively + + _, err := starlark.ExecFile(thread, filename, chunk.Source, predeclared) + switch err := err.(type) { + case *starlark.EvalError: + found := false + for i := range err.CallStack { + posn := err.CallStack.At(i).Pos + if posn.Filename() == filename { + chunk.GotError(int(posn.Line), err.Error()) + found = true + break + } + } + if !found { + t.Error(err.Backtrace()) + } + case nil: + // success + default: + t.Errorf("\n%s", err) + } + chunk.Done() + } + } +} + +// A fib is an iterable value representing the infinite Fibonacci sequence. +type fib struct{} + +func (t fib) Freeze() {} +func (t fib) String() string { return "fib" } +func (t fib) Type() string { return "fib" } +func (t fib) Truth() starlark.Bool { return true } +func (t fib) Hash() (uint32, error) { return 0, fmt.Errorf("fib is unhashable") } +func (t fib) Iterate() starlark.Iterator { return &fibIterator{0, 1} } + +type fibIterator struct{ x, y int } + +func (it *fibIterator) Next(p *starlark.Value) bool { + *p = starlark.MakeInt(it.x) + it.x, it.y = it.y, it.x+it.y + return true +} +func (it *fibIterator) Done() {} + +// load implements the 'load' operation as used in the evaluator tests. 
+func load(thread *starlark.Thread, module string) (starlark.StringDict, error) { + if module == "assert.star" { + return starlarktest.LoadAssertModule() + } + if module == "json.star" { + return starlark.StringDict{"json": starlarkjson.Module}, nil + } + + // TODO(adonovan): test load() using this execution path. + filename := filepath.Join(filepath.Dir(thread.CallFrame(0).Pos.Filename()), module) + return starlark.ExecFile(thread, filename, nil, nil) +} + +func newHasFields(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + if len(args)+len(kwargs) > 0 { + return nil, fmt.Errorf("%s: unexpected arguments", b.Name()) + } + return &hasfields{attrs: make(map[string]starlark.Value)}, nil +} + +// hasfields is a test-only implementation of HasAttrs. +// It permits any field to be set. +// Clients will likely want to provide their own implementation, +// so we don't have any public implementation. +type hasfields struct { + attrs starlark.StringDict + frozen bool +} + +var ( + _ starlark.HasAttrs = (*hasfields)(nil) + _ starlark.HasBinary = (*hasfields)(nil) +) + +func (hf *hasfields) String() string { return "hasfields" } +func (hf *hasfields) Type() string { return "hasfields" } +func (hf *hasfields) Truth() starlark.Bool { return true } +func (hf *hasfields) Hash() (uint32, error) { return 42, nil } + +func (hf *hasfields) Freeze() { + if !hf.frozen { + hf.frozen = true + for _, v := range hf.attrs { + v.Freeze() + } + } +} + +func (hf *hasfields) Attr(name string) (starlark.Value, error) { return hf.attrs[name], nil } + +func (hf *hasfields) SetField(name string, val starlark.Value) error { + if hf.frozen { + return fmt.Errorf("cannot set field on a frozen hasfields") + } + if strings.HasPrefix(name, "no") { // for testing + return starlark.NoSuchAttrError(fmt.Sprintf("no .%s field", name)) + } + hf.attrs[name] = val + return nil +} + +func (hf *hasfields) AttrNames() []string { + names := 
make([]string, 0, len(hf.attrs)) + for key := range hf.attrs { + names = append(names, key) + } + sort.Strings(names) + return names +} + +func (hf *hasfields) Binary(op syntax.Token, y starlark.Value, side starlark.Side) (starlark.Value, error) { + // This method exists so we can exercise 'list += x' + // where x is not Iterable but defines list+x. + if op == syntax.PLUS { + if _, ok := y.(*starlark.List); ok { + return starlark.MakeInt(42), nil // list+hasfields is 42 + } + } + return nil, nil +} + +func TestParameterPassing(t *testing.T) { + const filename = "parameters.go" + const src = ` +def a(): + return +def b(a, b): + return a, b +def c(a, b=42): + return a, b +def d(*args): + return args +def e(**kwargs): + return kwargs +def f(a, b=42, *args, **kwargs): + return a, b, args, kwargs +def g(a, b=42, *args, c=123, **kwargs): + return a, b, args, c, kwargs +def h(a, b=42, *, c=123, **kwargs): + return a, b, c, kwargs +def i(a, b=42, *, c, d=123, e, **kwargs): + return a, b, c, d, e, kwargs +def j(a, b=42, *args, c, d=123, e, **kwargs): + return a, b, args, c, d, e, kwargs +` + + thread := new(starlark.Thread) + globals, err := starlark.ExecFile(thread, filename, src, nil) + if err != nil { + t.Fatal(err) + } + + // All errors are dynamic; see resolver for static errors. 
+ for _, test := range []struct{ src, want string }{ + // a() + {`a()`, `None`}, + {`a(1)`, `function a accepts no arguments (1 given)`}, + + // b(a, b) + {`b()`, `function b missing 2 arguments (a, b)`}, + {`b(1)`, `function b missing 1 argument (b)`}, + {`b(a=1)`, `function b missing 1 argument (b)`}, + {`b(b=1)`, `function b missing 1 argument (a)`}, + {`b(1, 2)`, `(1, 2)`}, + {`b`, `<function b>`}, // asserts that b's parameter b was treated as a local variable + {`b(1, 2, 3)`, `function b accepts 2 positional arguments (3 given)`}, + {`b(1, b=2)`, `(1, 2)`}, + {`b(1, a=2)`, `function b got multiple values for parameter "a"`}, + {`b(1, x=2)`, `function b got an unexpected keyword argument "x"`}, + {`b(a=1, b=2)`, `(1, 2)`}, + {`b(b=1, a=2)`, `(2, 1)`}, + {`b(b=1, a=2, x=1)`, `function b got an unexpected keyword argument "x"`}, + {`b(x=1, b=1, a=2)`, `function b got an unexpected keyword argument "x"`}, + + // c(a, b=42) + {`c()`, `function c missing 1 argument (a)`}, + {`c(1)`, `(1, 42)`}, + {`c(1, 2)`, `(1, 2)`}, + {`c(1, 2, 3)`, `function c accepts at most 2 positional arguments (3 given)`}, + {`c(1, b=2)`, `(1, 2)`}, + {`c(1, a=2)`, `function c got multiple values for parameter "a"`}, + {`c(a=1, b=2)`, `(1, 2)`}, + {`c(b=1, a=2)`, `(2, 1)`}, + + // d(*args) + {`d()`, `()`}, + {`d(1)`, `(1,)`}, + {`d(1, 2)`, `(1, 2)`}, + {`d(1, 2, k=3)`, `function d got an unexpected keyword argument "k"`}, + {`d(args=[])`, `function d got an unexpected keyword argument "args"`}, + + // e(**kwargs) + {`e()`, `{}`}, + {`e(1)`, `function e accepts 0 positional arguments (1 given)`}, + {`e(k=1)`, `{"k": 1}`}, + {`e(kwargs={})`, `{"kwargs": {}}`}, + + // f(a, b=42, *args, **kwargs) + {`f()`, `function f missing 1 argument (a)`}, + {`f(0)`, `(0, 42, (), {})`}, + {`f(0)`, `(0, 42, (), {})`}, + {`f(0, 1)`, `(0, 1, (), {})`}, + {`f(0, 1, 2)`, `(0, 1, (2,), {})`}, + {`f(0, 1, 2, 3)`, `(0, 1, (2, 3), {})`}, + {`f(a=0)`, `(0, 42, (), {})`}, + {`f(0, b=1)`, `(0, 1, (), {})`}, + {`f(0, 
a=1)`, `function f got multiple values for parameter "a"`}, + {`f(0, b=1, c=2)`, `(0, 1, (), {"c": 2})`}, + + // g(a, b=42, *args, c=123, **kwargs) + {`g()`, `function g missing 1 argument (a)`}, + {`g(0)`, `(0, 42, (), 123, {})`}, + {`g(0, 1)`, `(0, 1, (), 123, {})`}, + {`g(0, 1, 2)`, `(0, 1, (2,), 123, {})`}, + {`g(0, 1, 2, 3)`, `(0, 1, (2, 3), 123, {})`}, + {`g(a=0)`, `(0, 42, (), 123, {})`}, + {`g(0, b=1)`, `(0, 1, (), 123, {})`}, + {`g(0, a=1)`, `function g got multiple values for parameter "a"`}, + {`g(0, b=1, c=2, d=3)`, `(0, 1, (), 2, {"d": 3})`}, + + // h(a, b=42, *, c=123, **kwargs) + {`h()`, `function h missing 1 argument (a)`}, + {`h(0)`, `(0, 42, 123, {})`}, + {`h(0, 1)`, `(0, 1, 123, {})`}, + {`h(0, 1, 2)`, `function h accepts at most 2 positional arguments (3 given)`}, + {`h(a=0)`, `(0, 42, 123, {})`}, + {`h(0, b=1)`, `(0, 1, 123, {})`}, + {`h(0, a=1)`, `function h got multiple values for parameter "a"`}, + {`h(0, b=1, c=2)`, `(0, 1, 2, {})`}, + {`h(0, b=1, d=2)`, `(0, 1, 123, {"d": 2})`}, + {`h(0, b=1, c=2, d=3)`, `(0, 1, 2, {"d": 3})`}, + + // i(a, b=42, *, c, d=123, e, **kwargs) + {`i()`, `function i missing 3 arguments (a, c, e)`}, + {`i(0)`, `function i missing 2 arguments (c, e)`}, + {`i(0, 1)`, `function i missing 2 arguments (c, e)`}, + {`i(0, 1, 2)`, `function i accepts at most 2 positional arguments (3 given)`}, + {`i(0, 1, e=2)`, `function i missing 1 argument (c)`}, + {`i(0, 1, 2, 3)`, `function i accepts at most 2 positional arguments (4 given)`}, + {`i(a=0)`, `function i missing 2 arguments (c, e)`}, + {`i(0, b=1)`, `function i missing 2 arguments (c, e)`}, + {`i(0, a=1)`, `function i got multiple values for parameter "a"`}, + {`i(0, b=1, c=2)`, `function i missing 1 argument (e)`}, + {`i(0, b=1, d=2)`, `function i missing 2 arguments (c, e)`}, + {`i(0, b=1, c=2, d=3)`, `function i missing 1 argument (e)`}, + {`i(0, b=1, c=2, d=3, e=4)`, `(0, 1, 2, 3, 4, {})`}, + {`i(0, 1, b=1, c=2, d=3, e=4)`, `function i got multiple values for 
parameter "b"`}, + + // j(a, b=42, *args, c, d=123, e, **kwargs) + {`j()`, `function j missing 3 arguments (a, c, e)`}, + {`j(0)`, `function j missing 2 arguments (c, e)`}, + {`j(0, 1)`, `function j missing 2 arguments (c, e)`}, + {`j(0, 1, 2)`, `function j missing 2 arguments (c, e)`}, + {`j(0, 1, e=2)`, `function j missing 1 argument (c)`}, + {`j(0, 1, 2, 3)`, `function j missing 2 arguments (c, e)`}, + {`j(a=0)`, `function j missing 2 arguments (c, e)`}, + {`j(0, b=1)`, `function j missing 2 arguments (c, e)`}, + {`j(0, a=1)`, `function j got multiple values for parameter "a"`}, + {`j(0, b=1, c=2)`, `function j missing 1 argument (e)`}, + {`j(0, b=1, d=2)`, `function j missing 2 arguments (c, e)`}, + {`j(0, b=1, c=2, d=3)`, `function j missing 1 argument (e)`}, + {`j(0, b=1, c=2, d=3, e=4)`, `(0, 1, (), 2, 3, 4, {})`}, + {`j(0, 1, b=1, c=2, d=3, e=4)`, `function j got multiple values for parameter "b"`}, + {`j(0, 1, 2, c=3, e=4)`, `(0, 1, (2,), 3, 123, 4, {})`}, + } { + var got string + if v, err := starlark.Eval(thread, "<expr>", test.src, globals); err != nil { + got = err.Error() + } else { + got = v.String() + } + if got != test.want { + t.Errorf("eval %s = %s, want %s", test.src, got, test.want) + } + } +} + +// TestPrint ensures that the Starlark print function calls +// Thread.Print, if provided. 
+func TestPrint(t *testing.T) { + const src = ` +print("hello") +def f(): print("hello", "world", sep=", ") +f() +` + buf := new(bytes.Buffer) + print := func(thread *starlark.Thread, msg string) { + caller := thread.CallFrame(1) + fmt.Fprintf(buf, "%s: %s: %s\n", caller.Pos, caller.Name, msg) + } + thread := &starlark.Thread{Print: print} + if _, err := starlark.ExecFile(thread, "foo.star", src, nil); err != nil { + t.Fatal(err) + } + want := "foo.star:2:6: <toplevel>: hello\n" + + "foo.star:3:15: f: hello, world\n" + if got := buf.String(); got != want { + t.Errorf("output was %s, want %s", got, want) + } +} + +func reportEvalError(tb testing.TB, err error) { + if err, ok := err.(*starlark.EvalError); ok { + tb.Fatal(err.Backtrace()) + } + tb.Fatal(err) +} + +// TestInt exercises the Int.Int64 and Int.Uint64 methods. +// If we can move their logic into math/big, delete this test. +func TestInt(t *testing.T) { + one := starlark.MakeInt(1) + + for _, test := range []struct { + i starlark.Int + wantInt64 string + wantUint64 string + }{ + {starlark.MakeInt64(math.MinInt64).Sub(one), "error", "error"}, + {starlark.MakeInt64(math.MinInt64), "-9223372036854775808", "error"}, + {starlark.MakeInt64(-1), "-1", "error"}, + {starlark.MakeInt64(0), "0", "0"}, + {starlark.MakeInt64(1), "1", "1"}, + {starlark.MakeInt64(math.MaxInt64), "9223372036854775807", "9223372036854775807"}, + {starlark.MakeUint64(math.MaxUint64), "error", "18446744073709551615"}, + {starlark.MakeUint64(math.MaxUint64).Add(one), "error", "error"}, + } { + gotInt64, gotUint64 := "error", "error" + if i, ok := test.i.Int64(); ok { + gotInt64 = fmt.Sprint(i) + } + if u, ok := test.i.Uint64(); ok { + gotUint64 = fmt.Sprint(u) + } + if gotInt64 != test.wantInt64 { + t.Errorf("(%s).Int64() = %s, want %s", test.i, gotInt64, test.wantInt64) + } + if gotUint64 != test.wantUint64 { + t.Errorf("(%s).Uint64() = %s, want %s", test.i, gotUint64, test.wantUint64) + } + } +} + +func backtrace(t *testing.T, err error) 
string { + switch err := err.(type) { + case *starlark.EvalError: + return err.Backtrace() + case nil: + t.Fatalf("ExecFile succeeded unexpectedly") + default: + t.Fatalf("ExecFile failed with %v, wanted *EvalError", err) + } + panic("unreachable") +} + +func TestBacktrace(t *testing.T) { + // This test ensures continuity of the stack of active Starlark + // functions, including propagation through built-ins such as 'min'. + const src = ` +def f(x): return 1//x +def g(x): return f(x) +def h(): return min([1, 2, 0], key=g) +def i(): return h() +i() +` + thread := new(starlark.Thread) + _, err := starlark.ExecFile(thread, "crash.star", src, nil) + const want = `Traceback (most recent call last): + crash.star:6:2: in <toplevel> + crash.star:5:18: in i + crash.star:4:20: in h + <builtin>: in min + crash.star:3:19: in g + crash.star:2:19: in f +Error: floored division by zero` + if got := backtrace(t, err); got != want { + t.Errorf("error was %s, want %s", got, want) + } + + // Additionally, ensure that errors originating in + // Starlark and/or Go each have an accurate frame. + // The topmost frame, if built-in, is not shown, + // but the name of the built-in function is shown + // as "Error in fn: ...". + // + // This program fails in Starlark (f) if x==0, + // or in Go (string.join) if x is non-zero. 
+ const src2 = ` +def f(): ''.join([1//i]) +f() +` + for i, want := range []string{ + 0: `Traceback (most recent call last): + crash.star:3:2: in <toplevel> + crash.star:2:20: in f +Error: floored division by zero`, + 1: `Traceback (most recent call last): + crash.star:3:2: in <toplevel> + crash.star:2:17: in f +Error in join: join: in list, want string, got int`, + } { + globals := starlark.StringDict{"i": starlark.MakeInt(i)} + _, err := starlark.ExecFile(thread, "crash.star", src2, globals) + if got := backtrace(t, err); got != want { + t.Errorf("error was %s, want %s", got, want) + } + } +} + +func TestLoadBacktrace(t *testing.T) { + // This test ensures that load() does NOT preserve stack traces, + // but that API callers can get them with Unwrap(). + // For discussion, see: + // https://github.com/google/starlark-go/pull/244 + const src = ` +load('crash.star', 'x') +` + const loadedSrc = ` +def f(x): + return 1 // x + +f(0) +` + thread := new(starlark.Thread) + thread.Load = func(t *starlark.Thread, module string) (starlark.StringDict, error) { + return starlark.ExecFile(new(starlark.Thread), module, loadedSrc, nil) + } + _, err := starlark.ExecFile(thread, "root.star", src, nil) + + const want = `Traceback (most recent call last): + root.star:2:1: in <toplevel> +Error: cannot load crash.star: floored division by zero` + if got := backtrace(t, err); got != want { + t.Errorf("error was %s, want %s", got, want) + } + + unwrapEvalError := func(err error) *starlark.EvalError { + var result *starlark.EvalError + for { + if evalErr, ok := err.(*starlark.EvalError); ok { + result = evalErr + } + + // TODO: use errors.Unwrap when go >=1.13 is everywhere. 
+ wrapper, isWrapper := err.(Wrapper) + if !isWrapper { + break + } + err = wrapper.Unwrap() + } + return result + } + + unwrappedErr := unwrapEvalError(err) + const wantUnwrapped = `Traceback (most recent call last): + crash.star:5:2: in <toplevel> + crash.star:3:12: in f +Error: floored division by zero` + if got := backtrace(t, unwrappedErr); got != wantUnwrapped { + t.Errorf("error was %s, want %s", got, wantUnwrapped) + } + +} + +// TestRepeatedExec parses and resolves a file syntax tree once then +// executes it repeatedly with different values of its predeclared variables. +func TestRepeatedExec(t *testing.T) { + predeclared := starlark.StringDict{"x": starlark.None} + _, prog, err := starlark.SourceProgram("repeat.star", "y = 2 * x", predeclared.Has) + if err != nil { + t.Fatal(err) + } + + for _, test := range []struct { + x, want starlark.Value + }{ + {x: starlark.MakeInt(42), want: starlark.MakeInt(84)}, + {x: starlark.String("mur"), want: starlark.String("murmur")}, + {x: starlark.Tuple{starlark.None}, want: starlark.Tuple{starlark.None, starlark.None}}, + } { + predeclared["x"] = test.x // update the values in dictionary + thread := new(starlark.Thread) + if globals, err := prog.Init(thread, predeclared); err != nil { + t.Errorf("x=%v: %v", test.x, err) // exec error + } else if eq, err := starlark.Equal(globals["y"], test.want); err != nil { + t.Errorf("x=%v: %v", test.x, err) // comparison error + } else if !eq { + t.Errorf("x=%v: got y=%v, want %v", test.x, globals["y"], test.want) + } + } +} + +// TestEmptyFilePosition ensures that even Programs +// from empty files have a valid position. 
+func TestEmptyPosition(t *testing.T) { + var predeclared starlark.StringDict + for _, content := range []string{"", "empty = False"} { + _, prog, err := starlark.SourceProgram("hello.star", content, predeclared.Has) + if err != nil { + t.Fatal(err) + } + if got, want := prog.Filename(), "hello.star"; got != want { + t.Errorf("Program.Filename() = %q, want %q", got, want) + } + } +} + +// TestUnpackUserDefined tests that user-defined +// implementations of starlark.Value may be unpacked. +func TestUnpackUserDefined(t *testing.T) { + // success + want := new(hasfields) + var x *hasfields + if err := starlark.UnpackArgs("unpack", starlark.Tuple{want}, nil, "x", &x); err != nil { + t.Errorf("UnpackArgs failed: %v", err) + } + if x != want { + t.Errorf("for x, got %v, want %v", x, want) + } + + // failure + err := starlark.UnpackArgs("unpack", starlark.Tuple{starlark.MakeInt(42)}, nil, "x", &x) + if want := "unpack: for parameter x: got int, want hasfields"; fmt.Sprint(err) != want { + t.Errorf("unpack args error = %q, want %q", err, want) + } +} + +type optionalStringUnpacker struct { + str string + isSet bool +} + +func (o *optionalStringUnpacker) Unpack(v starlark.Value) error { + s, ok := starlark.AsString(v) + if !ok { + return fmt.Errorf("got %s, want string", v.Type()) + } + o.str = s + o.isSet = ok + return nil +} + +func TestUnpackCustomUnpacker(t *testing.T) { + a := optionalStringUnpacker{} + wantA := optionalStringUnpacker{str: "a", isSet: true} + b := optionalStringUnpacker{str: "b"} + wantB := optionalStringUnpacker{str: "b"} + + // Success + if err := starlark.UnpackArgs("unpack", starlark.Tuple{starlark.String("a")}, nil, "a?", &a, "b?", &b); err != nil { + t.Errorf("UnpackArgs failed: %v", err) + } + if a != wantA { + t.Errorf("for a, got %v, want %v", a, wantA) + } + if b != wantB { + t.Errorf("for b, got %v, want %v", b, wantB) + } + + // failure + err := starlark.UnpackArgs("unpack", starlark.Tuple{starlark.MakeInt(42)}, nil, "a", &a) + if want := 
"unpack: for parameter a: got int, want string"; fmt.Sprint(err) != want { + t.Errorf("unpack args error = %q, want %q", err, want) + } +} + +func TestAsInt(t *testing.T) { + for _, test := range []struct { + val starlark.Value + ptr interface{} + want string + }{ + {starlark.MakeInt(42), new(int32), "42"}, + {starlark.MakeInt(-1), new(int32), "-1"}, + // Use Lsh not 1<<40 as the latter exceeds int if GOARCH=386. + {starlark.MakeInt(1).Lsh(40), new(int32), "1099511627776 out of range (want value in signed 32-bit range)"}, + {starlark.MakeInt(-1).Lsh(40), new(int32), "-1099511627776 out of range (want value in signed 32-bit range)"}, + + {starlark.MakeInt(42), new(uint16), "42"}, + {starlark.MakeInt(0xffff), new(uint16), "65535"}, + {starlark.MakeInt(0x10000), new(uint16), "65536 out of range (want value in unsigned 16-bit range)"}, + {starlark.MakeInt(-1), new(uint16), "-1 out of range (want value in unsigned 16-bit range)"}, + } { + var got string + if err := starlark.AsInt(test.val, test.ptr); err != nil { + got = err.Error() + } else { + got = fmt.Sprint(reflect.ValueOf(test.ptr).Elem().Interface()) + } + if got != test.want { + t.Errorf("AsInt(%s, %T): got %q, want %q", test.val, test.ptr, got, test.want) + } + } +} + +func TestDocstring(t *testing.T) { + globals, _ := starlark.ExecFile(&starlark.Thread{}, "doc.star", ` +def somefunc(): + "somefunc doc" + return 0 +`, nil) + + if globals["somefunc"].(*starlark.Function).Doc() != "somefunc doc" { + t.Fatal("docstring not found") + } +} + +func TestFrameLocals(t *testing.T) { + // trace prints a nice stack trace including argument + // values of calls to Starlark functions. 
+ trace := func(thread *starlark.Thread) string { + buf := new(bytes.Buffer) + for i := 0; i < thread.CallStackDepth(); i++ { + fr := thread.DebugFrame(i) + fmt.Fprintf(buf, "%s(", fr.Callable().Name()) + if fn, ok := fr.Callable().(*starlark.Function); ok { + for i := 0; i < fn.NumParams(); i++ { + if i > 0 { + buf.WriteString(", ") + } + name, _ := fn.Param(i) + fmt.Fprintf(buf, "%s=%s", name, fr.Local(i)) + } + } else { + buf.WriteString("...") // a built-in function + } + buf.WriteString(")\n") + } + return buf.String() + } + + var got string + builtin := func(thread *starlark.Thread, _ *starlark.Builtin, _ starlark.Tuple, _ []starlark.Tuple) (starlark.Value, error) { + got = trace(thread) + return starlark.None, nil + } + predeclared := starlark.StringDict{ + "builtin": starlark.NewBuiltin("builtin", builtin), + } + _, err := starlark.ExecFile(&starlark.Thread{}, "foo.star", ` +def f(x, y): builtin() +def g(z): f(z, z*z) +g(7) +`, predeclared) + if err != nil { + t.Errorf("ExecFile failed: %v", err) + } + + var want = ` +builtin(...) +f(x=7, y=49) +g(z=7) +<toplevel>() +`[1:] + if got != want { + t.Errorf("got <<%s>>, want <<%s>>", got, want) + } +} + +type badType string + +func (b *badType) String() string { return "badType" } +func (b *badType) Type() string { return "badType:" + string(*b) } // panics if b==nil +func (b *badType) Truth() starlark.Bool { return true } +func (b *badType) Hash() (uint32, error) { return 0, nil } +func (b *badType) Freeze() {} + +var _ starlark.Value = new(badType) + +// TestUnpackErrorBadType verifies that the Unpack functions fail +// gracefully when a parameter's default value's Type method panics. 
+func TestUnpackErrorBadType(t *testing.T) { + for _, test := range []struct { + x *badType + want string + }{ + {new(badType), "got NoneType, want badType"}, // Starlark type name + {nil, "got NoneType, want *starlark_test.badType"}, // Go type name + } { + err := starlark.UnpackArgs("f", starlark.Tuple{starlark.None}, nil, "x", &test.x) + if err == nil { + t.Errorf("UnpackArgs succeeded unexpectedly") + continue + } + if !strings.Contains(err.Error(), test.want) { + t.Errorf("UnpackArgs error %q does not contain %q", err, test.want) + } + } +} + +// Regression test for github.com/google/starlark-go/issues/233. +func TestREPLChunk(t *testing.T) { + thread := new(starlark.Thread) + globals := make(starlark.StringDict) + exec := func(src string) { + f, err := syntax.Parse("<repl>", src, 0) + if err != nil { + t.Fatal(err) + } + if err := starlark.ExecREPLChunk(f, thread, globals); err != nil { + t.Fatal(err) + } + } + + exec("x = 0; y = 0") + if got, want := fmt.Sprintf("%v %v", globals["x"], globals["y"]), "0 0"; got != want { + t.Fatalf("chunk1: got %s, want %s", got, want) + } + + exec("x += 1; y = y + 1") + if got, want := fmt.Sprintf("%v %v", globals["x"], globals["y"]), "1 1"; got != want { + t.Fatalf("chunk2: got %s, want %s", got, want) + } +} + +func TestCancel(t *testing.T) { + // A thread cancelled before it begins executes no code. + { + thread := new(starlark.Thread) + thread.Cancel("nope") + _, err := starlark.ExecFile(thread, "precancel.star", `x = 1//0`, nil) + if fmt.Sprint(err) != "Starlark computation cancelled: nope" { + t.Errorf("execution returned error %q, want cancellation", err) + } + + // cancellation is sticky + _, err = starlark.ExecFile(thread, "precancel.star", `x = 1//0`, nil) + if fmt.Sprint(err) != "Starlark computation cancelled: nope" { + t.Errorf("execution returned error %q, want cancellation", err) + } + } + // A thread cancelled during a built-in executes no more code. 
+ { + thread := new(starlark.Thread) + predeclared := starlark.StringDict{ + "stopit": starlark.NewBuiltin("stopit", func(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + thread.Cancel(fmt.Sprint(args[0])) + return starlark.None, nil + }), + } + _, err := starlark.ExecFile(thread, "stopit.star", `msg = 'nope'; stopit(msg); x = 1//0`, predeclared) + if fmt.Sprint(err) != `Starlark computation cancelled: "nope"` { + t.Errorf("execution returned error %q, want cancellation", err) + } + } +} + +func TestExecutionSteps(t *testing.T) { + // A Thread records the number of computation steps. + thread := new(starlark.Thread) + countSteps := func(n int) (uint64, error) { + predeclared := starlark.StringDict{"n": starlark.MakeInt(n)} + steps0 := thread.ExecutionSteps() + _, err := starlark.ExecFile(thread, "steps.star", `squares = [x*x for x in range(n)]`, predeclared) + return thread.ExecutionSteps() - steps0, err + } + steps100, err := countSteps(1000) + if err != nil { + t.Errorf("execution failed: %v", err) + } + steps10000, err := countSteps(100000) + if err != nil { + t.Errorf("execution failed: %v", err) + } + if ratio := float64(steps10000) / float64(steps100); ratio < 99 || ratio > 101 { + t.Errorf("computation steps did not increase linearly: f(100)=%d, f(10000)=%d, ratio=%g, want ~100", steps100, steps10000, ratio) + } + + // Exceeding the step limit causes cancellation. + thread.SetMaxExecutionSteps(1000) + _, err = countSteps(1000) + if fmt.Sprint(err) != "Starlark computation cancelled: too many steps" { + t.Errorf("execution returned error %q, want cancellation", err) + } +} + +// TestDeps fails if the interpreter proper (not the REPL, etc) sprouts new external dependencies. +// We may expand the list of permitted dependencies, but should do so deliberately, not casually. 
+func TestDeps(t *testing.T) { + cmd := exec.Command("go", "list", "-deps") + out, err := cmd.Output() + if err != nil { + t.Skipf("'go list' failed: %s", err) + } + for _, pkg := range strings.Split(string(out), "\n") { + // Does pkg have form "domain.name/dir"? + slash := strings.IndexByte(pkg, '/') + dot := strings.IndexByte(pkg, '.') + if 0 < dot && dot < slash { + if strings.HasPrefix(pkg, "go.starlark.net/") || + strings.HasPrefix(pkg, "golang.org/x/sys/") { + continue // permitted dependencies + } + t.Errorf("new interpreter dependency: %s", pkg) + } + } +} diff --git a/starlark/example_test.go b/starlark/example_test.go new file mode 100644 index 0000000..5feca38 --- /dev/null +++ b/starlark/example_test.go @@ -0,0 +1,322 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark_test + +import ( + "fmt" + "log" + "reflect" + "sort" + "strings" + "sync" + "sync/atomic" + "testing" + "unsafe" + + "go.starlark.net/starlark" +) + +// ExampleExecFile demonstrates a simple embedding +// of the Starlark interpreter into a Go program. +func ExampleExecFile() { + const data = ` +print(greeting + ", world") +print(repeat("one")) +print(repeat("mur", 2)) +squares = [x*x for x in range(10)] +` + + // repeat(str, n=1) is a Go function called from Starlark. + // It behaves like the 'string * int' operation. + repeat := func(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var s string + var n int = 1 + if err := starlark.UnpackArgs(b.Name(), args, kwargs, "s", &s, "n?", &n); err != nil { + return nil, err + } + return starlark.String(strings.Repeat(s, n)), nil + } + + // The Thread defines the behavior of the built-in 'print' function. 
+ thread := &starlark.Thread{ + Name: "example", + Print: func(_ *starlark.Thread, msg string) { fmt.Println(msg) }, + } + + // This dictionary defines the pre-declared environment. + predeclared := starlark.StringDict{ + "greeting": starlark.String("hello"), + "repeat": starlark.NewBuiltin("repeat", repeat), + } + + // Execute a program. + globals, err := starlark.ExecFile(thread, "apparent/filename.star", data, predeclared) + if err != nil { + if evalErr, ok := err.(*starlark.EvalError); ok { + log.Fatal(evalErr.Backtrace()) + } + log.Fatal(err) + } + + // Print the global environment. + fmt.Println("\nGlobals:") + for _, name := range globals.Keys() { + v := globals[name] + fmt.Printf("%s (%s) = %s\n", name, v.Type(), v.String()) + } + + // Output: + // hello, world + // one + // murmur + // + // Globals: + // squares (list) = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] +} + +// ExampleThread_Load_sequential demonstrates a simple caching +// implementation of 'load' that works sequentially. +func ExampleThread_Load_sequential() { + fakeFilesystem := map[string]string{ + "c.star": `load("b.star", "b"); c = b + "!"`, + "b.star": `load("a.star", "a"); b = a + ", world"`, + "a.star": `a = "Hello"`, + } + + type entry struct { + globals starlark.StringDict + err error + } + + cache := make(map[string]*entry) + + var load func(_ *starlark.Thread, module string) (starlark.StringDict, error) + load = func(_ *starlark.Thread, module string) (starlark.StringDict, error) { + e, ok := cache[module] + if e == nil { + if ok { + // request for package whose loading is in progress + return nil, fmt.Errorf("cycle in load graph") + } + + // Add a placeholder to indicate "load in progress". + cache[module] = nil + + // Load and initialize the module in a new thread. + data := fakeFilesystem[module] + thread := &starlark.Thread{Name: "exec " + module, Load: load} + globals, err := starlark.ExecFile(thread, module, data, nil) + e = &entry{globals, err} + + // Update the cache. 
+ cache[module] = e + } + return e.globals, e.err + } + + globals, err := load(nil, "c.star") + if err != nil { + log.Fatal(err) + } + fmt.Println(globals["c"]) + + // Output: + // "Hello, world!" +} + +// ExampleThread_Load_parallel demonstrates a parallel implementation +// of 'load' with caching, duplicate suppression, and cycle detection. +func ExampleThread_Load_parallel() { + cache := &cache{ + cache: make(map[string]*entry), + fakeFilesystem: map[string]string{ + "c.star": `load("a.star", "a"); c = a * 2`, + "b.star": `load("a.star", "a"); b = a * 3`, + "a.star": `a = 1; print("loaded a")`, + }, + } + + // We load modules b and c in parallel by concurrent calls to + // cache.Load. Both of them load module a, but a is executed + // only once, as witnessed by the sole output of its print + // statement. + + ch := make(chan string) + for _, name := range []string{"b", "c"} { + go func(name string) { + globals, err := cache.Load(name + ".star") + if err != nil { + log.Fatal(err) + } + ch <- fmt.Sprintf("%s = %s", name, globals[name]) + }(name) + } + got := []string{<-ch, <-ch} + sort.Strings(got) + fmt.Println(strings.Join(got, "\n")) + + // Output: + // loaded a + // b = 3 + // c = 2 +} + +// TestThread_Load_parallelCycle demonstrates detection +// of cycles during parallel loading. 
+func TestThreadLoad_ParallelCycle(t *testing.T) { + cache := &cache{ + cache: make(map[string]*entry), + fakeFilesystem: map[string]string{ + "c.star": `load("b.star", "b"); c = b * 2`, + "b.star": `load("a.star", "a"); b = a * 3`, + "a.star": `load("c.star", "c"); a = c * 5; print("loaded a")`, + }, + } + + ch := make(chan string) + for _, name := range "bc" { + name := string(name) + go func() { + _, err := cache.Load(name + ".star") + if err == nil { + log.Fatalf("Load of %s.star succeeded unexpectedly", name) + } + ch <- err.Error() + }() + } + got := []string{<-ch, <-ch} + sort.Strings(got) + + // Typically, the c goroutine quickly blocks behind b; + // b loads a, and a then fails to load c because it forms a cycle. + // The errors observed by the two goroutines are: + want1 := []string{ + "cannot load a.star: cannot load c.star: cycle in load graph", // from b + "cannot load b.star: cannot load a.star: cannot load c.star: cycle in load graph", // from c + } + // But if the c goroutine is slow to start, b loads a, + // and a loads c; then c fails to load b because it forms a cycle. + // The errors this time are: + want2 := []string{ + "cannot load a.star: cannot load c.star: cannot load b.star: cycle in load graph", // from b + "cannot load b.star: cycle in load graph", // from c + } + if !reflect.DeepEqual(got, want1) && !reflect.DeepEqual(got, want2) { + t.Error(got) + } +} + +// cache is a concurrency-safe, duplicate-suppressing, +// non-blocking cache of the doLoad function. +// See Section 9.7 of gopl.io for an explanation of this structure. +// It also features online deadlock (load cycle) detection. 
type cache struct {
	cacheMu sync.Mutex        // guards the cache map
	cache   map[string]*entry // module name -> load result (possibly still in progress)

	fakeFilesystem map[string]string // module name -> Starlark source text
}

// An entry records the outcome of loading one module.
// globals and err are written by the goroutine that performs the load
// before it closes ready; other goroutines read them only after
// receiving from ready.
type entry struct {
	owner   unsafe.Pointer // a *cycleChecker; see cycleCheck
	globals starlark.StringDict
	err     error
	ready   chan struct{} // closed once globals and err are set
}

// Load returns the globals of the named module, loading it if necessary.
// Each call starts a new logical "thread of loading" with its own
// cycleChecker.
func (c *cache) Load(module string) (starlark.StringDict, error) {
	return c.get(new(cycleChecker), module)
}

// get loads and returns an entry (if not already loaded).
//
// The first caller for a given module creates the entry and performs the
// load; later callers wait on the entry's ready channel. Before waiting,
// a caller runs cycleCheck and returns its error instead of blocking if
// the wait would close a cycle.
func (c *cache) get(cc *cycleChecker, module string) (starlark.StringDict, error) {
	c.cacheMu.Lock()
	e := c.cache[module]
	if e != nil {
		c.cacheMu.Unlock()
		// Some other goroutine is getting this module.
		// Wait for it to become ready.

		// Detect load cycles to avoid deadlocks.
		if err := cycleCheck(e, cc); err != nil {
			return nil, err
		}

		// Publish the waits-for edge only for the duration of the wait.
		cc.setWaitsFor(e)
		<-e.ready
		cc.setWaitsFor(nil)
	} else {
		// First request for this module.
		e = &entry{ready: make(chan struct{})}
		c.cache[module] = e
		c.cacheMu.Unlock()

		// Publish the owner edge only while the load is in progress.
		e.setOwner(cc)
		e.globals, e.err = c.doLoad(cc, module)
		e.setOwner(nil)

		// Broadcast that the entry is now ready.
		close(e.ready)
	}
	return e.globals, e.err
}

// doLoad executes the source text registered for module in fakeFilesystem
// on a fresh starlark.Thread. The thread's Load hook re-enters c.get with
// the same cycleChecker, and its Print hook writes to standard output.
func (c *cache) doLoad(cc *cycleChecker, module string) (starlark.StringDict, error) {
	thread := &starlark.Thread{
		Name:  "exec " + module,
		Print: func(_ *starlark.Thread, msg string) { fmt.Println(msg) },
		Load: func(_ *starlark.Thread, module string) (starlark.StringDict, error) {
			// Tunnel the cycle-checker state for this "thread of loading".
			return c.get(cc, module)
		},
	}
	data := c.fakeFilesystem[module] // the empty string if module is not registered
	return starlark.ExecFile(thread, module, data, nil)
}

// -- concurrent cycle checking --

// A cycleChecker is used for concurrent deadlock detection.
// Each top-level call to Load creates its own cycleChecker,
// which is passed to all recursive calls it makes.
// It corresponds to a logical thread in the deadlock detection literature.
type cycleChecker struct {
	waitsFor unsafe.Pointer // an *entry; see cycleCheck
}

// setWaitsFor atomically records the entry that cc is waiting for.
// A nil e clears the edge.
func (cc *cycleChecker) setWaitsFor(e *entry) {
	atomic.StorePointer(&cc.waitsFor, unsafe.Pointer(e))
}

// setOwner atomically records the cycleChecker that is readying e.
// A nil cc clears the edge.
func (e *entry) setOwner(cc *cycleChecker) {
	atomic.StorePointer(&e.owner, unsafe.Pointer(cc))
}

// cycleCheck reports whether there is a path in the waits-for graph
// from resource 'e' to thread 'me'.
// It returns a non-nil error if such a path exists, and nil otherwise.
//
// The waits-for graph (WFG) is a bipartite graph whose nodes are
// alternately of type entry and cycleChecker. Each node has at most
// one outgoing edge. An entry has an "owner" edge to a cycleChecker
// while it is being readied by that cycleChecker, and a cycleChecker
// has a "waits-for" edge to an entry while it is waiting for that entry
// to become ready.
//
// Before adding a waits-for edge, the cache checks whether the new edge
// would form a cycle. If so, this indicates that the load graph is
// cyclic and that the following wait operation would deadlock.
func cycleCheck(e *entry, me *cycleChecker) error {
	// Follow owner and waits-for edges alternately until the chain ends
	// (a nil edge) or leads back to me.
	for e != nil {
		cc := (*cycleChecker)(atomic.LoadPointer(&e.owner))
		if cc == nil {
			break
		}
		if cc == me {
			return fmt.Errorf("cycle in load graph")
		}
		e = (*entry)(atomic.LoadPointer(&cc.waitsFor))
	}
	return nil
}
diff --git a/starlark/hashtable.go b/starlark/hashtable.go
new file mode 100644
index 0000000..27990b5
--- /dev/null
+++ b/starlark/hashtable.go
@@ -0,0 +1,373 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package starlark

import (
	"fmt"
	_ "unsafe" // for go:linkname hack
)

// hashtable is used to represent Starlark dict and set values.
// It is a hash table whose key/value entries form a doubly-linked list
// in the order the entries were inserted.
type hashtable struct {
	table     []bucket  // len is zero or a power of two
	bucket0   [1]bucket // inline allocation for small maps.
	len       uint32    // number of entries in use
	itercount uint32    // number of active iterators (ignored if frozen)
	head      *entry    // insertion order doubly-linked list; may be nil
	tailLink  **entry   // address of nil link at end of list (perhaps &head)
	frozen    bool
}

// bucketSize is the number of entries stored inline in each bucket.
const bucketSize = 8

// A bucket holds up to bucketSize entries and links to an overflow bucket.
type bucket struct {
	entries [bucketSize]entry
	next    *bucket // linked list of buckets
}

// An entry is one key/value slot of a bucket.
type entry struct {
	hash       uint32 // nonzero => in use
	key, value Value
	next       *entry  // insertion order doubly-linked list; may be nil
	prevLink   **entry // address of link to this entry (perhaps &head)
}

// init prepares an empty table sized for size elements.
// It chooses the smallest power-of-two bucket count for which
// overloaded(size, nb) is false, using the inline bucket0 when a single
// bucket suffices. It panics if size is negative.
func (ht *hashtable) init(size int) {
	if size < 0 {
		panic("size < 0")
	}
	nb := 1
	for overloaded(size, nb) {
		nb = nb << 1
	}
	if nb < 2 {
		ht.table = ht.bucket0[:1]
	} else {
		ht.table = make([]bucket, nb)
	}
	ht.tailLink = &ht.head
}

// freeze marks the table as frozen and calls Freeze on the key and value
// of every in-use entry. It does nothing if the table is already frozen.
func (ht *hashtable) freeze() {
	if !ht.frozen {
		ht.frozen = true
		for i := range ht.table {
			for p := &ht.table[i]; p != nil; p = p.next {
				for i := range p.entries {
					e := &p.entries[i]
					if e.hash != 0 {
						e.key.Freeze()
						e.value.Freeze()
					}
				}
			}
		}
	}
}

// insert associates v with k, replacing the value of an existing equal key.
// A new key is appended to the insertion-order list.
// It returns an error if the table is frozen, if an iteration is in
// progress, if k cannot be hashed, or if comparing k with an existing
// key fails.
func (ht *hashtable) insert(k, v Value) error {
	if ht.frozen {
		return fmt.Errorf("cannot insert into frozen hash table")
	}
	if ht.itercount > 0 {
		return fmt.Errorf("cannot insert into hash table during iteration")
	}
	if ht.table == nil {
		ht.init(1)
	}
	h, err := k.Hash()
	if err != nil {
		return err
	}
	if h == 0 {
		h = 1 // zero is reserved
	}

retry:
	var insert *entry

	// Inspect each bucket in the bucket list.
	p := &ht.table[h&(uint32(len(ht.table)-1))]
	for {
		for i := range p.entries {
			e := &p.entries[i]
			if e.hash != h {
				if e.hash == 0 {
					// Found empty entry; make a note.
					insert = e
				}
				continue
			}
			if eq, err := Equal(k, e.key); err != nil {
				return err // e.g. excessively recursive tuple
			} else if !eq {
				continue
			}
			// Key already present; update value.
			e.value = v
			return nil
		}
		if p.next == nil {
			break
		}
		p = p.next
	}

	// Key not found.  p points to the last bucket.

	// Does the number of elements exceed the buckets' load factor?
	if overloaded(int(ht.len), len(ht.table)) {
		// grow replaces ht.table, so the bucket chain must be searched again.
		ht.grow()
		goto retry
	}

	if insert == nil {
		// No space in existing buckets.  Add a new one to the bucket list.
		b := new(bucket)
		p.next = b
		insert = &b.entries[0]
	}

	// Insert key/value pair.
	insert.hash = h
	insert.key = k
	insert.value = v

	// Append entry to doubly-linked list.
	insert.prevLink = ht.tailLink
	*ht.tailLink = insert
	ht.tailLink = &insert.next

	ht.len++

	return nil
}

// overloaded reports whether a table of the given number of bucket chains
// holding elems elements has reached the load factor.
// A table with fewer than bucketSize elements is never overloaded.
func overloaded(elems, buckets int) bool {
	const loadFactor = 6.5 // just a guess
	return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
}

// grow doubles the number of bucket chains and reinserts every entry
// in its original insertion order.
func (ht *hashtable) grow() {
	// Double the number of buckets and rehash.
	// TODO(adonovan): opt:
	// - avoid reentrant calls to ht.insert, and specialize it.
	//   e.g. we know the calls to Equals will return false since
	//   there are no duplicates among the old keys.
	// - saving the entire hash in the bucket would avoid the need to
	//   recompute the hash.
	// - save the old buckets on a free list.
	ht.table = make([]bucket, len(ht.table)<<1)
	oldhead := ht.head
	ht.head = nil
	ht.tailLink = &ht.head
	ht.len = 0
	for e := oldhead; e != nil; e = e.next {
		// The error result of insert is deliberately not checked here.
		ht.insert(e.key, e.value)
	}
	ht.bucket0[0] = bucket{} // clear out unused initial bucket
}

// lookup returns the value associated with k.
// found is false, and v is None, if there is no such key.
// The key is hashed before the empty-table check, so an unhashable key
// yields an error even when the table has never been initialized.
func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
	h, err := k.Hash()
	if err != nil {
		return nil, false, err // unhashable
	}
	if h == 0 {
		h = 1 // zero is reserved
	}
	if ht.table == nil {
		return None, false, nil // empty
	}

	// Inspect each bucket in the bucket list.
	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
		for i := range p.entries {
			e := &p.entries[i]
			if e.hash == h {
				if eq, err := Equal(k, e.key); err != nil {
					return nil, false, err // e.g. excessively recursive tuple
				} else if eq {
					return e.value, true, nil // found
				}
			}
		}
	}
	return None, false, nil // not found
}

// items returns all the items in the map (as key/value pairs) in insertion order.
func (ht *hashtable) items() []Tuple {
	items := make([]Tuple, 0, ht.len)
	array := make([]Value, ht.len*2) // allocate a single backing array
	for e := ht.head; e != nil; e = e.next {
		// Carve the next two-element pair off the front of the backing
		// array; the capacity limit keeps pairs from overlapping on append.
		pair := Tuple(array[:2:2])
		array = array[2:]
		pair[0] = e.key
		pair[1] = e.value
		items = append(items, pair)
	}
	return items
}

// first returns the earliest-inserted key still present.
// It returns (None, false) if the table is empty.
func (ht *hashtable) first() (Value, bool) {
	if ht.head != nil {
		return ht.head.key, true
	}
	return None, false
}

// keys returns a new slice of all keys in insertion order.
func (ht *hashtable) keys() []Value {
	keys := make([]Value, 0, ht.len)
	for e := ht.head; e != nil; e = e.next {
		keys = append(keys, e.key)
	}
	return keys
}

// delete removes the entry for k and returns its value.
// found is false, and v is None, if there was no such key.
// It returns an error if the table is frozen, if an iteration is in
// progress, if k cannot be hashed, or if comparing keys fails.
func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
	if ht.frozen {
		return nil, false, fmt.Errorf("cannot delete from frozen hash table")
	}
	if ht.itercount > 0 {
		return nil, false, fmt.Errorf("cannot delete from hash table during iteration")
	}
	if ht.table == nil {
		return None, false, nil // empty
	}
	h, err := k.Hash()
	if err != nil {
		return nil, false, err // unhashable
	}
	if h == 0 {
		h = 1 // zero is reserved
	}

	// Inspect each bucket in the bucket list.
	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
		for i := range p.entries {
			e := &p.entries[i]
			if e.hash == h {
				if eq, err := Equal(k, e.key); err != nil {
					return nil, false, err
				} else if eq {
					// Remove e from doubly-linked list.
					*e.prevLink = e.next
					if e.next == nil {
						ht.tailLink = e.prevLink // deletion of last entry
					} else {
						e.next.prevLink = e.prevLink
					}

					v := e.value
					*e = entry{} // zero the slot; hash == 0 marks it free
					ht.len--
					return v, true, nil // found
				}
			}
		}
	}

	// TODO(adonovan): opt: remove completely empty bucket from bucket list.

	return None, false, nil // not found
}

// clear removes all entries while keeping the current top-level table.
// It returns an error if the table is frozen or an iteration is in progress.
func (ht *hashtable) clear() error {
	if ht.frozen {
		return fmt.Errorf("cannot clear frozen hash table")
	}
	if ht.itercount > 0 {
		return fmt.Errorf("cannot clear hash table during iteration")
	}
	if ht.table != nil {
		for i := range ht.table {
			ht.table[i] = bucket{} // also drops any overflow chain
		}
	}
	ht.head = nil
	ht.tailLink = &ht.head
	ht.len = 0
	return nil
}

// dump is provided as an aid to debugging.
// It prints the table header and every entry slot of every bucket,
// including unused slots, to standard output.
func (ht *hashtable) dump() {
	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
		ht, ht.len, ht.head, ht.tailLink)
	if ht.tailLink != nil {
		fmt.Printf(" *tailLink=%p", *ht.tailLink)
	}
	fmt.Println()
	for j := range ht.table {
		fmt.Printf("bucket chain %d\n", j)
		for p := &ht.table[j]; p != nil; p = p.next {
			fmt.Printf("bucket %p\n", p)
			for i := range p.entries {
				e := &p.entries[i]
				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
					i, e, e.hash, e.key, e.value)
				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
					e.next, &e.next, e.prevLink)
				if e.prevLink != nil {
					fmt.Printf(" *prev=%p", *e.prevLink)
				}
				fmt.Println()
			}
		}
	}
}

// iterate returns an iterator over the keys in insertion order.
// Unless the table is frozen, it increments itercount, which makes
// insert, delete and clear fail until the iterator's Done method
// decrements it again.
func (ht *hashtable) iterate() *keyIterator {
	if !ht.frozen {
		ht.itercount++
	}
	return &keyIterator{ht: ht, e: ht.head}
}

// A keyIterator walks the insertion-order list of a hashtable.
type keyIterator struct {
	ht *hashtable
	e  *entry // next entry to yield; nil when exhausted
}

// Next stores the next key in *k and reports true,
// or reports false if the iteration is exhausted.
func (it *keyIterator) Next(k *Value) bool {
	if it.e != nil {
		*k = it.e.key
		it.e = it.e.next
		return true
	}
	return false
}

// Done releases the iterator, decrementing the table's iterator count
// unless the table is frozen.
func (it *keyIterator) Done() {
	if !it.ht.frozen {
		it.ht.itercount--
	}
}

// hashString computes the hash of s.
+func hashString(s string) uint32 { + if len(s) >= 12 { + // Call the Go runtime's optimized hash implementation, + // which uses the AESENC instruction on amd64 machines. + return uint32(goStringHash(s, 0)) + } + return softHashString(s) +} + +//go:linkname goStringHash runtime.stringHash +func goStringHash(s string, seed uintptr) uintptr + +// softHashString computes the 32-bit FNV-1a hash of s in software. +func softHashString(s string) uint32 { + var h uint32 = 2166136261 + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} diff --git a/starlark/hashtable_test.go b/starlark/hashtable_test.go new file mode 100644 index 0000000..3649f14 --- /dev/null +++ b/starlark/hashtable_test.go @@ -0,0 +1,125 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark + +import ( + "fmt" + "math/rand" + "sync" + "testing" +) + +func TestHashtable(t *testing.T) { + makeTestIntsOnce.Do(makeTestInts) + testHashtable(t, make(map[int]bool)) +} + +func BenchmarkStringHash(b *testing.B) { + for len := 1; len <= 1024; len *= 2 { + buf := make([]byte, len) + rand.New(rand.NewSource(0)).Read(buf) + s := string(buf) + + b.Run(fmt.Sprintf("hard-%d", len), func(b *testing.B) { + for i := 0; i < b.N; i++ { + hashString(s) + } + }) + b.Run(fmt.Sprintf("soft-%d", len), func(b *testing.B) { + for i := 0; i < b.N; i++ { + softHashString(s) + } + }) + } +} + +func BenchmarkHashtable(b *testing.B) { + makeTestIntsOnce.Do(makeTestInts) + b.ResetTimer() + for i := 0; i < b.N; i++ { + testHashtable(b, nil) + } +} + +const testIters = 10000 + +var ( + // testInts is a zipf-distributed array of Ints and corresponding ints. + // This removes the cost of generating them on the fly during benchmarking. + // Without this, Zipf and MakeInt dominate CPU and memory costs, respectively. 
+ makeTestIntsOnce sync.Once + testInts [3 * testIters]struct { + Int Int + goInt int + } +) + +func makeTestInts() { + zipf := rand.NewZipf(rand.New(rand.NewSource(0)), 1.1, 1.0, 1000.0) + for i := range &testInts { + r := int(zipf.Uint64()) + testInts[i].goInt = r + testInts[i].Int = MakeInt(r) + } +} + +// testHashtable is both a test and a benchmark of hashtable. +// When sane != nil, it acts as a test against the semantics of Go's map. +func testHashtable(tb testing.TB, sane map[int]bool) { + var i int // index into testInts + + var ht hashtable + + // Insert 10000 random ints into the map. + for j := 0; j < testIters; j++ { + k := testInts[i] + i++ + if err := ht.insert(k.Int, None); err != nil { + tb.Fatal(err) + } + if sane != nil { + sane[k.goInt] = true + } + } + + // Do 10000 random lookups in the map. + for j := 0; j < testIters; j++ { + k := testInts[i] + i++ + _, found, err := ht.lookup(k.Int) + if err != nil { + tb.Fatal(err) + } + if sane != nil { + _, found2 := sane[k.goInt] + if found != found2 { + tb.Fatal("sanity check failed") + } + } + } + + // Do 10000 random deletes from the map. + for j := 0; j < testIters; j++ { + k := testInts[i] + i++ + _, found, err := ht.delete(k.Int) + if err != nil { + tb.Fatal(err) + } + if sane != nil { + _, found2 := sane[k.goInt] + if found != found2 { + tb.Fatal("sanity check failed") + } + delete(sane, k.goInt) + } + } + + if sane != nil { + if int(ht.len) != len(sane) { + tb.Fatal("sanity check failed") + } + } +} diff --git a/starlark/int.go b/starlark/int.go new file mode 100644 index 0000000..9ee46f9 --- /dev/null +++ b/starlark/int.go @@ -0,0 +1,436 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark + +import ( + "fmt" + "math" + "math/big" + "reflect" + "strconv" + + "go.starlark.net/syntax" +) + +// Int is the type of a Starlark int. 
+// +// The zero value is not a legal value; use MakeInt(0). +type Int struct{ impl intImpl } + +// --- high-level accessors --- + +// MakeInt returns a Starlark int for the specified signed integer. +func MakeInt(x int) Int { return MakeInt64(int64(x)) } + +// MakeInt64 returns a Starlark int for the specified int64. +func MakeInt64(x int64) Int { + if math.MinInt32 <= x && x <= math.MaxInt32 { + return makeSmallInt(x) + } + return makeBigInt(big.NewInt(x)) +} + +// MakeUint returns a Starlark int for the specified unsigned integer. +func MakeUint(x uint) Int { return MakeUint64(uint64(x)) } + +// MakeUint64 returns a Starlark int for the specified uint64. +func MakeUint64(x uint64) Int { + if x <= math.MaxInt32 { + return makeSmallInt(int64(x)) + } + return makeBigInt(new(big.Int).SetUint64(x)) +} + +// MakeBigInt returns a Starlark int for the specified big.Int. +// The new Int value will contain a copy of x. The caller is safe to modify x. +func MakeBigInt(x *big.Int) Int { + if n := x.BitLen(); n < 32 || n == 32 && x.Int64() == math.MinInt32 { + return makeSmallInt(x.Int64()) + } + z := new(big.Int).Set(x) + return makeBigInt(z) +} + +var ( + zero, one = makeSmallInt(0), makeSmallInt(1) + oneBig = big.NewInt(1) + + _ HasUnary = Int{} +) + +// Unary implements the operations +int, -int, and ~int. +func (i Int) Unary(op syntax.Token) (Value, error) { + switch op { + case syntax.MINUS: + return zero.Sub(i), nil + case syntax.PLUS: + return i, nil + case syntax.TILDE: + return i.Not(), nil + } + return nil, nil +} + +// Int64 returns the value as an int64. +// If it is not exactly representable the result is undefined and ok is false. +func (i Int) Int64() (_ int64, ok bool) { + iSmall, iBig := i.get() + if iBig != nil { + x, acc := bigintToInt64(iBig) + if acc != big.Exact { + return // inexact + } + return x, true + } + return iSmall, true +} + +// BigInt returns a new big.Int with the same value as the Int. 
+func (i Int) BigInt() *big.Int { + iSmall, iBig := i.get() + if iBig != nil { + return new(big.Int).Set(iBig) + } + return big.NewInt(iSmall) +} + +// bigInt returns the value as a big.Int. +// It differs from BigInt in that this method returns the actual +// reference and any modification will change the state of i. +func (i Int) bigInt() *big.Int { + iSmall, iBig := i.get() + if iBig != nil { + return iBig + } + return big.NewInt(iSmall) +} + +// Uint64 returns the value as a uint64. +// If it is not exactly representable the result is undefined and ok is false. +func (i Int) Uint64() (_ uint64, ok bool) { + iSmall, iBig := i.get() + if iBig != nil { + x, acc := bigintToUint64(iBig) + if acc != big.Exact { + return // inexact + } + return x, true + } + if iSmall < 0 { + return // inexact + } + return uint64(iSmall), true +} + +// The math/big API should provide this function. +func bigintToInt64(i *big.Int) (int64, big.Accuracy) { + sign := i.Sign() + if sign > 0 { + if i.Cmp(maxint64) > 0 { + return math.MaxInt64, big.Below + } + } else if sign < 0 { + if i.Cmp(minint64) < 0 { + return math.MinInt64, big.Above + } + } + return i.Int64(), big.Exact +} + +// The math/big API should provide this function. 
+func bigintToUint64(i *big.Int) (uint64, big.Accuracy) { + sign := i.Sign() + if sign > 0 { + if i.BitLen() > 64 { + return math.MaxUint64, big.Below + } + } else if sign < 0 { + return 0, big.Above + } + return i.Uint64(), big.Exact +} + +var ( + minint64 = new(big.Int).SetInt64(math.MinInt64) + maxint64 = new(big.Int).SetInt64(math.MaxInt64) +) + +func (i Int) Format(s fmt.State, ch rune) { + iSmall, iBig := i.get() + if iBig != nil { + iBig.Format(s, ch) + return + } + big.NewInt(iSmall).Format(s, ch) +} +func (i Int) String() string { + iSmall, iBig := i.get() + if iBig != nil { + return iBig.Text(10) + } + return strconv.FormatInt(iSmall, 10) +} +func (i Int) Type() string { return "int" } +func (i Int) Freeze() {} // immutable +func (i Int) Truth() Bool { return i.Sign() != 0 } +func (i Int) Hash() (uint32, error) { + iSmall, iBig := i.get() + var lo big.Word + if iBig != nil { + lo = iBig.Bits()[0] + } else { + lo = big.Word(iSmall) + } + return 12582917 * uint32(lo+3), nil +} +func (x Int) CompareSameType(op syntax.Token, v Value, depth int) (bool, error) { + y := v.(Int) + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + return threeway(op, x.bigInt().Cmp(y.bigInt())), nil + } + return threeway(op, signum64(xSmall-ySmall)), nil +} + +// Float returns the float value nearest i. +func (i Int) Float() Float { + iSmall, iBig := i.get() + if iBig != nil { + f, _ := new(big.Float).SetInt(iBig).Float64() + return Float(f) + } + return Float(iSmall) +} + +// finiteFloat returns the finite float value nearest i, +// or an error if the magnitude is too large. 
+func (i Int) finiteFloat() (Float, error) { + f := i.Float() + if math.IsInf(float64(f), 0) { + return 0, fmt.Errorf("int too large to convert to float") + } + return f, nil +} + +func (x Int) Sign() int { + xSmall, xBig := x.get() + if xBig != nil { + return xBig.Sign() + } + return signum64(xSmall) +} + +func (x Int) Add(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + return MakeBigInt(new(big.Int).Add(x.bigInt(), y.bigInt())) + } + return MakeInt64(xSmall + ySmall) +} +func (x Int) Sub(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + return MakeBigInt(new(big.Int).Sub(x.bigInt(), y.bigInt())) + } + return MakeInt64(xSmall - ySmall) +} +func (x Int) Mul(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + return MakeBigInt(new(big.Int).Mul(x.bigInt(), y.bigInt())) + } + return MakeInt64(xSmall * ySmall) +} +func (x Int) Or(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + return MakeBigInt(new(big.Int).Or(x.bigInt(), y.bigInt())) + } + return makeSmallInt(xSmall | ySmall) +} +func (x Int) And(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + return MakeBigInt(new(big.Int).And(x.bigInt(), y.bigInt())) + } + return makeSmallInt(xSmall & ySmall) +} +func (x Int) Xor(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + return MakeBigInt(new(big.Int).Xor(x.bigInt(), y.bigInt())) + } + return makeSmallInt(xSmall ^ ySmall) +} +func (x Int) Not() Int { + xSmall, xBig := x.get() + if xBig != nil { + return MakeBigInt(new(big.Int).Not(xBig)) + } + return makeSmallInt(^xSmall) +} +func (x Int) Lsh(y uint) Int { return MakeBigInt(new(big.Int).Lsh(x.bigInt(), y)) } +func (x Int) Rsh(y uint) Int { return MakeBigInt(new(big.Int).Rsh(x.bigInt(), y)) } + +// Precondition: y is 
nonzero. +func (x Int) Div(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + // http://python-history.blogspot.com/2010/08/why-pythons-integer-division-floors.html + if xBig != nil || yBig != nil { + xb, yb := x.bigInt(), y.bigInt() + + var quo, rem big.Int + quo.QuoRem(xb, yb, &rem) + if (xb.Sign() < 0) != (yb.Sign() < 0) && rem.Sign() != 0 { + quo.Sub(&quo, oneBig) + } + return MakeBigInt(&quo) + } + quo := xSmall / ySmall + rem := xSmall % ySmall + if (xSmall < 0) != (ySmall < 0) && rem != 0 { + quo -= 1 + } + return MakeInt64(quo) +} + +// Precondition: y is nonzero. +func (x Int) Mod(y Int) Int { + xSmall, xBig := x.get() + ySmall, yBig := y.get() + if xBig != nil || yBig != nil { + xb, yb := x.bigInt(), y.bigInt() + + var quo, rem big.Int + quo.QuoRem(xb, yb, &rem) + if (xb.Sign() < 0) != (yb.Sign() < 0) && rem.Sign() != 0 { + rem.Add(&rem, yb) + } + return MakeBigInt(&rem) + } + rem := xSmall % ySmall + if (xSmall < 0) != (ySmall < 0) && rem != 0 { + rem += ySmall + } + return makeSmallInt(rem) +} + +func (i Int) rational() *big.Rat { + iSmall, iBig := i.get() + if iBig != nil { + return new(big.Rat).SetInt(iBig) + } + return new(big.Rat).SetInt64(iSmall) +} + +// AsInt32 returns the value of x if is representable as an int32. +func AsInt32(x Value) (int, error) { + i, ok := x.(Int) + if !ok { + return 0, fmt.Errorf("got %s, want int", x.Type()) + } + iSmall, iBig := i.get() + if iBig != nil { + return 0, fmt.Errorf("%s out of range", i) + } + return int(iSmall), nil +} + +// AsInt sets *ptr to the value of Starlark int x, if it is exactly representable, +// otherwise it returns an error. +// The type of ptr must be one of the pointer types *int, *int8, *int16, *int32, or *int64, +// or one of their unsigned counterparts including *uintptr. 
+func AsInt(x Value, ptr interface{}) error { + xint, ok := x.(Int) + if !ok { + return fmt.Errorf("got %s, want int", x.Type()) + } + + bits := reflect.TypeOf(ptr).Elem().Size() * 8 + switch ptr.(type) { + case *int, *int8, *int16, *int32, *int64: + i, ok := xint.Int64() + if !ok || bits < 64 && !(-1<<(bits-1) <= i && i < 1<<(bits-1)) { + return fmt.Errorf("%s out of range (want value in signed %d-bit range)", xint, bits) + } + switch ptr := ptr.(type) { + case *int: + *ptr = int(i) + case *int8: + *ptr = int8(i) + case *int16: + *ptr = int16(i) + case *int32: + *ptr = int32(i) + case *int64: + *ptr = int64(i) + } + + case *uint, *uint8, *uint16, *uint32, *uint64, *uintptr: + i, ok := xint.Uint64() + if !ok || bits < 64 && i >= 1<<bits { + return fmt.Errorf("%s out of range (want value in unsigned %d-bit range)", xint, bits) + } + switch ptr := ptr.(type) { + case *uint: + *ptr = uint(i) + case *uint8: + *ptr = uint8(i) + case *uint16: + *ptr = uint16(i) + case *uint32: + *ptr = uint32(i) + case *uint64: + *ptr = uint64(i) + case *uintptr: + *ptr = uintptr(i) + } + default: + panic(fmt.Sprintf("invalid argument type: %T", ptr)) + } + return nil +} + +// NumberToInt converts a number x to an integer value. +// An int is returned unchanged, a float is truncated towards zero. +// NumberToInt reports an error for all other values. +func NumberToInt(x Value) (Int, error) { + switch x := x.(type) { + case Int: + return x, nil + case Float: + f := float64(x) + if math.IsInf(f, 0) { + return zero, fmt.Errorf("cannot convert float infinity to integer") + } else if math.IsNaN(f) { + return zero, fmt.Errorf("cannot convert float NaN to integer") + } + return finiteFloatToInt(x), nil + + } + return zero, fmt.Errorf("cannot convert %s to int", x.Type()) +} + +// finiteFloatToInt converts f to an Int, truncating towards zero. +// f must be finite. 
+func finiteFloatToInt(f Float) Int { + if math.MinInt64 <= f && f <= math.MaxInt64 { + // small values + return MakeInt64(int64(f)) + } + rat := f.rational() + if rat == nil { + panic(f) // non-finite + } + return MakeBigInt(new(big.Int).Div(rat.Num(), rat.Denom())) +} diff --git a/starlark/int_generic.go b/starlark/int_generic.go new file mode 100644 index 0000000..9e84d7f --- /dev/null +++ b/starlark/int_generic.go @@ -0,0 +1,33 @@ +//+build !linux,!darwin,!dragonfly,!freebsd,!netbsd,!openbsd,!solaris darwin,arm64 !amd64,!arm64,!mips64x,!ppc64x + +package starlark + +// generic Int implementation as a union + +import "math/big" + +type intImpl struct { + // We use only the signed 32-bit range of small to ensure + // that small+small and small*small do not overflow. + small_ int64 // minint32 <= small <= maxint32 + big_ *big.Int // big != nil <=> value is not representable as int32 +} + +// --- low-level accessors --- + +// get returns the small and big components of the Int. +// small is defined only if big is nil. +// small is sign-extended to 64 bits for ease of subsequent arithmetic. +func (i Int) get() (small int64, big *big.Int) { + return i.impl.small_, i.impl.big_ +} + +// Precondition: math.MinInt32 <= x && x <= math.MaxInt32 +func makeSmallInt(x int64) Int { + return Int{intImpl{small_: x}} +} + +// Precondition: x cannot be represented as int32. +func makeBigInt(x *big.Int) Int { + return Int{intImpl{big_: x}} +} diff --git a/starlark/int_posix64.go b/starlark/int_posix64.go new file mode 100644 index 0000000..1f13d66 --- /dev/null +++ b/starlark/int_posix64.go @@ -0,0 +1,67 @@ +//+build linux darwin dragonfly freebsd netbsd openbsd solaris +//+build amd64 arm64,!darwin mips64x ppc64x + +package starlark + +// This file defines an optimized Int implementation for 64-bit machines +// running POSIX. It reserves a 4GB portion of the address space using +// mmap and represents int32 values as addresses within that range. 
This +// disambiguates int32 values from *big.Int pointers, letting all Int +// values be represented as an unsafe.Pointer, so that Int-to-Value +// interface conversion need not allocate. + +// Although iOS (arm64,darwin) claims to be a POSIX-compliant, +// it limits each process to about 700MB of virtual address space, +// which defeats the optimization. +// +// TODO(golang.org/issue/38485): darwin,arm64 may refer to macOS in the future. +// Update this when there are distinct GOOS values for macOS, iOS, and other Apple +// operating systems on arm64. + +import ( + "log" + "math" + "math/big" + "unsafe" + + "golang.org/x/sys/unix" +) + +// intImpl represents a union of (int32, *big.Int) in a single pointer, +// so that Int-to-Value conversions need not allocate. +// +// The pointer is either a *big.Int, if the value is big, or a pointer into a +// reserved portion of the address space (smallints), if the value is small. +// +// See int_generic.go for the basic representation concepts. +type intImpl unsafe.Pointer + +// get returns the (small, big) arms of the union. +func (i Int) get() (int64, *big.Int) { + ptr := uintptr(i.impl) + if ptr >= smallints && ptr < smallints+1<<32 { + return math.MinInt32 + int64(ptr-smallints), nil + } + return 0, (*big.Int)(i.impl) +} + +// Precondition: math.MinInt32 <= x && x <= math.MaxInt32 +func makeSmallInt(x int64) Int { + return Int{intImpl(uintptr(x-math.MinInt32) + smallints)} +} + +// Precondition: x cannot be represented as int32. +func makeBigInt(x *big.Int) Int { return Int{intImpl(x)} } + +// smallints is the base address of a 2^32 byte memory region. +// Pointers to addresses in this region represent int32 values. +// We assume smallints is not at the very top of the address space. 
+var smallints = reserveAddresses(1 << 32) + +func reserveAddresses(len int) uintptr { + b, err := unix.Mmap(-1, 0, len, unix.PROT_READ, unix.MAP_PRIVATE|unix.MAP_ANON) + if err != nil { + log.Fatalf("mmap: %v", err) + } + return uintptr(unsafe.Pointer(&b[0])) +} diff --git a/starlark/int_test.go b/starlark/int_test.go new file mode 100644 index 0000000..ad1bf92 --- /dev/null +++ b/starlark/int_test.go @@ -0,0 +1,102 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark + +import ( + "fmt" + "math" + "math/big" + "testing" +) + +// TestIntOpts exercises integer arithmetic, especially at the boundaries. +func TestIntOpts(t *testing.T) { + f := MakeInt64 + left, right := big.NewInt(math.MinInt32), big.NewInt(math.MaxInt32) + + for i, test := range []struct { + val Int + want string + }{ + // Add + {f(math.MaxInt32).Add(f(1)), "80000000"}, + {f(math.MinInt32).Add(f(-1)), "-80000001"}, + // Mul + {f(math.MaxInt32).Mul(f(math.MaxInt32)), "3fffffff00000001"}, + {f(math.MinInt32).Mul(f(math.MinInt32)), "4000000000000000"}, + {f(math.MaxUint32).Mul(f(math.MaxUint32)), "fffffffe00000001"}, + {f(math.MinInt32).Mul(f(-1)), "80000000"}, + // Div + {f(math.MinInt32).Div(f(-1)), "80000000"}, + {f(1 << 31).Div(f(2)), "40000000"}, + // And + {f(math.MaxInt32).And(f(math.MaxInt32)), "7fffffff"}, + {f(math.MinInt32).And(f(math.MinInt32)), "-80000000"}, + {f(1 << 33).And(f(1 << 32)), "0"}, + // Mod + {f(1 << 32).Mod(f(2)), "0"}, + // Or + {f(1 << 32).Or(f(0)), "100000000"}, + {f(math.MaxInt32).Or(f(0)), "7fffffff"}, + {f(math.MaxUint32).Or(f(0)), "ffffffff"}, + {f(math.MinInt32).Or(f(math.MinInt32)), "-80000000"}, + // Xor + {f(math.MinInt32).Xor(f(-1)), "7fffffff"}, + // Not + {f(math.MinInt32).Not(), "7fffffff"}, + {f(math.MaxInt32).Not(), "-80000000"}, + // Shift + {f(1).Lsh(31), "80000000"}, + {f(1).Lsh(32), "100000000"}, + {f(math.MaxInt32 + 
1).Rsh(1), "40000000"}, + {f(math.MinInt32 * 2).Rsh(1), "-80000000"}, + } { + if got := fmt.Sprintf("%x", test.val); got != test.want { + t.Errorf("%d equals %s, want %s", i, got, test.want) + } + small, big := test.val.get() + if small < math.MinInt32 || math.MaxInt32 < small { + t.Errorf("expected big, %d %s", i, test.val) + } + if big == nil { + continue + } + if small != 0 { + t.Errorf("expected 0 small, %d %s with %d", i, test.val, small) + } + if big.Cmp(left) >= 0 && big.Cmp(right) <= 0 { + t.Errorf("expected small, %d %s", i, test.val) + } + } +} + +func TestImmutabilityMakeBigInt(t *testing.T) { + // use max int64 for the test + expect := int64(^uint64(0) >> 1) + + mutint := big.NewInt(expect) + value := MakeBigInt(mutint) + mutint.Set(big.NewInt(1)) + + got, _ := value.Int64() + if got != expect { + t.Errorf("expected %d, got %d", expect, got) + } +} + +func TestImmutabilityBigInt(t *testing.T) { + // use 1 and max int64 for the test + for _, expect := range []int64{1, int64(^uint64(0) >> 1)} { + value := MakeBigInt(big.NewInt(expect)) + + bigint := value.BigInt() + bigint.Set(big.NewInt(2)) + + got, _ := value.Int64() + if got != expect { + t.Errorf("expected %d, got %d", expect, got) + } + } +} diff --git a/starlark/interp.go b/starlark/interp.go new file mode 100644 index 0000000..642d8f5 --- /dev/null +++ b/starlark/interp.go @@ -0,0 +1,669 @@ +package starlark + +// This file defines the bytecode interpreter. + +import ( + "fmt" + "os" + "sync/atomic" + "unsafe" + + "go.starlark.net/internal/compile" + "go.starlark.net/internal/spell" + "go.starlark.net/resolve" + "go.starlark.net/syntax" +) + +const vmdebug = false // TODO(adonovan): use a bitfield of specific kinds of error. + +// TODO(adonovan): +// - optimize position table. +// - opt: record MaxIterStack during compilation and preallocate the stack. + +func (fn *Function) CallInternal(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) { + // Postcondition: args is not mutated. 
This is stricter than required by Callable, + // but allows CALL to avoid a copy. + + if !resolve.AllowRecursion { + // detect recursion + for _, fr := range thread.stack[:len(thread.stack)-1] { + // We look for the same function code, + // not function value, otherwise the user could + // defeat the check by writing the Y combinator. + if frfn, ok := fr.Callable().(*Function); ok && frfn.funcode == fn.funcode { + return nil, fmt.Errorf("function %s called recursively", fn.Name()) + } + } + } + + f := fn.funcode + fr := thread.frameAt(0) + + // Allocate space for stack and locals. + // Logically these do not escape from this frame + // (See https://github.com/golang/go/issues/20533.) + // + // This heap allocation looks expensive, but I was unable to get + // more than 1% real time improvement in a large alloc-heavy + // benchmark (in which this alloc was 8% of alloc-bytes) + // by allocating space for 8 Values in each frame, or + // by allocating stack by slicing an array held by the Thread + // that is expanded in chunks of min(k, nspace), for k=256 or 1024. + nlocals := len(f.Locals) + nspace := nlocals + f.MaxStack + space := make([]Value, nspace) + locals := space[:nlocals:nlocals] // local variables, starting with parameters + stack := space[nlocals:] // operand stack + + // Digest arguments and set parameters. + err := setArgs(locals, fn, args, kwargs) + if err != nil { + return nil, thread.evalError(err) + } + + fr.locals = locals + + if vmdebug { + fmt.Printf("Entering %s @ %s\n", f.Name, f.Position(0)) + fmt.Printf("%d stack, %d locals\n", len(stack), len(locals)) + defer fmt.Println("Leaving ", f.Name) + } + + // Spill indicated locals to cells. + // Each cell is a separate alloc to avoid spurious liveness. + for _, index := range f.Cells { + locals[index] = &cell{locals[index]} + } + + // TODO(adonovan): add static check that beneath this point + // - there is exactly one return statement + // - there is no redefinition of 'err'. 
+ + var iterstack []Iterator // stack of active iterators + + sp := 0 + var pc uint32 + var result Value + code := f.Code +loop: + for { + thread.steps++ + if thread.steps >= thread.maxSteps { + thread.Cancel("too many steps") + } + if reason := atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&thread.cancelReason))); reason != nil { + err = fmt.Errorf("Starlark computation cancelled: %s", *(*string)(reason)) + break loop + } + + fr.pc = pc + + op := compile.Opcode(code[pc]) + pc++ + var arg uint32 + if op >= compile.OpcodeArgMin { + // TODO(adonovan): opt: profile this. + // Perhaps compiling big endian would be less work to decode? + for s := uint(0); ; s += 7 { + b := code[pc] + pc++ + arg |= uint32(b&0x7f) << s + if b < 0x80 { + break + } + } + } + if vmdebug { + fmt.Fprintln(os.Stderr, stack[:sp]) // very verbose! + compile.PrintOp(f, fr.pc, op, arg) + } + + switch op { + case compile.NOP: + // nop + + case compile.DUP: + stack[sp] = stack[sp-1] + sp++ + + case compile.DUP2: + stack[sp] = stack[sp-2] + stack[sp+1] = stack[sp-1] + sp += 2 + + case compile.POP: + sp-- + + case compile.EXCH: + stack[sp-2], stack[sp-1] = stack[sp-1], stack[sp-2] + + case compile.EQL, compile.NEQ, compile.GT, compile.LT, compile.LE, compile.GE: + op := syntax.Token(op-compile.EQL) + syntax.EQL + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + ok, err2 := Compare(op, x, y) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = Bool(ok) + sp++ + + case compile.PLUS, + compile.MINUS, + compile.STAR, + compile.SLASH, + compile.SLASHSLASH, + compile.PERCENT, + compile.AMP, + compile.PIPE, + compile.CIRCUMFLEX, + compile.LTLT, + compile.GTGT, + compile.IN: + binop := syntax.Token(op-compile.PLUS) + syntax.PLUS + if op == compile.IN { + binop = syntax.IN // IN token is out of order + } + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + z, err2 := Binary(binop, x, y) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = z + sp++ + + case compile.UPLUS, compile.UMINUS, 
compile.TILDE: + var unop syntax.Token + if op == compile.TILDE { + unop = syntax.TILDE + } else { + unop = syntax.Token(op-compile.UPLUS) + syntax.PLUS + } + x := stack[sp-1] + y, err2 := Unary(unop, x) + if err2 != nil { + err = err2 + break loop + } + stack[sp-1] = y + + case compile.INPLACE_ADD: + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + + // It's possible that y is not Iterable but + // nonetheless defines x+y, in which case we + // should fall back to the general case. + var z Value + if xlist, ok := x.(*List); ok { + if yiter, ok := y.(Iterable); ok { + if err = xlist.checkMutable("apply += to"); err != nil { + break loop + } + listExtend(xlist, yiter) + z = xlist + } + } + if z == nil { + z, err = Binary(syntax.PLUS, x, y) + if err != nil { + break loop + } + } + + stack[sp] = z + sp++ + + case compile.NONE: + stack[sp] = None + sp++ + + case compile.TRUE: + stack[sp] = True + sp++ + + case compile.FALSE: + stack[sp] = False + sp++ + + case compile.MANDATORY: + stack[sp] = mandatory{} + sp++ + + case compile.JMP: + pc = arg + + case compile.CALL, compile.CALL_VAR, compile.CALL_KW, compile.CALL_VAR_KW: + var kwargs Value + if op == compile.CALL_KW || op == compile.CALL_VAR_KW { + kwargs = stack[sp-1] + sp-- + } + + var args Value + if op == compile.CALL_VAR || op == compile.CALL_VAR_KW { + args = stack[sp-1] + sp-- + } + + // named args (pairs) + var kvpairs []Tuple + if nkvpairs := int(arg & 0xff); nkvpairs > 0 { + kvpairs = make([]Tuple, 0, nkvpairs) + kvpairsAlloc := make(Tuple, 2*nkvpairs) // allocate a single backing array + sp -= 2 * nkvpairs + for i := 0; i < nkvpairs; i++ { + pair := kvpairsAlloc[:2:2] + kvpairsAlloc = kvpairsAlloc[2:] + pair[0] = stack[sp+2*i] // name + pair[1] = stack[sp+2*i+1] // value + kvpairs = append(kvpairs, pair) + } + } + if kwargs != nil { + // Add key/value items from **kwargs dictionary. 
+ dict, ok := kwargs.(IterableMapping) + if !ok { + err = fmt.Errorf("argument after ** must be a mapping, not %s", kwargs.Type()) + break loop + } + items := dict.Items() + for _, item := range items { + if _, ok := item[0].(String); !ok { + err = fmt.Errorf("keywords must be strings, not %s", item[0].Type()) + break loop + } + } + if len(kvpairs) == 0 { + kvpairs = items + } else { + kvpairs = append(kvpairs, items...) + } + } + + // positional args + var positional Tuple + if npos := int(arg >> 8); npos > 0 { + positional = stack[sp-npos : sp] + sp -= npos + + // Copy positional arguments into a new array, + // unless the callee is another Starlark function, + // in which case it can be trusted not to mutate them. + if _, ok := stack[sp-1].(*Function); !ok || args != nil { + positional = append(Tuple(nil), positional...) + } + } + if args != nil { + // Add elements from *args sequence. + iter := Iterate(args) + if iter == nil { + err = fmt.Errorf("argument after * must be iterable, not %s", args.Type()) + break loop + } + var elem Value + for iter.Next(&elem) { + positional = append(positional, elem) + } + iter.Done() + } + + function := stack[sp-1] + + if vmdebug { + fmt.Printf("VM call %s args=%s kwargs=%s @%s\n", + function, positional, kvpairs, f.Position(fr.pc)) + } + + thread.endProfSpan() + z, err2 := Call(thread, function, positional, kvpairs) + thread.beginProfSpan() + if err2 != nil { + err = err2 + break loop + } + if vmdebug { + fmt.Printf("Resuming %s @ %s\n", f.Name, f.Position(0)) + } + stack[sp-1] = z + + case compile.ITERPUSH: + x := stack[sp-1] + sp-- + iter := Iterate(x) + if iter == nil { + err = fmt.Errorf("%s value is not iterable", x.Type()) + break loop + } + iterstack = append(iterstack, iter) + + case compile.ITERJMP: + iter := iterstack[len(iterstack)-1] + if iter.Next(&stack[sp]) { + sp++ + } else { + pc = arg + } + + case compile.ITERPOP: + n := len(iterstack) - 1 + iterstack[n].Done() + iterstack = iterstack[:n] + + case 
compile.NOT: + stack[sp-1] = !stack[sp-1].Truth() + + case compile.RETURN: + result = stack[sp-1] + break loop + + case compile.SETINDEX: + z := stack[sp-1] + y := stack[sp-2] + x := stack[sp-3] + sp -= 3 + err = setIndex(x, y, z) + if err != nil { + break loop + } + + case compile.INDEX: + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + z, err2 := getIndex(x, y) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = z + sp++ + + case compile.ATTR: + x := stack[sp-1] + name := f.Prog.Names[arg] + y, err2 := getAttr(x, name) + if err2 != nil { + err = err2 + break loop + } + stack[sp-1] = y + + case compile.SETFIELD: + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + name := f.Prog.Names[arg] + if err2 := setField(x, name, y); err2 != nil { + err = err2 + break loop + } + + case compile.MAKEDICT: + stack[sp] = new(Dict) + sp++ + + case compile.SETDICT, compile.SETDICTUNIQ: + dict := stack[sp-3].(*Dict) + k := stack[sp-2] + v := stack[sp-1] + sp -= 3 + oldlen := dict.Len() + if err2 := dict.SetKey(k, v); err2 != nil { + err = err2 + break loop + } + if op == compile.SETDICTUNIQ && dict.Len() == oldlen { + err = fmt.Errorf("duplicate key: %v", k) + break loop + } + + case compile.APPEND: + elem := stack[sp-1] + list := stack[sp-2].(*List) + sp -= 2 + list.elems = append(list.elems, elem) + + case compile.SLICE: + x := stack[sp-4] + lo := stack[sp-3] + hi := stack[sp-2] + step := stack[sp-1] + sp -= 4 + res, err2 := slice(x, lo, hi, step) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = res + sp++ + + case compile.UNPACK: + n := int(arg) + iterable := stack[sp-1] + sp-- + iter := Iterate(iterable) + if iter == nil { + err = fmt.Errorf("got %s in sequence assignment", iterable.Type()) + break loop + } + i := 0 + sp += n + for i < n && iter.Next(&stack[sp-1-i]) { + i++ + } + var dummy Value + if iter.Next(&dummy) { + // NB: Len may return -1 here in obscure cases. 
+ err = fmt.Errorf("too many values to unpack (got %d, want %d)", Len(iterable), n) + break loop + } + iter.Done() + if i < n { + err = fmt.Errorf("too few values to unpack (got %d, want %d)", i, n) + break loop + } + + case compile.CJMP: + if stack[sp-1].Truth() { + pc = arg + } + sp-- + + case compile.CONSTANT: + stack[sp] = fn.module.constants[arg] + sp++ + + case compile.MAKETUPLE: + n := int(arg) + tuple := make(Tuple, n) + sp -= n + copy(tuple, stack[sp:]) + stack[sp] = tuple + sp++ + + case compile.MAKELIST: + n := int(arg) + elems := make([]Value, n) + sp -= n + copy(elems, stack[sp:]) + stack[sp] = NewList(elems) + sp++ + + case compile.MAKEFUNC: + funcode := f.Prog.Functions[arg] + tuple := stack[sp-1].(Tuple) + n := len(tuple) - len(funcode.Freevars) + defaults := tuple[:n:n] + freevars := tuple[n:] + stack[sp-1] = &Function{ + funcode: funcode, + module: fn.module, + defaults: defaults, + freevars: freevars, + } + + case compile.LOAD: + n := int(arg) + module := string(stack[sp-1].(String)) + sp-- + + if thread.Load == nil { + err = fmt.Errorf("load not implemented by this application") + break loop + } + + thread.endProfSpan() + dict, err2 := thread.Load(thread, module) + thread.beginProfSpan() + if err2 != nil { + err = wrappedError{ + msg: fmt.Sprintf("cannot load %s: %v", module, err2), + cause: err2, + } + break loop + } + + for i := 0; i < n; i++ { + from := string(stack[sp-1-i].(String)) + v, ok := dict[from] + if !ok { + err = fmt.Errorf("load: name %s not found in module %s", from, module) + if n := spell.Nearest(from, dict.Keys()); n != "" { + err = fmt.Errorf("%s (did you mean %s?)", err, n) + } + break loop + } + stack[sp-1-i] = v + } + + case compile.SETLOCAL: + locals[arg] = stack[sp-1] + sp-- + + case compile.SETLOCALCELL: + locals[arg].(*cell).v = stack[sp-1] + sp-- + + case compile.SETGLOBAL: + fn.module.globals[arg] = stack[sp-1] + sp-- + + case compile.LOCAL: + x := locals[arg] + if x == nil { + err = fmt.Errorf("local variable %s 
referenced before assignment", f.Locals[arg].Name) + break loop + } + stack[sp] = x + sp++ + + case compile.FREE: + stack[sp] = fn.freevars[arg] + sp++ + + case compile.LOCALCELL: + v := locals[arg].(*cell).v + if v == nil { + err = fmt.Errorf("local variable %s referenced before assignment", f.Locals[arg].Name) + break loop + } + stack[sp] = v + sp++ + + case compile.FREECELL: + v := fn.freevars[arg].(*cell).v + if v == nil { + err = fmt.Errorf("local variable %s referenced before assignment", f.Freevars[arg].Name) + break loop + } + stack[sp] = v + sp++ + + case compile.GLOBAL: + x := fn.module.globals[arg] + if x == nil { + err = fmt.Errorf("global variable %s referenced before assignment", f.Prog.Globals[arg].Name) + break loop + } + stack[sp] = x + sp++ + + case compile.PREDECLARED: + name := f.Prog.Names[arg] + x := fn.module.predeclared[name] + if x == nil { + err = fmt.Errorf("internal error: predeclared variable %s is uninitialized", name) + break loop + } + stack[sp] = x + sp++ + + case compile.UNIVERSAL: + stack[sp] = Universe[f.Prog.Names[arg]] + sp++ + + default: + err = fmt.Errorf("unimplemented: %s", op) + break loop + } + } + + // ITERPOP the rest of the iterator stack. + for _, iter := range iterstack { + iter.Done() + } + + fr.locals = nil + + return result, err +} + +type wrappedError struct { + msg string + cause error +} + +func (e wrappedError) Error() string { + return e.msg +} + +// Implements the xerrors.Wrapper interface +// https://godoc.org/golang.org/x/xerrors#Wrapper +func (e wrappedError) Unwrap() error { + return e.cause +} + +// mandatory is a sentinel value used in a function's defaults tuple +// to indicate that a (keyword-only) parameter is mandatory. 
+type mandatory struct{} + +func (mandatory) String() string { return "mandatory" } +func (mandatory) Type() string { return "mandatory" } +func (mandatory) Freeze() {} // immutable +func (mandatory) Truth() Bool { return False } +func (mandatory) Hash() (uint32, error) { return 0, nil } + +// A cell is a box containing a Value. +// Local variables marked as cells hold their value indirectly +// so that they may be shared by outer and inner nested functions. +// Cells are always accessed using indirect {FREE,LOCAL,SETLOCAL}CELL instructions. +// The FreeVars tuple contains only cells. +// The FREE instruction always yields a cell. +type cell struct{ v Value } + +func (c *cell) String() string { return "cell" } +func (c *cell) Type() string { return "cell" } +func (c *cell) Freeze() { + if c.v != nil { + c.v.Freeze() + } +} +func (c *cell) Truth() Bool { panic("unreachable") } +func (c *cell) Hash() (uint32, error) { panic("unreachable") } diff --git a/starlark/library.go b/starlark/library.go new file mode 100644 index 0000000..5620426 --- /dev/null +++ b/starlark/library.go @@ -0,0 +1,2251 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark + +// This file defines the library of built-ins. +// +// Built-ins must explicitly check the "frozen" flag before updating +// mutable types such as lists and dicts. + +import ( + "errors" + "fmt" + "math" + "math/big" + "os" + "sort" + "strconv" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" + + "go.starlark.net/syntax" +) + +// Universe defines the set of universal built-ins, such as None, True, and len. +// +// The Go application may add or remove items from the +// universe dictionary before Starlark evaluation begins. +// All values in the dictionary must be immutable. +// Starlark programs cannot modify the dictionary. 
+var Universe StringDict + +func init() { + // https://github.com/google/starlark-go/blob/master/doc/spec.md#built-in-constants-and-functions + Universe = StringDict{ + "None": None, + "True": True, + "False": False, + "any": NewBuiltin("any", any), + "all": NewBuiltin("all", all), + "bool": NewBuiltin("bool", bool_), + "bytes": NewBuiltin("bytes", bytes_), + "chr": NewBuiltin("chr", chr), + "dict": NewBuiltin("dict", dict), + "dir": NewBuiltin("dir", dir), + "enumerate": NewBuiltin("enumerate", enumerate), + "fail": NewBuiltin("fail", fail), + "float": NewBuiltin("float", float), + "getattr": NewBuiltin("getattr", getattr), + "hasattr": NewBuiltin("hasattr", hasattr), + "hash": NewBuiltin("hash", hash), + "int": NewBuiltin("int", int_), + "len": NewBuiltin("len", len_), + "list": NewBuiltin("list", list), + "max": NewBuiltin("max", minmax), + "min": NewBuiltin("min", minmax), + "ord": NewBuiltin("ord", ord), + "print": NewBuiltin("print", print), + "range": NewBuiltin("range", range_), + "repr": NewBuiltin("repr", repr), + "reversed": NewBuiltin("reversed", reversed), + "set": NewBuiltin("set", set), // requires resolve.AllowSet + "sorted": NewBuiltin("sorted", sorted), + "str": NewBuiltin("str", str), + "tuple": NewBuiltin("tuple", tuple), + "type": NewBuiltin("type", type_), + "zip": NewBuiltin("zip", zip), + } +} + +// methods of built-in types +// https://github.com/google/starlark-go/blob/master/doc/spec.md#built-in-methods +var ( + bytesMethods = map[string]*Builtin{ + "elems": NewBuiltin("elems", bytes_elems), + } + + dictMethods = map[string]*Builtin{ + "clear": NewBuiltin("clear", dict_clear), + "get": NewBuiltin("get", dict_get), + "items": NewBuiltin("items", dict_items), + "keys": NewBuiltin("keys", dict_keys), + "pop": NewBuiltin("pop", dict_pop), + "popitem": NewBuiltin("popitem", dict_popitem), + "setdefault": NewBuiltin("setdefault", dict_setdefault), + "update": NewBuiltin("update", dict_update), + "values": NewBuiltin("values", dict_values), + } 
+ + listMethods = map[string]*Builtin{ + "append": NewBuiltin("append", list_append), + "clear": NewBuiltin("clear", list_clear), + "extend": NewBuiltin("extend", list_extend), + "index": NewBuiltin("index", list_index), + "insert": NewBuiltin("insert", list_insert), + "pop": NewBuiltin("pop", list_pop), + "remove": NewBuiltin("remove", list_remove), + } + + stringMethods = map[string]*Builtin{ + "capitalize": NewBuiltin("capitalize", string_capitalize), + "codepoint_ords": NewBuiltin("codepoint_ords", string_iterable), + "codepoints": NewBuiltin("codepoints", string_iterable), // sic + "count": NewBuiltin("count", string_count), + "elem_ords": NewBuiltin("elem_ords", string_iterable), + "elems": NewBuiltin("elems", string_iterable), // sic + "endswith": NewBuiltin("endswith", string_startswith), // sic + "find": NewBuiltin("find", string_find), + "format": NewBuiltin("format", string_format), + "index": NewBuiltin("index", string_index), + "isalnum": NewBuiltin("isalnum", string_isalnum), + "isalpha": NewBuiltin("isalpha", string_isalpha), + "isdigit": NewBuiltin("isdigit", string_isdigit), + "islower": NewBuiltin("islower", string_islower), + "isspace": NewBuiltin("isspace", string_isspace), + "istitle": NewBuiltin("istitle", string_istitle), + "isupper": NewBuiltin("isupper", string_isupper), + "join": NewBuiltin("join", string_join), + "lower": NewBuiltin("lower", string_lower), + "lstrip": NewBuiltin("lstrip", string_strip), // sic + "partition": NewBuiltin("partition", string_partition), + "replace": NewBuiltin("replace", string_replace), + "rfind": NewBuiltin("rfind", string_rfind), + "rindex": NewBuiltin("rindex", string_rindex), + "rpartition": NewBuiltin("rpartition", string_partition), // sic + "rsplit": NewBuiltin("rsplit", string_split), // sic + "rstrip": NewBuiltin("rstrip", string_strip), // sic + "split": NewBuiltin("split", string_split), + "splitlines": NewBuiltin("splitlines", string_splitlines), + "startswith": NewBuiltin("startswith", 
string_startswith), + "strip": NewBuiltin("strip", string_strip), + "title": NewBuiltin("title", string_title), + "upper": NewBuiltin("upper", string_upper), + } + + setMethods = map[string]*Builtin{ + "union": NewBuiltin("union", set_union), + } +) + +func builtinAttr(recv Value, name string, methods map[string]*Builtin) (Value, error) { + b := methods[name] + if b == nil { + return nil, nil // no such method + } + return b.BindReceiver(recv), nil +} + +func builtinAttrNames(methods map[string]*Builtin) []string { + names := make([]string, 0, len(methods)) + for name := range methods { + names = append(names, name) + } + sort.Strings(names) + return names +} + +// ---- built-in functions ---- + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#all +func all(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("all", args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + var x Value + for iter.Next(&x) { + if !x.Truth() { + return False, nil + } + } + return True, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#any +func any(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("any", args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + var x Value + for iter.Next(&x) { + if x.Truth() { + return True, nil + } + } + return False, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#bool +func bool_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value = False + if err := UnpackPositionalArgs("bool", args, kwargs, 0, &x); err != nil { + return nil, err + } + return x.Truth(), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#bytes +func bytes_(thread *Thread, _ *Builtin, args 
Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("bytes does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("bytes: got %d arguments, want exactly 1", len(args)) + } + switch x := args[0].(type) { + case Bytes: + return x, nil + case String: + // Invalid encodings are replaced by that of U+FFFD. + return Bytes(utf8Transcode(string(x))), nil + case Iterable: + // iterable of numeric byte values + var buf strings.Builder + if n := Len(x); n >= 0 { + // common case: known length + buf.Grow(n) + } + iter := x.Iterate() + defer iter.Done() + var elem Value + var b byte + for i := 0; iter.Next(&elem); i++ { + if err := AsInt(elem, &b); err != nil { + return nil, fmt.Errorf("bytes: at index %d, %s", i, err) + } + buf.WriteByte(b) + } + return Bytes(buf.String()), nil + + default: + // Unlike string(foo), which stringifies it, bytes(foo) is an error. + return nil, fmt.Errorf("bytes: got %s, want string, bytes, or iterable of ints", x.Type()) + } +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#chr +func chr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("chr does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("chr: got %d arguments, want 1", len(args)) + } + i, err := AsInt32(args[0]) + if err != nil { + return nil, fmt.Errorf("chr: %s", err) + } + if i < 0 { + return nil, fmt.Errorf("chr: Unicode code point %d out of range (<0)", i) + } + if i > unicode.MaxRune { + return nil, fmt.Errorf("chr: Unicode code point U+%X out of range (>0x10FFFF)", i) + } + return String(string(rune(i))), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict +func dict(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(args) > 1 { + return nil, fmt.Errorf("dict: got %d arguments, want at most 1", len(args)) + } + dict := new(Dict) + if 
err := updateDict(dict, args, kwargs); err != nil { + return nil, fmt.Errorf("dict: %v", err) + } + return dict, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dir +func dir(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("dir does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("dir: got %d arguments, want 1", len(args)) + } + + var names []string + if x, ok := args[0].(HasAttrs); ok { + names = x.AttrNames() + } + sort.Strings(names) + elems := make([]Value, len(names)) + for i, name := range names { + elems[i] = String(name) + } + return NewList(elems), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#enumerate +func enumerate(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + var start int + if err := UnpackPositionalArgs("enumerate", args, kwargs, 1, &iterable, &start); err != nil { + return nil, err + } + + iter := iterable.Iterate() + defer iter.Done() + + var pairs []Value + var x Value + + if n := Len(iterable); n >= 0 { + // common case: known length + pairs = make([]Value, 0, n) + array := make(Tuple, 2*n) // allocate a single backing array + for i := 0; iter.Next(&x); i++ { + pair := array[:2:2] + array = array[2:] + pair[0] = MakeInt(start + i) + pair[1] = x + pairs = append(pairs, pair) + } + } else { + // non-sequence (unknown length) + for i := 0; iter.Next(&x); i++ { + pair := Tuple{MakeInt(start + i), x} + pairs = append(pairs, pair) + } + } + + return NewList(pairs), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#fail +func fail(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + sep := " " + if err := UnpackArgs("fail", nil, kwargs, "sep?", &sep); err != nil { + return nil, err + } + buf := new(strings.Builder) + buf.WriteString("fail: ") + for i, v := range args { + if i > 0 { + 
buf.WriteString(sep) + } + if s, ok := AsString(v); ok { + buf.WriteString(s) + } else { + writeValue(buf, v, nil) + } + } + + return nil, errors.New(buf.String()) +} + +func float(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("float does not accept keyword arguments") + } + if len(args) == 0 { + return Float(0.0), nil + } + if len(args) != 1 { + return nil, fmt.Errorf("float got %d arguments, wants 1", len(args)) + } + switch x := args[0].(type) { + case Bool: + if x { + return Float(1.0), nil + } else { + return Float(0.0), nil + } + case Int: + return x.finiteFloat() + case Float: + return x, nil + case String: + if x == "" { + return nil, fmt.Errorf("float: empty string") + } + // +/- NaN or Inf or Infinity (case insensitive)? + s := string(x) + switch x[len(x)-1] { + case 'y', 'Y': + if strings.EqualFold(s, "infinity") || strings.EqualFold(s, "+infinity") { + return inf, nil + } else if strings.EqualFold(s, "-infinity") { + return neginf, nil + } + case 'f', 'F': + if strings.EqualFold(s, "inf") || strings.EqualFold(s, "+inf") { + return inf, nil + } else if strings.EqualFold(s, "-inf") { + return neginf, nil + } + case 'n', 'N': + if strings.EqualFold(s, "nan") || strings.EqualFold(s, "+nan") || strings.EqualFold(s, "-nan") { + return nan, nil + } + } + f, err := strconv.ParseFloat(s, 64) + if math.IsInf(f, 0) { + return nil, fmt.Errorf("floating-point number too large") + } + if err != nil { + return nil, fmt.Errorf("invalid float literal: %s", s) + } + return Float(f), nil + default: + return nil, fmt.Errorf("float got %s, want number or string", x.Type()) + } +} + +var ( + inf = Float(math.Inf(+1)) + neginf = Float(math.Inf(-1)) + nan = Float(math.NaN()) +) + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#getattr +func getattr(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var object, dflt Value + var name string + if err := 
UnpackPositionalArgs("getattr", args, kwargs, 2, &object, &name, &dflt); err != nil { + return nil, err + } + if object, ok := object.(HasAttrs); ok { + v, err := object.Attr(name) + if err != nil { + // An error could mean the field doesn't exist, + // or it exists but could not be computed. + if dflt != nil { + return dflt, nil + } + return nil, nameErr(b, err) + } + if v != nil { + return v, nil + } + // (nil, nil) => no such field + } + if dflt != nil { + return dflt, nil + } + return nil, fmt.Errorf("getattr: %s has no .%s field or method", object.Type(), name) +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#hasattr +func hasattr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var object Value + var name string + if err := UnpackPositionalArgs("hasattr", args, kwargs, 2, &object, &name); err != nil { + return nil, err + } + if object, ok := object.(HasAttrs); ok { + v, err := object.Attr(name) + if err == nil { + return Bool(v != nil), nil + } + + // An error does not conclusively indicate presence or + // absence of a field: it could occur while computing + // the value of a present attribute, or it could be a + // "no such attribute" error with details. + for _, x := range object.AttrNames() { + if x == name { + return True, nil + } + } + } + return False, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#hash +func hash(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value + if err := UnpackPositionalArgs("hash", args, kwargs, 1, &x); err != nil { + return nil, err + } + + var h int + switch x := x.(type) { + case String: + // The Starlark spec requires that the hash function be + // deterministic across all runs, motivated by the need + // for reproducibility of builds. Thus we cannot call + // String.Hash, which uses the fastest implementation + // available, because as varies across process restarts, + // and may evolve with the implementation. 
+ h = int(javaStringHash(string(x))) + case Bytes: + h = int(softHashString(string(x))) // FNV32 + default: + return nil, fmt.Errorf("hash: got %s, want string or bytes", x.Type()) + } + return MakeInt(h), nil +} + +// javaStringHash returns the same hash as would be produced by +// java.lang.String.hashCode. This requires transcoding the string to +// UTF-16; transcoding may introduce Unicode replacement characters +// U+FFFD if s does not contain valid UTF-8. +func javaStringHash(s string) (h int32) { + for _, r := range s { + if utf16.IsSurrogate(r) { + c1, c2 := utf16.EncodeRune(r) + h = 31*h + c1 + h = 31*h + c2 + } else { + h = 31*h + r // r may be U+FFFD + } + } + return h +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#int +func int_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value = zero + var base Value + if err := UnpackArgs("int", args, kwargs, "x", &x, "base?", &base); err != nil { + return nil, err + } + + if s, ok := AsString(x); ok { + b := 10 + if base != nil { + var err error + b, err = AsInt32(base) + if err != nil { + return nil, fmt.Errorf("int: for base, got %s, want int", base.Type()) + } + if b != 0 && (b < 2 || b > 36) { + return nil, fmt.Errorf("int: base must be an integer >= 2 && <= 36") + } + } + res := parseInt(s, b) + if res == nil { + return nil, fmt.Errorf("int: invalid literal with base %d: %s", b, s) + } + return res, nil + } + + if base != nil { + return nil, fmt.Errorf("int: can't convert non-string with explicit base") + } + + if b, ok := x.(Bool); ok { + if b { + return one, nil + } else { + return zero, nil + } + } + + i, err := NumberToInt(x) + if err != nil { + return nil, fmt.Errorf("int: %s", err) + } + return i, nil +} + +// parseInt defines the behavior of int(string, base=int). It returns nil on error. 
+func parseInt(s string, base int) Value { + // remove sign + var neg bool + if s != "" { + if s[0] == '+' { + s = s[1:] + } else if s[0] == '-' { + neg = true + s = s[1:] + } + } + + // remove optional base prefix + baseprefix := 0 + if len(s) > 1 && s[0] == '0' { + if len(s) > 2 { + switch s[1] { + case 'o', 'O': + baseprefix = 8 + case 'x', 'X': + baseprefix = 16 + case 'b', 'B': + baseprefix = 2 + } + } + if baseprefix != 0 { + // Remove the base prefix if it matches + // the explicit base, or if base=0. + if base == 0 || baseprefix == base { + base = baseprefix + s = s[2:] + } + } else { + // For automatic base detection, + // a string starting with zero + // must be all zeros. + // Thus we reject int("0755", 0). + if base == 0 { + for i := 1; i < len(s); i++ { + if s[i] != '0' { + return nil + } + } + return zero + } + } + } + if base == 0 { + base = 10 + } + + // we explicitly handled sign above. + // if a sign remains, it is invalid. + if s != "" && (s[0] == '-' || s[0] == '+') { + return nil + } + + // s has no sign or base prefix. 
+ if i, ok := new(big.Int).SetString(s, base); ok { + res := MakeBigInt(i) + if neg { + res = zero.Sub(res) + } + return res + } + + return nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#len +func len_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value + if err := UnpackPositionalArgs("len", args, kwargs, 1, &x); err != nil { + return nil, err + } + len := Len(x) + if len < 0 { + return nil, fmt.Errorf("len: value of type %s has no len", x.Type()) + } + return MakeInt(len), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#list +func list(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("list", args, kwargs, 0, &iterable); err != nil { + return nil, err + } + var elems []Value + if iterable != nil { + iter := iterable.Iterate() + defer iter.Done() + if n := Len(iterable); n > 0 { + elems = make([]Value, 0, n) // preallocate if length known + } + var x Value + for iter.Next(&x) { + elems = append(elems, x) + } + } + return NewList(elems), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#min +func minmax(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(args) == 0 { + return nil, fmt.Errorf("%s requires at least one positional argument", b.Name()) + } + var keyFunc Callable + if err := UnpackArgs(b.Name(), nil, kwargs, "key?", &keyFunc); err != nil { + return nil, err + } + var op syntax.Token + if b.Name() == "max" { + op = syntax.GT + } else { + op = syntax.LT + } + var iterable Value + if len(args) == 1 { + iterable = args[0] + } else { + iterable = args + } + iter := Iterate(iterable) + if iter == nil { + return nil, fmt.Errorf("%s: %s value is not iterable", b.Name(), iterable.Type()) + } + defer iter.Done() + var extremum Value + if !iter.Next(&extremum) { + return nil, nameErr(b, "argument is an empty sequence") + } + + var extremeKey 
Value + var keyargs Tuple + if keyFunc == nil { + extremeKey = extremum + } else { + keyargs = Tuple{extremum} + res, err := Call(thread, keyFunc, keyargs, nil) + if err != nil { + return nil, err // to preserve backtrace, don't modify error + } + extremeKey = res + } + + var x Value + for iter.Next(&x) { + var key Value + if keyFunc == nil { + key = x + } else { + keyargs[0] = x + res, err := Call(thread, keyFunc, keyargs, nil) + if err != nil { + return nil, err // to preserve backtrace, don't modify error + } + key = res + } + + if ok, err := Compare(op, key, extremeKey); err != nil { + return nil, nameErr(b, err) + } else if ok { + extremum = x + extremeKey = key + } + } + return extremum, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#ord +func ord(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("ord does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("ord: got %d arguments, want 1", len(args)) + } + switch x := args[0].(type) { + case String: + // ord(string) returns int value of sole rune. + s := string(x) + r, sz := utf8.DecodeRuneInString(s) + if sz == 0 || sz != len(s) { + n := utf8.RuneCountInString(s) + return nil, fmt.Errorf("ord: string encodes %d Unicode code points, want 1", n) + } + return MakeInt(int(r)), nil + + case Bytes: + // ord(bytes) returns int value of sole byte. 
+ if len(x) != 1 { + return nil, fmt.Errorf("ord: bytes has length %d, want 1", len(x)) + } + return MakeInt(int(x[0])), nil + default: + return nil, fmt.Errorf("ord: got %s, want string or bytes", x.Type()) + } +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#print +func print(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + sep := " " + if err := UnpackArgs("print", nil, kwargs, "sep?", &sep); err != nil { + return nil, err + } + buf := new(strings.Builder) + for i, v := range args { + if i > 0 { + buf.WriteString(sep) + } + if s, ok := AsString(v); ok { + buf.WriteString(s) + } else if b, ok := v.(Bytes); ok { + buf.WriteString(string(b)) + } else { + writeValue(buf, v, nil) + } + } + + s := buf.String() + if thread.Print != nil { + thread.Print(thread, s) + } else { + fmt.Fprintln(os.Stderr, s) + } + return None, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#range +func range_(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var start, stop, step int + step = 1 + if err := UnpackPositionalArgs("range", args, kwargs, 1, &start, &stop, &step); err != nil { + return nil, err + } + + if len(args) == 1 { + // range(stop) + start, stop = 0, start + } + if step == 0 { + // we were given range(start, stop, 0) + return nil, nameErr(b, "step argument must not be zero") + } + + return rangeValue{start: start, stop: stop, step: step, len: rangeLen(start, stop, step)}, nil +} + +// A rangeValue is a comparable, immutable, indexable sequence of integers +// defined by the three parameters to a range(...) call. +// Invariant: step != 0. 
+type rangeValue struct{ start, stop, step, len int } + +var ( + _ Indexable = rangeValue{} + _ Sequence = rangeValue{} + _ Comparable = rangeValue{} + _ Sliceable = rangeValue{} +) + +func (r rangeValue) Len() int { return r.len } +func (r rangeValue) Index(i int) Value { return MakeInt(r.start + i*r.step) } +func (r rangeValue) Iterate() Iterator { return &rangeIterator{r, 0} } + +// rangeLen calculates the length of a range with the provided start, stop, and step. +// caller must ensure that step is non-zero. +func rangeLen(start, stop, step int) int { + switch { + case step > 0: + if stop > start { + return (stop-1-start)/step + 1 + } + case step < 0: + if start > stop { + return (start-1-stop)/-step + 1 + } + default: + panic("rangeLen: zero step") + } + return 0 +} + +func (r rangeValue) Slice(start, end, step int) Value { + newStart := r.start + r.step*start + newStop := r.start + r.step*end + newStep := r.step * step + return rangeValue{ + start: newStart, + stop: newStop, + step: newStep, + len: rangeLen(newStart, newStop, newStep), + } +} + +func (r rangeValue) Freeze() {} // immutable +func (r rangeValue) String() string { + if r.step != 1 { + return fmt.Sprintf("range(%d, %d, %d)", r.start, r.stop, r.step) + } else if r.start != 0 { + return fmt.Sprintf("range(%d, %d)", r.start, r.stop) + } else { + return fmt.Sprintf("range(%d)", r.stop) + } +} +func (r rangeValue) Type() string { return "range" } +func (r rangeValue) Truth() Bool { return r.len > 0 } +func (r rangeValue) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: range") } + +func (x rangeValue) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(rangeValue) + switch op { + case syntax.EQL: + return rangeEqual(x, y), nil + case syntax.NEQ: + return !rangeEqual(x, y), nil + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } +} + +func rangeEqual(x, y rangeValue) bool { + // Two ranges compare equal if they denote the 
same sequence. + if x.len != y.len { + return false // sequences differ in length + } + if x.len == 0 { + return true // both sequences are empty + } + if x.start != y.start { + return false // first element differs + } + return x.len == 1 || x.step == y.step +} + +func (r rangeValue) contains(x Int) bool { + x32, err := AsInt32(x) + if err != nil { + return false // out of range + } + delta := x32 - r.start + quo, rem := delta/r.step, delta%r.step + return rem == 0 && 0 <= quo && quo < r.len +} + +type rangeIterator struct { + r rangeValue + i int +} + +func (it *rangeIterator) Next(p *Value) bool { + if it.i < it.r.len { + *p = it.r.Index(it.i) + it.i++ + return true + } + return false +} +func (*rangeIterator) Done() {} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#repr +func repr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value + if err := UnpackPositionalArgs("repr", args, kwargs, 1, &x); err != nil { + return nil, err + } + return String(x.String()), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#reversed +func reversed(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("reversed", args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + var elems []Value + if n := Len(args[0]); n >= 0 { + elems = make([]Value, 0, n) // preallocate if length known + } + var x Value + for iter.Next(&x) { + elems = append(elems, x) + } + n := len(elems) + for i := 0; i < n>>1; i++ { + elems[i], elems[n-1-i] = elems[n-1-i], elems[i] + } + return NewList(elems), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set +func set(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("set", args, kwargs, 0, &iterable); err != nil { + return nil, err + } + set := 
new(Set) + if iterable != nil { + iter := iterable.Iterate() + defer iter.Done() + var x Value + for iter.Next(&x) { + if err := set.Insert(x); err != nil { + return nil, nameErr(b, err) + } + } + } + return set, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#sorted +func sorted(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + // Oddly, Python's sorted permits all arguments to be positional, thus so do we. + var iterable Iterable + var key Callable + var reverse bool + if err := UnpackArgs("sorted", args, kwargs, + "iterable", &iterable, + "key?", &key, + "reverse?", &reverse, + ); err != nil { + return nil, err + } + + iter := iterable.Iterate() + defer iter.Done() + var values []Value + if n := Len(iterable); n > 0 { + values = make(Tuple, 0, n) // preallocate if length is known + } + var x Value + for iter.Next(&x) { + values = append(values, x) + } + + // Derive keys from values by applying key function. + var keys []Value + if key != nil { + keys = make([]Value, len(values)) + for i, v := range values { + k, err := Call(thread, key, Tuple{v}, nil) + if err != nil { + return nil, err // to preserve backtrace, don't modify error + } + keys[i] = k + } + } + + slice := &sortSlice{keys: keys, values: values} + if reverse { + sort.Stable(sort.Reverse(slice)) + } else { + sort.Stable(slice) + } + return NewList(slice.values), slice.err +} + +type sortSlice struct { + keys []Value // nil => values[i] is key + values []Value + err error +} + +func (s *sortSlice) Len() int { return len(s.values) } +func (s *sortSlice) Less(i, j int) bool { + keys := s.keys + if s.keys == nil { + keys = s.values + } + ok, err := Compare(syntax.LT, keys[i], keys[j]) + if err != nil { + s.err = err + } + return ok +} +func (s *sortSlice) Swap(i, j int) { + if s.keys != nil { + s.keys[i], s.keys[j] = s.keys[j], s.keys[i] + } + s.values[i], s.values[j] = s.values[j], s.values[i] +} + +// 
https://github.com/google/starlark-go/blob/master/doc/spec.md#str +func str(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("str does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("str: got %d arguments, want exactly 1", len(args)) + } + switch x := args[0].(type) { + case String: + return x, nil + case Bytes: + // Invalid encodings are replaced by that of U+FFFD. + return String(utf8Transcode(string(x))), nil + default: + return String(x.String()), nil + } +} + +// utf8Transcode returns the UTF-8-to-UTF-8 transcoding of s. +// The effect is that each code unit that is part of an +// invalid sequence is replaced by U+FFFD. +func utf8Transcode(s string) string { + if utf8.ValidString(s) { + return s + } + var out strings.Builder + for _, r := range s { + out.WriteRune(r) + } + return out.String() +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#tuple +func tuple(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("tuple", args, kwargs, 0, &iterable); err != nil { + return nil, err + } + if len(args) == 0 { + return Tuple(nil), nil + } + iter := iterable.Iterate() + defer iter.Done() + var elems Tuple + if n := Len(iterable); n > 0 { + elems = make(Tuple, 0, n) // preallocate if length is known + } + var x Value + for iter.Next(&x) { + elems = append(elems, x) + } + return elems, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#type +func type_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("type does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("type: got %d arguments, want exactly 1", len(args)) + } + return String(args[0].Type()), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#zip +func zip(thread 
*Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("zip does not accept keyword arguments") + } + rows, cols := 0, len(args) + iters := make([]Iterator, cols) + defer func() { + for _, iter := range iters { + if iter != nil { + iter.Done() + } + } + }() + for i, seq := range args { + it := Iterate(seq) + if it == nil { + return nil, fmt.Errorf("zip: argument #%d is not iterable: %s", i+1, seq.Type()) + } + iters[i] = it + n := Len(seq) + if i == 0 || n < rows { + rows = n // possibly -1 + } + } + var result []Value + if rows >= 0 { + // length known + result = make([]Value, rows) + array := make(Tuple, cols*rows) // allocate a single backing array + for i := 0; i < rows; i++ { + tuple := array[:cols:cols] + array = array[cols:] + for j, iter := range iters { + iter.Next(&tuple[j]) + } + result[i] = tuple + } + } else { + // length not known + outer: + for { + tuple := make(Tuple, cols) + for i, iter := range iters { + if !iter.Next(&tuple[i]) { + break outer + } + } + result = append(result, tuple) + } + } + return NewList(result), nil +} + +// ---- methods of built-in types --- + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·get +func dict_get(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var key, dflt Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &key, &dflt); err != nil { + return nil, err + } + if v, ok, err := b.Receiver().(*Dict).Get(key); err != nil { + return nil, nameErr(b, err) + } else if ok { + return v, nil + } else if dflt != nil { + return dflt, nil + } + return None, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·clear +func dict_clear(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + return None, b.Receiver().(*Dict).Clear() +} + +// 
https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·items +func dict_items(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + items := b.Receiver().(*Dict).Items() + res := make([]Value, len(items)) + for i, item := range items { + res[i] = item // convert [2]Value to Value + } + return NewList(res), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·keys +func dict_keys(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + return NewList(b.Receiver().(*Dict).Keys()), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·pop +func dict_pop(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var k, d Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &k, &d); err != nil { + return nil, err + } + if v, found, err := b.Receiver().(*Dict).Delete(k); err != nil { + return nil, nameErr(b, err) // dict is frozen or key is unhashable + } else if found { + return v, nil + } else if d != nil { + return d, nil + } + return nil, nameErr(b, "missing key") +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·popitem +func dict_popitem(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := b.Receiver().(*Dict) + k, ok := recv.ht.first() + if !ok { + return nil, nameErr(b, "empty dict") + } + v, _, err := recv.Delete(k) + if err != nil { + return nil, nameErr(b, err) // dict is frozen + } + return Tuple{k, v}, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·setdefault +func dict_setdefault(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var key, dflt Value = nil, 
None + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &key, &dflt); err != nil { + return nil, err + } + dict := b.Receiver().(*Dict) + if v, ok, err := dict.Get(key); err != nil { + return nil, nameErr(b, err) + } else if ok { + return v, nil + } else if err := dict.SetKey(key, dflt); err != nil { + return nil, nameErr(b, err) + } else { + return dflt, nil + } +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·update +func dict_update(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(args) > 1 { + return nil, fmt.Errorf("update: got %d arguments, want at most 1", len(args)) + } + if err := updateDict(b.Receiver().(*Dict), args, kwargs); err != nil { + return nil, fmt.Errorf("update: %v", err) + } + return None, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·update +func dict_values(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + items := b.Receiver().(*Dict).Items() + res := make([]Value, len(items)) + for i, item := range items { + res[i] = item[1] + } + return NewList(res), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·append +func list_append(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var object Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &object); err != nil { + return nil, err + } + recv := b.Receiver().(*List) + if err := recv.checkMutable("append to"); err != nil { + return nil, nameErr(b, err) + } + recv.elems = append(recv.elems, object) + return None, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·clear +func list_clear(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + if err := b.Receiver().(*List).Clear(); err != 
nil { + return nil, nameErr(b, err) + } + return None, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·extend +func list_extend(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := b.Receiver().(*List) + var iterable Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &iterable); err != nil { + return nil, err + } + if err := recv.checkMutable("extend"); err != nil { + return nil, nameErr(b, err) + } + listExtend(recv, iterable) + return None, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·index +func list_index(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var value, start_, end_ Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &value, &start_, &end_); err != nil { + return nil, err + } + + recv := b.Receiver().(*List) + start, end, err := indices(start_, end_, recv.Len()) + if err != nil { + return nil, nameErr(b, err) + } + + for i := start; i < end; i++ { + if eq, err := Equal(recv.elems[i], value); err != nil { + return nil, nameErr(b, err) + } else if eq { + return MakeInt(i), nil + } + } + return nil, nameErr(b, "value not in list") +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·insert +func list_insert(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := b.Receiver().(*List) + var index int + var object Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 2, &index, &object); err != nil { + return nil, err + } + if err := recv.checkMutable("insert into"); err != nil { + return nil, nameErr(b, err) + } + + if index < 0 { + index += recv.Len() + } + + if index >= recv.Len() { + // end + recv.elems = append(recv.elems, object) + } else { + if index < 0 { + index = 0 // start + } + recv.elems = append(recv.elems, nil) + copy(recv.elems[index+1:], recv.elems[index:]) // slide up one + recv.elems[index] = object + } + return None, nil +} + +// 
https://github.com/google/starlark-go/blob/master/doc/spec.md#list·remove +func list_remove(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := b.Receiver().(*List) + var value Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &value); err != nil { + return nil, err + } + if err := recv.checkMutable("remove from"); err != nil { + return nil, nameErr(b, err) + } + for i, elem := range recv.elems { + if eq, err := Equal(elem, value); err != nil { + return nil, fmt.Errorf("remove: %v", err) + } else if eq { + recv.elems = append(recv.elems[:i], recv.elems[i+1:]...) + return None, nil + } + } + return nil, fmt.Errorf("remove: element not found") +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·pop +func list_pop(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := b.Receiver() + list := recv.(*List) + n := list.Len() + i := n - 1 + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &i); err != nil { + return nil, err + } + origI := i + if i < 0 { + i += n + } + if i < 0 || i >= n { + return nil, nameErr(b, outOfRange(origI, n, list)) + } + if err := list.checkMutable("pop from"); err != nil { + return nil, nameErr(b, err) + } + res := list.elems[i] + list.elems = append(list.elems[:i], list.elems[i+1:]...) 
+ return res, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·capitalize +func string_capitalize(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + s := string(b.Receiver().(String)) + res := new(strings.Builder) + res.Grow(len(s)) + for i, r := range s { + if i == 0 { + r = unicode.ToTitle(r) + } else { + r = unicode.ToLower(r) + } + res.WriteRune(r) + } + return String(res.String()), nil +} + +// string_iterable returns an unspecified iterable value whose iterator yields: +// - elems: successive 1-byte substrings +// - codepoints: successive substrings that encode a single Unicode code point. +// - elem_ords: numeric values of successive bytes +// - codepoint_ords: numeric values of successive Unicode code points +func string_iterable(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + s := b.Receiver().(String) + ords := b.Name()[len(b.Name())-2] == 'd' + codepoints := b.Name()[0] == 'c' + if codepoints { + return stringCodepoints{s, ords}, nil + } else { + return stringElems{s, ords}, nil + } +} + +// bytes_elems returns an unspecified iterable value whose +// iterator yields the int values of successive elements. +func bytes_elems(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + return bytesIterable{b.Receiver().(Bytes)}, nil +} + +// A bytesIterable is an iterable returned by bytes.elems(), +// whose iterator yields a sequence of numeric bytes values. 
+type bytesIterable struct{ bytes Bytes } + +var _ Iterable = (*bytesIterable)(nil) + +func (bi bytesIterable) String() string { return bi.bytes.String() + ".elems()" } +func (bi bytesIterable) Type() string { return "bytes.elems" } +func (bi bytesIterable) Freeze() {} // immutable +func (bi bytesIterable) Truth() Bool { return True } +func (bi bytesIterable) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", bi.Type()) } +func (bi bytesIterable) Iterate() Iterator { return &bytesIterator{bi.bytes} } + +type bytesIterator struct{ bytes Bytes } + +func (it *bytesIterator) Next(p *Value) bool { + if it.bytes == "" { + return false + } + *p = MakeInt(int(it.bytes[0])) + it.bytes = it.bytes[1:] + return true +} + +func (*bytesIterator) Done() {} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·count +func string_count(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var sub string + var start_, end_ Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &sub, &start_, &end_); err != nil { + return nil, err + } + + recv := string(b.Receiver().(String)) + start, end, err := indices(start_, end_, len(recv)) + if err != nil { + return nil, nameErr(b, err) + } + + var slice string + if start < end { + slice = recv[start:end] + } + return MakeInt(strings.Count(slice, sub)), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isalnum +func string_isalnum(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + for _, r := range recv { + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isalpha +func string_isalpha(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := 
UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + for _, r := range recv { + if !unicode.IsLetter(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isdigit +func string_isdigit(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + for _, r := range recv { + if !unicode.IsDigit(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·islower +func string_islower(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + return Bool(isCasedString(recv) && recv == strings.ToLower(recv)), nil +} + +// isCasedString reports whether its argument contains any cased code points. +func isCasedString(s string) bool { + for _, r := range s { + if isCasedRune(r) { + return true + } + } + return false +} + +func isCasedRune(r rune) bool { + // It's unclear what the correct behavior is for a rune such as 'ffi', + // a lowercase letter with no upper or title case and no SimpleFold. 
+ return 'a' <= r && r <= 'z' || 'A' <= r && r <= 'Z' || unicode.SimpleFold(r) != r +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isspace +func string_isspace(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + for _, r := range recv { + if !unicode.IsSpace(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·istitle +func string_istitle(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + + // Python semantics differ from x==strings.{To,}Title(x) in Go: + // "uppercase characters may only follow uncased characters and + // lowercase characters only cased ones." + var cased, prevCased bool + for _, r := range recv { + if 'A' <= r && r <= 'Z' || unicode.IsTitle(r) { // e.g. 
"Dž" + if prevCased { + return False, nil + } + prevCased = true + cased = true + } else if unicode.IsLower(r) { + if !prevCased { + return False, nil + } + prevCased = true + cased = true + } else if unicode.IsUpper(r) { + return False, nil + } else { + prevCased = false + } + } + return Bool(cased), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isupper +func string_isupper(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + return Bool(isCasedString(recv) && recv == strings.ToUpper(recv)), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·find +func string_find(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(b, args, kwargs, true, false) +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·format +func string_format(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + format := string(b.Receiver().(String)) + var auto, manual bool // kinds of positional indexing used + buf := new(strings.Builder) + index := 0 + for { + literal := format + i := strings.IndexByte(format, '{') + if i >= 0 { + literal = format[:i] + } + + // Replace "}}" with "}" in non-field portion, rejecting a lone '}'. 
+ for { + j := strings.IndexByte(literal, '}') + if j < 0 { + buf.WriteString(literal) + break + } + if len(literal) == j+1 || literal[j+1] != '}' { + return nil, fmt.Errorf("format: single '}' in format") + } + buf.WriteString(literal[:j+1]) + literal = literal[j+2:] + } + + if i < 0 { + break // end of format string + } + + if i+1 < len(format) && format[i+1] == '{' { + // "{{" means a literal '{' + buf.WriteByte('{') + format = format[i+2:] + continue + } + + format = format[i+1:] + i = strings.IndexByte(format, '}') + if i < 0 { + return nil, fmt.Errorf("format: unmatched '{' in format") + } + + var arg Value + conv := "s" + var spec string + + field := format[:i] + format = format[i+1:] + + var name string + if i := strings.IndexByte(field, '!'); i < 0 { + // "name" or "name:spec" + if i := strings.IndexByte(field, ':'); i < 0 { + name = field + } else { + name = field[:i] + spec = field[i+1:] + } + } else { + // "name!conv" or "name!conv:spec" + name = field[:i] + field = field[i+1:] + // "conv" or "conv:spec" + if i := strings.IndexByte(field, ':'); i < 0 { + conv = field + } else { + conv = field[:i] + spec = field[i+1:] + } + } + + if name == "" { + // "{}": automatic indexing + if manual { + return nil, fmt.Errorf("format: cannot switch from manual field specification to automatic field numbering") + } + auto = true + if index >= len(args) { + return nil, fmt.Errorf("format: tuple index out of range") + } + arg = args[index] + index++ + } else if num, ok := decimal(name); ok { + // positional argument + if auto { + return nil, fmt.Errorf("format: cannot switch from automatic field numbering to manual field specification") + } + manual = true + if num >= len(args) { + return nil, fmt.Errorf("format: tuple index out of range") + } else { + arg = args[num] + } + } else { + // keyword argument + for _, kv := range kwargs { + if string(kv[0].(String)) == name { + arg = kv[1] + break + } + } + if arg == nil { + // Starlark does not support Python's x.y or a[i] 
syntaxes, + // or nested use of {...}. + if strings.Contains(name, ".") { + return nil, fmt.Errorf("format: attribute syntax x.y is not supported in replacement fields: %s", name) + } + if strings.Contains(name, "[") { + return nil, fmt.Errorf("format: element syntax a[i] is not supported in replacement fields: %s", name) + } + if strings.Contains(name, "{") { + return nil, fmt.Errorf("format: nested replacement fields not supported") + } + return nil, fmt.Errorf("format: keyword %s not found", name) + } + } + + if spec != "" { + // Starlark does not support Python's format_spec features. + return nil, fmt.Errorf("format spec features not supported in replacement fields: %s", spec) + } + + switch conv { + case "s": + if str, ok := AsString(arg); ok { + buf.WriteString(str) + } else { + writeValue(buf, arg, nil) + } + case "r": + writeValue(buf, arg, nil) + default: + return nil, fmt.Errorf("format: unknown conversion %q", conv) + } + } + return String(buf.String()), nil +} + +// decimal interprets s as a sequence of decimal digits. 
+func decimal(s string) (x int, ok bool) { + n := len(s) + for i := 0; i < n; i++ { + digit := s[i] - '0' + if digit > 9 { + return 0, false + } + x = x*10 + int(digit) + if x < 0 { + return 0, false // overflow + } + } + return x, true +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·index +func string_index(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(b, args, kwargs, false, false) +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·join +func string_join(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(b.Receiver().(String)) + var iterable Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + buf := new(strings.Builder) + var x Value + for i := 0; iter.Next(&x); i++ { + if i > 0 { + buf.WriteString(recv) + } + s, ok := AsString(x) + if !ok { + return nil, fmt.Errorf("join: in list, want string, got %s", x.Type()) + } + buf.WriteString(s) + } + return String(buf.String()), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·lower +func string_lower(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + return String(strings.ToLower(string(b.Receiver().(String)))), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·partition +func string_partition(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(b.Receiver().(String)) + var sep string + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &sep); err != nil { + return nil, err + } + if sep == "" { + return nil, nameErr(b, "empty separator") + } + var i int + if b.Name()[0] == 'p' { + i = strings.Index(recv, sep) // partition + } else { + i = 
strings.LastIndex(recv, sep) // rpartition + } + tuple := make(Tuple, 0, 3) + if i < 0 { + if b.Name()[0] == 'p' { + tuple = append(tuple, String(recv), String(""), String("")) + } else { + tuple = append(tuple, String(""), String(""), String(recv)) + } + } else { + tuple = append(tuple, String(recv[:i]), String(sep), String(recv[i+len(sep):])) + } + return tuple, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·replace +func string_replace(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(b.Receiver().(String)) + var old, new string + count := -1 + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 2, &old, &new, &count); err != nil { + return nil, err + } + return String(strings.Replace(recv, old, new, count)), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rfind +func string_rfind(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(b, args, kwargs, true, true) +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rindex +func string_rindex(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(b, args, kwargs, false, true) +} + +// https://github.com/google/starlark-go/starlark/blob/master/doc/spec.md#string·startswith +// https://github.com/google/starlark-go/starlark/blob/master/doc/spec.md#string·endswith +func string_startswith(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value + var start, end Value = None, None + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x, &start, &end); err != nil { + return nil, err + } + + // compute effective substring. 
+ s := string(b.Receiver().(String)) + if start, end, err := indices(start, end, len(s)); err != nil { + return nil, nameErr(b, err) + } else { + if end < start { + end = start // => empty result + } + s = s[start:end] + } + + f := strings.HasPrefix + if b.Name()[0] == 'e' { // endswith + f = strings.HasSuffix + } + + switch x := x.(type) { + case Tuple: + for i, x := range x { + prefix, ok := AsString(x) + if !ok { + return nil, fmt.Errorf("%s: want string, got %s, for element %d", + b.Name(), x.Type(), i) + } + if f(s, prefix) { + return True, nil + } + } + return False, nil + case String: + return Bool(f(s, string(x))), nil + } + return nil, fmt.Errorf("%s: got %s, want string or tuple of string", b.Name(), x.Type()) +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·strip +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·lstrip +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rstrip +func string_strip(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var chars string + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &chars); err != nil { + return nil, err + } + recv := string(b.Receiver().(String)) + var s string + switch b.Name()[0] { + case 's': // strip + if chars != "" { + s = strings.Trim(recv, chars) + } else { + s = strings.TrimSpace(recv) + } + case 'l': // lstrip + if chars != "" { + s = strings.TrimLeft(recv, chars) + } else { + s = strings.TrimLeftFunc(recv, unicode.IsSpace) + } + case 'r': // rstrip + if chars != "" { + s = strings.TrimRight(recv, chars) + } else { + s = strings.TrimRightFunc(recv, unicode.IsSpace) + } + } + return String(s), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·title +func string_title(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + + s := string(b.Receiver().(String)) 
+ + // Python semantics differ from x==strings.{To,}Title(x) in Go: + // "uppercase characters may only follow uncased characters and + // lowercase characters only cased ones." + buf := new(strings.Builder) + buf.Grow(len(s)) + var prevCased bool + for _, r := range s { + if prevCased { + r = unicode.ToLower(r) + } else { + r = unicode.ToTitle(r) + } + prevCased = isCasedRune(r) + buf.WriteRune(r) + } + return String(buf.String()), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·upper +func string_upper(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil { + return nil, err + } + return String(strings.ToUpper(string(b.Receiver().(String)))), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·split +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rsplit +func string_split(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(b.Receiver().(String)) + var sep_ Value + maxsplit := -1 + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &sep_, &maxsplit); err != nil { + return nil, err + } + + var res []string + + if sep_ == nil || sep_ == None { + // special case: split on whitespace + if maxsplit < 0 { + res = strings.Fields(recv) + } else if b.Name() == "split" { + res = splitspace(recv, maxsplit) + } else { // rsplit + res = rsplitspace(recv, maxsplit) + } + + } else if sep, ok := AsString(sep_); ok { + if sep == "" { + return nil, fmt.Errorf("split: empty separator") + } + // usual case: split on non-empty separator + if maxsplit < 0 { + res = strings.Split(recv, sep) + } else if b.Name() == "split" { + res = strings.SplitN(recv, sep, maxsplit+1) + } else { // rsplit + res = strings.Split(recv, sep) + if excess := len(res) - maxsplit; excess > 0 { + res[0] = strings.Join(res[:excess], sep) + res = append(res[:1], res[excess:]...) 
+ } + } + + } else { + return nil, fmt.Errorf("split: got %s for separator, want string", sep_.Type()) + } + + list := make([]Value, len(res)) + for i, x := range res { + list[i] = String(x) + } + return NewList(list), nil +} + +// Precondition: max >= 0. +func rsplitspace(s string, max int) []string { + res := make([]string, 0, max+1) + end := -1 // index of field end, or -1 in a region of spaces. + for i := len(s); i > 0; { + r, sz := utf8.DecodeLastRuneInString(s[:i]) + if unicode.IsSpace(r) { + if end >= 0 { + if len(res) == max { + break // let this field run to the start + } + res = append(res, s[i:end]) + end = -1 + } + } else if end < 0 { + end = i + } + i -= sz + } + if end >= 0 { + res = append(res, s[:end]) + } + + resLen := len(res) + for i := 0; i < resLen/2; i++ { + res[i], res[resLen-1-i] = res[resLen-1-i], res[i] + } + + return res +} + +// Precondition: max >= 0. +func splitspace(s string, max int) []string { + var res []string + start := -1 // index of field start, or -1 in a region of spaces + for i, r := range s { + if unicode.IsSpace(r) { + if start >= 0 { + if len(res) == max { + break // let this field run to the end + } + res = append(res, s[start:i]) + start = -1 + } + } else if start == -1 { + start = i + } + } + if start >= 0 { + res = append(res, s[start:]) + } + return res +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·splitlines +func string_splitlines(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var keepends bool + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &keepends); err != nil { + return nil, err + } + var lines []string + if s := string(b.Receiver().(String)); s != "" { + // TODO(adonovan): handle CRLF correctly. 
+ if keepends { + lines = strings.SplitAfter(s, "\n") + } else { + lines = strings.Split(s, "\n") + } + if strings.HasSuffix(s, "\n") { + lines = lines[:len(lines)-1] + } + } + list := make([]Value, len(lines)) + for i, x := range lines { + list[i] = String(x) + } + return NewList(list), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set·union. +func set_union(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + union, err := b.Receiver().(*Set).Union(iter) + if err != nil { + return nil, nameErr(b, err) + } + return union, nil +} + +// Common implementation of string_{r}{find,index}. +func string_find_impl(b *Builtin, args Tuple, kwargs []Tuple, allowError, last bool) (Value, error) { + var sub string + var start_, end_ Value + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &sub, &start_, &end_); err != nil { + return nil, err + } + + s := string(b.Receiver().(String)) + start, end, err := indices(start_, end_, len(s)) + if err != nil { + return nil, nameErr(b, err) + } + var slice string + if start < end { + slice = s[start:end] + } + + var i int + if last { + i = strings.LastIndex(slice, sub) + } else { + i = strings.Index(slice, sub) + } + if i < 0 { + if !allowError { + return nil, nameErr(b, "substring not found") + } + return MakeInt(-1), nil + } + return MakeInt(i + start), nil +} + +// Common implementation of builtin dict function and dict.update method. +// Precondition: len(updates) == 0 or 1. +func updateDict(dict *Dict, updates Tuple, kwargs []Tuple) error { + if len(updates) == 1 { + switch updates := updates[0].(type) { + case IterableMapping: + // Iterate over dict's key/value pairs, not just keys. 
+ for _, item := range updates.Items() { + if err := dict.SetKey(item[0], item[1]); err != nil { + return err // dict is frozen + } + } + default: + // all other sequences + iter := Iterate(updates) + if iter == nil { + return fmt.Errorf("got %s, want iterable", updates.Type()) + } + defer iter.Done() + var pair Value + for i := 0; iter.Next(&pair); i++ { + iter2 := Iterate(pair) + if iter2 == nil { + return fmt.Errorf("dictionary update sequence element #%d is not iterable (%s)", i, pair.Type()) + + } + defer iter2.Done() + len := Len(pair) + if len < 0 { + return fmt.Errorf("dictionary update sequence element #%d has unknown length (%s)", i, pair.Type()) + } else if len != 2 { + return fmt.Errorf("dictionary update sequence element #%d has length %d, want 2", i, len) + } + var k, v Value + iter2.Next(&k) + iter2.Next(&v) + if err := dict.SetKey(k, v); err != nil { + return err + } + } + } + } + + // Then add the kwargs. + before := dict.Len() + for _, pair := range kwargs { + if err := dict.SetKey(pair[0], pair[1]); err != nil { + return err // dict is frozen + } + } + // In the common case, each kwarg will add another dict entry. + // If that's not so, check whether it is because there was a duplicate kwarg. + if dict.Len() < before+len(kwargs) { + keys := make(map[String]bool, len(kwargs)) + for _, kv := range kwargs { + k := kv[0].(String) + if keys[k] { + return fmt.Errorf("duplicate keyword arg: %v", k) + } + keys[k] = true + } + } + + return nil +} + +// nameErr returns an error message of the form "name: msg" +// where name is b.Name() and msg is a string or error. +func nameErr(b *Builtin, msg interface{}) error { + return fmt.Errorf("%s: %v", b.Name(), msg) +} diff --git a/starlark/profile.go b/starlark/profile.go new file mode 100644 index 0000000..38da2b2 --- /dev/null +++ b/starlark/profile.go @@ -0,0 +1,449 @@ +// Copyright 2019 The Bazel Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark + +// This file defines a simple execution-time profiler for Starlark. +// It measures the wall time spent executing Starlark code, and emits a +// gzipped protocol message in pprof format (github.com/google/pprof). +// +// When profiling is enabled, the interpreter calls the profiler to +// indicate the start and end of each "span" or time interval. A leaf +// function (whether Go or Starlark) has a single span. A function that +// calls another function has spans for each interval in which it is the +// top of the stack. (A LOAD instruction also ends a span.) +// +// At the start of a span, the interpreter records the current time in +// the thread's topmost frame. At the end of the span, it obtains the +// time again and subtracts the span start time. The difference is added +// to an accumulator variable in the thread. If the accumulator exceeds +// some fixed quantum (10ms, say), the profiler records the current call +// stack and sends it to the profiler goroutine, along with the number +// of quanta, which are subtracted. For example, if the accumulator +// holds 3ms and then a completed span adds 25ms to it, its value is 28ms, +// which exceeds 10ms. The profiler records a stack with the value 20ms +// (2 quanta), and the accumulator is left with 8ms. +// +// The profiler goroutine converts the stacks into the pprof format and +// emits a gzip-compressed protocol message to the designated output +// file. We use a hand-written streaming proto encoder to avoid +// dependencies on pprof and proto, and to avoid the need to +// materialize the profile data structure in memory. +// +// A limitation of this profiler is that it measures wall time, which +// does not necessarily correspond to CPU time. 
A CPU profiler requires +// that only running (not runnable) threads are sampled; this is +// commonly achieved by having the kernel deliver a (PROF) signal to an +// arbitrary running thread, through setitimer(2). The CPU profiler in the +// Go runtime uses this mechanism, but it is not possible for a Go +// application to register a SIGPROF handler, nor is it possible for a +// Go handler for some other signal to read the stack pointer of +// the interrupted thread. +// +// Two caveats: +// (1) it is tempting to send the leaf Frame directly to the profiler +// goroutine instead of making a copy of the stack, since a Frame is a +// spaghetti stack--a linked list. However, as soon as execution +// resumes, the stack's Frame.pc values may be mutated, so Frames are +// not safe to share with the asynchronous profiler goroutine. +// (2) it is tempting to use Callables as keys in a map when tabulating +// the pprof protocol's Function entities. However, we cannot assume +// that Callables are valid map keys, and furthermore we must not +// pin function values in memory indefinitely as this may cause lambda +// values to keep their free variables live much longer than necessary. + +// TODO(adonovan): +// - make Start/Stop fully thread-safe. +// - fix the pc hack. +// - experiment with other values of quantum. + +import ( + "bufio" + "bytes" + "compress/gzip" + "encoding/binary" + "fmt" + "io" + "log" + "reflect" + "sync/atomic" + "time" + "unsafe" + + "go.starlark.net/syntax" +) + +// StartProfile enables time profiling of all Starlark threads, +// and writes a profile in pprof format to w. +// It must be followed by a call to StopProfile to stop +// the profiler and finalize the profile. +// +// StartProfile returns an error if profiling was already enabled. +// +// StartProfile must not be called concurrently with Starlark execution. 
+func StartProfile(w io.Writer) error { + if !atomic.CompareAndSwapUint32(&profiler.on, 0, 1) { + return fmt.Errorf("profiler already running") + } + + // TODO(adonovan): make the API fully concurrency-safe. + // The main challenge is racy reads/writes of profiler.events, + // and of send/close races on the channel it refers to. + // It's easy to solve them with a mutex but harder to do + // it efficiently. + + profiler.events = make(chan *profEvent, 1) + profiler.done = make(chan error) + + go profile(w) + + return nil +} + +// StopProfile stops the profiler started by a prior call to +// StartProfile and finalizes the profile. It returns an error if the +// profile could not be completed. +// +// StopProfile must not be called concurrently with Starlark execution. +func StopProfile() error { + // Terminate the profiler goroutine and get its result. + close(profiler.events) + err := <-profiler.done + + profiler.done = nil + profiler.events = nil + atomic.StoreUint32(&profiler.on, 0) + + return err +} + +// globals +var profiler struct { + on uint32 // nonzero => profiler running + events chan *profEvent // profile events from interpreter threads + done chan error // delivers the profiler goroutine's final error +} + +func (thread *Thread) beginProfSpan() { + if profiler.events == nil { + return // profiling not enabled + } + + thread.frameAt(0).spanStart = nanotime() +} + +// TODO(adonovan): experiment with smaller values, +// which trade space and time for greater precision. +const quantum = 10 * time.Millisecond + +func (thread *Thread) endProfSpan() { + if profiler.events == nil { + return // profiling not enabled + } + + // Add the span to the thread's accumulator. + thread.proftime += time.Duration(nanotime() - thread.frameAt(0).spanStart) + if thread.proftime < quantum { + return + } + + // Only record complete quanta. + n := thread.proftime / quantum + thread.proftime -= n * quantum + + // Copy the stack. 
+ // (We can't save thread.frame because its pc will change.) + ev := &profEvent{ + thread: thread, + time: n * quantum, + } + ev.stack = ev.stackSpace[:0] + for i := range thread.stack { + fr := thread.frameAt(i) + ev.stack = append(ev.stack, profFrame{ + pos: fr.Position(), + fn: fr.Callable(), + pc: fr.pc, + }) + } + + profiler.events <- ev +} + +type profEvent struct { + thread *Thread // currently unused + time time.Duration + stack []profFrame + stackSpace [8]profFrame // initial space for stack +} + +type profFrame struct { + fn Callable // don't hold this live for too long (prevents GC of lambdas) + pc uint32 // program counter (Starlark frames only) + pos syntax.Position // position of pc within this frame +} + +// profile is the profiler goroutine. +// It runs until StopProfile is called. +func profile(w io.Writer) { + // Field numbers from pprof protocol. + // See https://github.com/google/pprof/blob/master/proto/profile.proto + const ( + Profile_sample_type = 1 // repeated ValueType + Profile_sample = 2 // repeated Sample + Profile_mapping = 3 // repeated Mapping + Profile_location = 4 // repeated Location + Profile_function = 5 // repeated Function + Profile_string_table = 6 // repeated string + Profile_time_nanos = 9 // int64 + Profile_duration_nanos = 10 // int64 + Profile_period_type = 11 // ValueType + Profile_period = 12 // int64 + + ValueType_type = 1 // int64 + ValueType_unit = 2 // int64 + + Sample_location_id = 1 // repeated uint64 + Sample_value = 2 // repeated int64 + Sample_label = 3 // repeated Label + + Label_key = 1 // int64 + Label_str = 2 // int64 + Label_num = 3 // int64 + Label_num_unit = 4 // int64 + + Location_id = 1 // uint64 + Location_mapping_id = 2 // uint64 + Location_address = 3 // uint64 + Location_line = 4 // repeated Line + + Line_function_id = 1 // uint64 + Line_line = 2 // int64 + + Function_id = 1 // uint64 + Function_name = 2 // int64 + Function_system_name = 3 // int64 + Function_filename = 4 // int64 + 
Function_start_line = 5 // int64 + ) + + bufw := bufio.NewWriter(w) // write file in 4KB (not 240B flate-sized) chunks + gz := gzip.NewWriter(bufw) + enc := protoEncoder{w: gz} + + // strings + stringIndex := make(map[string]int64) + str := func(s string) int64 { + i, ok := stringIndex[s] + if !ok { + i = int64(len(stringIndex)) + enc.string(Profile_string_table, s) + stringIndex[s] = i + } + return i + } + str("") // entry 0 + + // functions + // + // function returns the ID of a Callable for use in Line.FunctionId. + // The ID is the same as the function's logical address, + // which is supplied by the caller to avoid the need to recompute it. + functionId := make(map[uintptr]uint64) + function := func(fn Callable, addr uintptr) uint64 { + id, ok := functionId[addr] + if !ok { + id = uint64(addr) + + var pos syntax.Position + if fn, ok := fn.(callableWithPosition); ok { + pos = fn.Position() + } + + name := fn.Name() + if name == "<toplevel>" { + name = pos.Filename() + } + + nameIndex := str(name) + + fun := new(bytes.Buffer) + funenc := protoEncoder{w: fun} + funenc.uint(Function_id, id) + funenc.int(Function_name, nameIndex) + funenc.int(Function_system_name, nameIndex) + funenc.int(Function_filename, str(pos.Filename())) + funenc.int(Function_start_line, int64(pos.Line)) + enc.bytes(Profile_function, fun.Bytes()) + + functionId[addr] = id + } + return id + } + + // locations + // + // location returns the ID of the location denoted by fr. + // For Starlark frames, this is the Frame pc. + locationId := make(map[uintptr]uint64) + location := func(fr profFrame) uint64 { + fnAddr := profFuncAddr(fr.fn) + + // For Starlark functions, the frame position + // represents the current PC value. + // Mix it into the low bits of the address. + // This is super hacky and may result in collisions + // in large functions or if functions are numerous. + // TODO(adonovan): fix: try making this cleaner by treating + // each bytecode segment as a Profile.Mapping. 
+ pcAddr := fnAddr + if _, ok := fr.fn.(*Function); ok { + pcAddr = (pcAddr << 16) ^ uintptr(fr.pc) + } + + id, ok := locationId[pcAddr] + if !ok { + id = uint64(pcAddr) + + line := new(bytes.Buffer) + lineenc := protoEncoder{w: line} + lineenc.uint(Line_function_id, function(fr.fn, fnAddr)) + lineenc.int(Line_line, int64(fr.pos.Line)) + loc := new(bytes.Buffer) + locenc := protoEncoder{w: loc} + locenc.uint(Location_id, id) + locenc.uint(Location_address, uint64(pcAddr)) + locenc.bytes(Location_line, line.Bytes()) + enc.bytes(Profile_location, loc.Bytes()) + + locationId[pcAddr] = id + } + return id + } + + wallNanos := new(bytes.Buffer) + wnenc := protoEncoder{w: wallNanos} + wnenc.int(ValueType_type, str("wall")) + wnenc.int(ValueType_unit, str("nanoseconds")) + + // informational fields of Profile + enc.bytes(Profile_sample_type, wallNanos.Bytes()) + enc.int(Profile_period, quantum.Nanoseconds()) // magnitude of sampling period + enc.bytes(Profile_period_type, wallNanos.Bytes()) // dimension and unit of period + enc.int(Profile_time_nanos, time.Now().UnixNano()) // start (real) time of profile + + startNano := nanotime() + + // Read profile events from the channel + // until it is closed by StopProfile. + for e := range profiler.events { + sample := new(bytes.Buffer) + sampleenc := protoEncoder{w: sample} + sampleenc.int(Sample_value, e.time.Nanoseconds()) // wall nanoseconds + for _, fr := range e.stack { + sampleenc.uint(Sample_location_id, location(fr)) + } + enc.bytes(Profile_sample, sample.Bytes()) + } + + endNano := nanotime() + enc.int(Profile_duration_nanos, endNano-startNano) + + err := gz.Close() // Close reports any prior write error + if flushErr := bufw.Flush(); err == nil { + err = flushErr + } + profiler.done <- err +} + +// nanotime returns the time in nanoseconds since epoch. 
+// It is implemented by runtime.nanotime using the linkname hack; +// runtime.nanotime is defined for all OSs/ARCHS and uses the +// monotonic system clock, which there is no portable way to access. +// Should that function ever go away, these alternatives exist: +// +// // POSIX only. REALTIME not MONOTONIC. 17ns. +// var tv syscall.Timeval +// syscall.Gettimeofday(&tv) // can't fail +// return tv.Nano() +// +// // Portable. REALTIME not MONOTONIC. 46ns. +// return time.Now().UnixNano() +// +// // POSIX only. Adds a dependency. +// import "golang.org/x/sys/unix" +// var ts unix.Timespec +// unix.ClockGettime(CLOCK_MONOTONIC, &ts) // can't fail +// return unix.TimespecToNsec(ts) +// +//go:linkname nanotime runtime.nanotime +func nanotime() int64 + +// profFuncAddr returns the canonical "address" +// of a Callable for use by the profiler. +func profFuncAddr(fn Callable) uintptr { + switch fn := fn.(type) { + case *Builtin: + return reflect.ValueOf(fn.fn).Pointer() + case *Function: + return uintptr(unsafe.Pointer(fn.funcode)) + } + + // User-defined callable types are typically of + // kind pointer-to-struct. Handle them specially. + if v := reflect.ValueOf(fn); v.Type().Kind() == reflect.Ptr { + return v.Pointer() + } + + // Address zero is reserved by the protocol. + // Use 1 for callables we don't recognize. + log.Printf("Starlark profiler: no address for Callable %T", fn) + return 1 +} + +// We encode the protocol message by hand to avoid making +// the interpreter depend on both github.com/google/pprof +// and github.com/golang/protobuf. +// +// This also avoids the need to materialize a protocol message object +// tree of unbounded size and serialize it all at the end. +// The pprof format appears to have been designed to +// permit streaming implementations such as this one. +// +// See https://developers.google.com/protocol-buffers/docs/encoding. 
+type protoEncoder struct { + w io.Writer // *bytes.Buffer or *gzip.Writer + tmp [binary.MaxVarintLen64]byte +} + +func (e *protoEncoder) uvarint(x uint64) { + n := binary.PutUvarint(e.tmp[:], x) + e.w.Write(e.tmp[:n]) +} + +func (e *protoEncoder) tag(field, wire uint) { + e.uvarint(uint64(field<<3 | wire)) +} + +func (e *protoEncoder) string(field uint, s string) { + e.tag(field, 2) // length-delimited + e.uvarint(uint64(len(s))) + io.WriteString(e.w, s) +} + +func (e *protoEncoder) bytes(field uint, b []byte) { + e.tag(field, 2) // length-delimited + e.uvarint(uint64(len(b))) + e.w.Write(b) +} + +func (e *protoEncoder) uint(field uint, x uint64) { + e.tag(field, 0) // varint + e.uvarint(x) +} + +func (e *protoEncoder) int(field uint, x int64) { + e.tag(field, 0) // varint + e.uvarint(uint64(x)) +} diff --git a/starlark/profile_test.go b/starlark/profile_test.go new file mode 100644 index 0000000..2781833 --- /dev/null +++ b/starlark/profile_test.go @@ -0,0 +1,83 @@ +// Copyright 2019 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark_test + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "os/exec" + "strings" + "testing" + + "go.starlark.net/starlark" +) + +// TestProfile is a simple integration test that the profiler +// emits minimally plausible pprof-compatible output. 
+func TestProfile(t *testing.T) { + prof, err := ioutil.TempFile("", "profile_test") + if err != nil { + t.Fatal(err) + } + defer prof.Close() + defer os.Remove(prof.Name()) + if err := starlark.StartProfile(prof); err != nil { + t.Fatal(err) + } + + const src = ` +def fibonacci(n): + res = list(range(n)) + for i in res[2:]: + res[i] = res[i-2] + res[i-1] + return res + +fibonacci(100000) +` + + thread := new(starlark.Thread) + if _, err := starlark.ExecFile(thread, "foo.star", src, nil); err != nil { + _ = starlark.StopProfile() + t.Fatal(err) + } + if err := starlark.StopProfile(); err != nil { + t.Fatal(err) + } + prof.Sync() + cmd := exec.Command("go", "tool", "pprof", "-top", prof.Name()) + cmd.Stderr = new(bytes.Buffer) + cmd.Stdout = new(bytes.Buffer) + if err := cmd.Run(); err != nil { + t.Fatalf("pprof failed: %v; output=<<%s>>", err, cmd.Stderr) + } + + // Typical output (may vary by go release): + // + // Type: wall + // Time: Apr 4, 2019 at 11:10am (EDT) + // Duration: 251.62ms, Total samples = 250ms (99.36%) + // Showing nodes accounting for 250ms, 100% of 250ms total + // flat flat% sum% cum cum% + // 320ms 100% 100% 320ms 100% fibonacci + // 0 0% 100% 320ms 100% foo.star + // + // We'll assert a few key substrings are present. + got := fmt.Sprint(cmd.Stdout) + for _, want := range []string{ + "flat%", + "fibonacci", + "foo.star", + } { + if !strings.Contains(got, want) { + t.Errorf("output did not contain %q", want) + } + } + if t.Failed() { + t.Logf("stderr=%v", cmd.Stderr) + t.Logf("stdout=%v", cmd.Stdout) + } +} diff --git a/starlark/testdata/assign.star b/starlark/testdata/assign.star new file mode 100644 index 0000000..7f579f0 --- /dev/null +++ b/starlark/testdata/assign.star @@ -0,0 +1,354 @@ +# Tests of Starlark assignment. + +# This is a "chunked" file: each "---" effectively starts a new file. 
+ +# tuple assignment +load("assert.star", "assert") + +() = () # empty ok + +a, b, c = 1, 2, 3 +assert.eq(a, 1) +assert.eq(b, 2) +assert.eq(c, 3) + +(d, e, f,) = (1, 2, 3) # trailing comma ok +--- +(a, b, c) = 1 ### "got int in sequence assignment" +--- +(a, b) = () ### "too few values to unpack" +--- +(a, b) = (1,) ### "too few values to unpack" +--- +(a, b, c) = (1, 2) ### "too few values to unpack" +--- +(a, b) = (1, 2, 3) ### "too many values to unpack" +--- +() = 1 ### "got int in sequence assignment" +--- +() = (1,) ### "too many values to unpack" +--- +() = (1, 2) ### "too many values to unpack" +--- +# list assignment +load("assert.star", "assert") + +[] = [] # empty ok + +[a, b, c] = [1, 2, 3] +assert.eq(a, 1) +assert.eq(b, 2) +assert.eq(c, 3) + +[d, e, f,] = [1, 2, 3] # trailing comma ok +--- +[a, b, c] = 1 ### "got int in sequence assignment" +--- +[a, b] = [] ### "too few values to unpack" +--- +[a, b] = [1] ### "too few values to unpack" +--- +[a, b, c] = [1, 2] ### "too few values to unpack" +--- +[a, b] = [1, 2, 3] ### "too many values to unpack" +--- +[] = 1 ### "got int in sequence assignment" +--- +[] = [1] ### "too many values to unpack" +--- +[] = [1, 2] ### "too many values to unpack" +--- +# list-tuple assignment +load("assert.star", "assert") + +# empty ok +[] = () +() = [] + +[a, b, c] = (1, 2, 3) +assert.eq(a, 1) +assert.eq(b, 2) +assert.eq(c, 3) + +[a2, b2, c2] = 1, 2, 3 # bare tuple ok + +(d, e, f) = [1, 2, 3] +assert.eq(d, 1) +assert.eq(e, 2) +assert.eq(f, 3) + +[g, h, (i, j)] = (1, 2, [3, 4]) +assert.eq(g, 1) +assert.eq(h, 2) +assert.eq(i, 3) +assert.eq(j, 4) + +(k, l, [m, n]) = [1, 2, (3, 4)] +assert.eq(k, 1) +assert.eq(l, 2) +assert.eq(m, 3) +assert.eq(n, 4) + +--- +# misc assignment +load("assert.star", "assert") + +def assignment(): + a = [1, 2, 3] + a[1] = 5 + assert.eq(a, [1, 5, 3]) + a[-2] = 2 + assert.eq(a, [1, 2, 3]) + assert.eq("%d %d" % (5, 7), "5 7") + x={} + x[1] = 2 + x[1] += 3 + assert.eq(x[1], 5) + def f12(): x[(1, 
"abc", {})] = 1 + assert.fails(f12, "unhashable type: dict") + +assignment() + +--- +# augmented assignment + +load("assert.star", "assert") + +def f(): + x = 1 + x += 1 + assert.eq(x, 2) + x *= 3 + assert.eq(x, 6) +f() + +--- +# effects of evaluating LHS occur only once + +load("assert.star", "assert") + +count = [0] # count[0] is the number of calls to f + +def f(): + count[0] += 1 + return count[0] + +x = [1, 2, 3] +x[f()] += 1 + +assert.eq(x, [1, 3, 3]) # sole call to f returned 1 +assert.eq(count[0], 1) # f was called only once + +--- +# Order of evaluation. + +load("assert.star", "assert") + +calls = [] + +def f(name, result): + calls.append(name) + return result + +# The right side is evaluated before the left in an ordinary assignment. +calls.clear() +f("array", [0])[f("index", 0)] = f("rhs", 0) +assert.eq(calls, ["rhs", "array", "index"]) + +calls.clear() +f("lhs1", [0])[0], f("lhs2", [0])[0] = f("rhs1", 0), f("rhs2", 0) +assert.eq(calls, ["rhs1", "rhs2", "lhs1", "lhs2"]) + +# Left side is evaluated first (and only once) in an augmented assignment. +calls.clear() +f("array", [0])[f("index", 0)] += f("addend", 1) +assert.eq(calls, ["array", "index", "addend"]) + +--- +# global referenced before assignment + +def f(): + return g ### "global variable g referenced before assignment" + +f() + +g = 1 + +--- +# Free variables are captured by reference, so this is ok. +load("assert.star", "assert") + +def f(): + def g(): + return outer + outer = 1 + return g() + +assert.eq(f(), 1) + +--- +load("assert.star", "assert") + +printok = [False] + +# This program should resolve successfully but fail dynamically. +# However, the Java implementation currently reports the dynamic +# error at the x=1 statement (b/33975425). I think we need to simplify +# the resolver algorithm to what we have implemented. 
+def use_before_def(): + print(x) # dynamic error: local var referenced before assignment + printok[0] = True + x = 1 # makes 'x' local + +assert.fails(use_before_def, 'local variable x referenced before assignment') +assert.true(not printok[0]) # execution of print statement failed + +--- +x = [1] +x.extend([2]) # ok + +def f(): + x += [4] ### "local variable x referenced before assignment" + +f() + +--- + +z += 3 ### "global variable z referenced before assignment" + +--- +load("assert.star", "assert") + +# It's ok to define a global that shadows a built-in... +list = [] +assert.eq(type(list), "list") + +# ...but then all uses refer to the global, +# even if they occur before the binding use. +# See github.com/google/skylark/issues/116. +assert.fails(lambda: tuple, "global variable tuple referenced before assignment") +tuple = () + +--- +# option:set +# Same as above, but set is dialect-specific; +# we shouldn't notice any difference. +load("assert.star", "assert") + +set = [1, 2, 3] +assert.eq(type(set), "list") + +# As in Python 2 and Python 3, +# all 'in x' expressions in a comprehension are evaluated +# in the comprehension's lexical block, except the first, +# which is resolved in the outer block. +x = [[1, 2]] +assert.eq([x for x in x for y in x], + [[1, 2], [1, 2]]) + +--- +# A comprehension establishes a single new lexical block, +# not one per 'for' clause. +x = [1, 2] +_ = [x for _ in [3] for x in x] ### "local variable x referenced before assignment" + +--- +load("assert.star", "assert") + +# assign singleton sequence to 1-tuple +(x,) = (1,) +assert.eq(x, 1) +(y,) = [1] +assert.eq(y, 1) + +# assign 1-tuple to variable +z = (1,) +assert.eq(type(z), "tuple") +assert.eq(len(z), 1) +assert.eq(z[0], 1) + +# assign value to parenthesized variable +(a) = 1 +assert.eq(a, 1) + +--- +# assignment to/from fields. 
+load("assert.star", "assert", "freeze") + +hf = hasfields() +hf.x = 1 +assert.eq(hf.x, 1) +hf.x = [1, 2] +hf.x += [3, 4] +assert.eq(hf.x, [1, 2, 3, 4]) +freeze(hf) +def setX(hf): + hf.x = 2 +def setY(hf): + hf.y = 3 +assert.fails(lambda: setX(hf), "cannot set field on a frozen hasfields") +assert.fails(lambda: setY(hf), "cannot set field on a frozen hasfields") + +--- +# destructuring assignment in a for loop. +load("assert.star", "assert") + +def f(): + res = [] + for (x, y), z in [(["a", "b"], 3), (["c", "d"], 4)]: + res.append((x, y, z)) + return res +assert.eq(f(), [("a", "b", 3), ("c", "d", 4)]) + +def g(): + a = {} + for i, a[i] in [("one", 1), ("two", 2)]: + pass + return a +assert.eq(g(), {"one": 1, "two": 2}) + +--- +# parenthesized LHS in augmented assignment (success) +# option:globalreassign +load("assert.star", "assert") + +a = 5 +(a) += 3 +assert.eq(a, 8) + +--- +# parenthesized LHS in augmented assignment (error) + +(a) += 5 ### "global variable a referenced before assignment" + +--- +# option:globalreassign +load("assert.star", "assert") +assert = 1 +load("assert.star", "assert") + +--- +# option:globalreassign option:loadbindsglobally +load("assert.star", "assert") +assert = 1 +load("assert.star", "assert") + +--- +# option:loadbindsglobally +_ = assert ### "global variable assert referenced before assignment" +load("assert.star", "assert") + +--- +_ = assert ### "local variable assert referenced before assignment" +load("assert.star", "assert") + +--- +def f(): assert.eq(1, 1) # forward ref OK +load("assert.star", "assert") +f() + +--- +# option:loadbindsglobally +def f(): assert.eq(1, 1) # forward ref OK +load("assert.star", "assert") +f() diff --git a/starlark/testdata/benchmark.star b/starlark/testdata/benchmark.star new file mode 100644 index 0000000..b02868d --- /dev/null +++ b/starlark/testdata/benchmark.star @@ -0,0 +1,62 @@ +# Benchmarks of Starlark execution + +def bench_range_construction(b): + for _ in range(b.n): + range(200) + +def
bench_range_iteration(b): + for _ in range(b.n): + for x in range(200): + pass + +# Make a 2-level call tree of 100 * 100 calls. +def bench_calling(b): + list = range(100) + + def g(): + for x in list: + pass + + def f(): + for x in list: + g() + + for _ in range(b.n): + f() + +# Measure overhead of calling a trivial built-in method. +emptydict = {} +range1000 = range(1000) + +def bench_builtin_method(b): + for _ in range(b.n): + for _ in range1000: + emptydict.get(None) + +def bench_int(b): + for _ in range(b.n): + a = 0 + for _ in range1000: + a += 1 + +def bench_bigint(b): + for _ in range(b.n): + a = 1 << 31 # maxint32 + 1 + for _ in range1000: + a += 1 + +def bench_gauss(b): + # Sum of arithmetic series. All results fit in int32. + for _ in range(b.n): + acc = 0 + for x in range(92000): + acc += x + +def bench_mix(b): + "Benchmark of a simple mix of computation (for, if, arithmetic, comprehension)." + for _ in range(b.n): + x = 0 + for i in range(50): + if i: + x += 1 + a = [x for x in range(i)] diff --git a/starlark/testdata/bool.star b/starlark/testdata/bool.star new file mode 100644 index 0000000..6c084a3 --- /dev/null +++ b/starlark/testdata/bool.star @@ -0,0 +1,62 @@ +# Tests of Starlark 'bool' + +load("assert.star", "assert") + +# truth +assert.true(True) +assert.true(not False) +assert.true(not not True) +assert.true(not not 1 >= 1) + +# precedence of not +assert.true(not not 2 > 1) +# assert.true(not (not 2) > 1) # TODO(adonovan): fix: gives error for False > 1. 
+# assert.true(not ((not 2) > 1)) # TODO(adonovan): fix +# assert.true(not ((not (not 2)) > 1)) # TODO(adonovan): fix +# assert.true(not not not (2 > 1)) + +# bool conversion +assert.eq( + [bool(), bool(1), bool(0), bool("hello"), bool("")], + [False, True, False, True, False], +) + +# comparison +assert.true(None == None) +assert.true(None != False) +assert.true(None != True) +assert.eq(1 == 1, True) +assert.eq(1 == 2, False) +assert.true(False == False) +assert.true(True == True) + +# ordered comparison +assert.true(False < True) +assert.true(False <= True) +assert.true(False <= False) +assert.true(True > False) +assert.true(True >= False) +assert.true(True >= True) + +# conditional expression +assert.eq(1 if 3 > 2 else 0, 1) +assert.eq(1 if "foo" else 0, 1) +assert.eq(1 if "" else 0, 0) + +# short-circuit evaluation of 'and' and 'or': +# 'or' yields the first true operand, or the last if all are false. +assert.eq(0 or "" or [] or 0, 0) +assert.eq(0 or "" or [] or 123 or 1 // 0, 123) +assert.fails(lambda : 0 or "" or [] or 0 or 1 // 0, "division by zero") + +# 'and' yields the first false operand, or the last if all are true. +assert.eq(1 and "a" and [1] and 123, 123) +assert.eq(1 and "a" and [1] and 0 and 1 // 0, 0) +assert.fails(lambda : 1 and "a" and [1] and 123 and 1 // 0, "division by zero") + +# Built-ins that want a bool want an actual bool, not a truth value. 
+# See github.com/bazelbuild/starlark/issues/30 +assert.eq(''.splitlines(True), []) +assert.fails(lambda: ''.splitlines(1), 'got int, want bool') +assert.fails(lambda: ''.splitlines("hello"), 'got string, want bool') +assert.fails(lambda: ''.splitlines(0.0), 'got float, want bool') diff --git a/starlark/testdata/builtins.star b/starlark/testdata/builtins.star new file mode 100644 index 0000000..c6591b8 --- /dev/null +++ b/starlark/testdata/builtins.star @@ -0,0 +1,225 @@ +# Tests of Starlark built-in functions +# option:set + +load("assert.star", "assert") + +# len +assert.eq(len([1, 2, 3]), 3) +assert.eq(len((1, 2, 3)), 3) +assert.eq(len({1: 2}), 1) +assert.fails(lambda: len(1), "int.*has no len") + +# and, or +assert.eq(123 or "foo", 123) +assert.eq(0 or "foo", "foo") +assert.eq(123 and "foo", "foo") +assert.eq(0 and "foo", 0) +none = None +_1 = none and none[0] # rhs is not evaluated +_2 = (not none) or none[0] # rhs is not evaluated + +# any, all +assert.true(all([])) +assert.true(all([1, True, "foo"])) +assert.true(not all([1, True, ""])) +assert.true(not any([])) +assert.true(any([0, False, "foo"])) +assert.true(not any([0, False, ""])) + +# in +assert.true(3 in [1, 2, 3]) +assert.true(4 not in [1, 2, 3]) +assert.true(3 in (1, 2, 3)) +assert.true(4 not in (1, 2, 3)) +assert.fails(lambda: 3 in "foo", "in.*requires string as left operand") +assert.true(123 in {123: ""}) +assert.true(456 not in {123:""}) +assert.true([] not in {123: ""}) + +# sorted +assert.eq(sorted([42, 123, 3]), [3, 42, 123]) +assert.eq(sorted([42, 123, 3], reverse=True), [123, 42, 3]) +assert.eq(sorted(["wiz", "foo", "bar"]), ["bar", "foo", "wiz"]) +assert.eq(sorted(["wiz", "foo", "bar"], reverse=True), ["wiz", "foo", "bar"]) +assert.fails(lambda: sorted([1, 2, None, 3]), "int < NoneType not implemented") +assert.fails(lambda: sorted([1, "one"]), "string < int not implemented") +# custom key function +assert.eq(sorted(["two", "three", "four"], key=len), + ["two", "four", "three"]) 
+assert.eq(sorted(["two", "three", "four"], key=len, reverse=True), + ["three", "four", "two"]) +assert.fails(lambda: sorted([1, 2, 3], key=None), "got NoneType, want callable") +# sort is stable +pairs = [(4, 0), (3, 1), (4, 2), (2, 3), (3, 4), (1, 5), (2, 6), (3, 7)] +assert.eq(sorted(pairs, key=lambda x: x[0]), + [(1, 5), + (2, 3), (2, 6), + (3, 1), (3, 4), (3, 7), + (4, 0), (4, 2)]) +assert.fails(lambda: sorted(1), 'sorted: for parameter iterable: got int, want iterable') + +# reversed +assert.eq(reversed([1, 144, 81, 16]), [16, 81, 144, 1]) + +# set +assert.contains(set([1, 2, 3]), 1) +assert.true(4 not in set([1, 2, 3])) +assert.eq(len(set([1, 2, 3])), 3) +assert.eq(sorted([x for x in set([1, 2, 3])]), [1, 2, 3]) + +# dict +assert.eq(dict([(1, 2), (3, 4)]), {1: 2, 3: 4}) +assert.eq(dict([(1, 2), (3, 4)], foo="bar"), {1: 2, 3: 4, "foo": "bar"}) +assert.eq(dict({1:2, 3:4}), {1: 2, 3: 4}) +assert.eq(dict({1:2, 3:4}.items()), {1: 2, 3: 4}) + +# range +assert.eq("range", type(range(10))) +assert.eq("range(10)", str(range(0, 10, 1))) +assert.eq("range(1, 10)", str(range(1, 10))) +assert.eq(range(0, 5, 10), range(0, 5, 11)) +assert.eq("range(0, 10, -1)", str(range(0, 10, -1))) +assert.fails(lambda: {range(10): 10}, "unhashable: range") +assert.true(bool(range(1, 2))) +assert.true(not(range(2, 1))) # an empty range is false +assert.eq([x*x for x in range(5)], [0, 1, 4, 9, 16]) +assert.eq(list(range(5)), [0, 1, 2, 3, 4]) +assert.eq(list(range(-5)), []) +assert.eq(list(range(2, 5)), [2, 3, 4]) +assert.eq(list(range(5, 2)), []) +assert.eq(list(range(-2, -5)), []) +assert.eq(list(range(-5, -2)), [-5, -4, -3]) +assert.eq(list(range(2, 10, 3)), [2, 5, 8]) +assert.eq(list(range(10, 2, -3)), [10, 7, 4]) +assert.eq(list(range(-2, -10, -3)), [-2, -5, -8]) +assert.eq(list(range(-10, -2, 3)), [-10, -7, -4]) +assert.eq(list(range(10, 2, -1)), [10, 9, 8, 7, 6, 5, 4, 3]) +assert.eq(list(range(5)[1:]), [1, 2, 3, 4]) +assert.eq(len(range(5)[1:]), 4) +assert.eq(list(range(5)[:2]), [0, 
1]) +assert.eq(list(range(10)[1:]), [1, 2, 3, 4, 5, 6, 7, 8, 9]) +assert.eq(list(range(10)[1:9:2]), [1, 3, 5, 7]) +assert.eq(list(range(10)[1:10:2]), [1, 3, 5, 7, 9]) +assert.eq(list(range(10)[1:11:2]), [1, 3, 5, 7, 9]) +assert.eq(list(range(10)[::-2]), [9, 7, 5, 3, 1]) +assert.eq(list(range(0, 10, 2)[::2]), [0, 4, 8]) +assert.eq(list(range(0, 10, 2)[::-2]), [8, 4, 0]) +# range() is limited by the width of the Go int type (int32 or int64). +assert.fails(lambda: range(1<<64), "... out of range .want value in signed ..-bit range") +assert.eq(len(range(0x7fffffff)), 0x7fffffff) # O(1) +# Two ranges compare equal if they denote the same sequence: +assert.eq(range(0), range(2, 1, 3)) # [] +assert.eq(range(0, 3, 2), range(0, 4, 2)) # [0, 2] +assert.ne(range(1, 10), range(2, 10)) +assert.fails(lambda: range(0) < range(0), "range < range not implemented") +# <number> in <range> +assert.contains(range(3), 1) +assert.contains(range(3), 2.0) # acts like 2 +assert.fails(lambda: True in range(3), "requires integer.*not bool") # bools aren't numbers +assert.fails(lambda: "one" in range(10), "requires integer.*not string") +assert.true(4 not in range(4)) +assert.true(1e15 not in range(4)) # too big for int32 +assert.true(1e100 not in range(4)) # too big for int64 +# https://github.com/google/starlark-go/issues/116 +assert.fails(lambda: range(0, 0, 2)[:][0], "index 0 out of range: empty range") + +# list +assert.eq(list("abc".elems()), ["a", "b", "c"]) +assert.eq(sorted(list({"a": 1, "b": 2})), ['a', 'b']) + +# min, max +assert.eq(min(5, -2, 1, 7, 3), -2) +assert.eq(max(5, -2, 1, 7, 3), 7) +assert.eq(min([5, -2, 1, 7, 3]), -2) +assert.eq(min("one", "two", "three", "four"), "four") +assert.eq(max("one", "two", "three", "four"), "two") +assert.fails(min, "min requires at least one positional argument") +assert.fails(lambda: min(1), "not iterable") +assert.fails(lambda: min([]), "empty") +assert.eq(min(5, -2, 1, 7, 3, key=lambda x: x*x), 1) # min absolute value +assert.eq(min(5, -2, 
1, 7, 3, key=lambda x: -x), 7) # min negated value + +# enumerate +assert.eq(enumerate("abc".elems()), [(0, "a"), (1, "b"), (2, "c")]) +assert.eq(enumerate([False, True, None], 42), [(42, False), (43, True), (44, None)]) + +# zip +assert.eq(zip(), []) +assert.eq(zip([]), []) +assert.eq(zip([1, 2, 3]), [(1,), (2,), (3,)]) +assert.eq(zip("".elems()), []) +assert.eq(zip("abc".elems(), + list("def".elems()), + "hijk".elems()), + [("a", "d", "h"), ("b", "e", "i"), ("c", "f", "j")]) +z1 = [1] +assert.eq(zip(z1), [(1,)]) +z1.append(2) +assert.eq(zip(z1), [(1,), (2,)]) +assert.fails(lambda: zip(z1, 1), "zip: argument #2 is not iterable: int") +z1.append(3) + +# dir for builtin_function_or_method +assert.eq(dir(None), []) +assert.eq(dir({})[:3], ["clear", "get", "items"]) # etc +assert.eq(dir(1), []) +assert.eq(dir([])[:3], ["append", "clear", "extend"]) # etc + +# hasattr, getattr, dir +# hasfields is an application-defined type defined in eval_test.go. +hf = hasfields() +assert.eq(dir(hf), []) +assert.true(not hasattr(hf, "x")) +assert.fails(lambda: getattr(hf, "x"), "no .x field or method") +assert.eq(getattr(hf, "x", 42), 42) +hf.x = 1 +assert.true(hasattr(hf, "x")) +assert.eq(getattr(hf, "x"), 1) +assert.eq(hf.x, 1) +hf.x = 2 +assert.eq(getattr(hf, "x"), 2) +assert.eq(hf.x, 2) +# built-in types can have attributes (methods) too. +myset = set([]) +assert.eq(dir(myset), ["union"]) +assert.true(hasattr(myset, "union")) +assert.true(not hasattr(myset, "onion")) +assert.eq(str(getattr(myset, "union")), "<built-in method union of set value>") +assert.fails(lambda: getattr(myset, "onion"), "no .onion field or method") +assert.eq(getattr(myset, "onion", 42), 42) + +# dir returns a new, sorted, mutable list +assert.eq(sorted(dir("")), dir("")) # sorted +dir("").append("!") # mutable +assert.true("!" 
not in dir("")) # new + +# error messages should suggest spelling corrections +hf.one = 1 +hf.two = 2 +hf.three = 3 +hf.forty_five = 45 +assert.fails(lambda: hf.One, 'no .One field.*did you mean .one') +assert.fails(lambda: hf.oone, 'no .oone field.*did you mean .one') +assert.fails(lambda: hf.FortyFive, 'no .FortyFive field.*did you mean .forty_five') +assert.fails(lambda: hf.trhee, 'no .trhee field.*did you mean .three') +assert.fails(lambda: hf.thirty, 'no .thirty field or method$') # no suggestion + +# spell check in setfield too +def setfield(): hf.noForty_Five = 46 # "no" prefix => SetField returns NoSuchField +assert.fails(setfield, 'no .noForty_Five field.*did you mean .forty_five') + +# repr +assert.eq(repr(1), "1") +assert.eq(repr("x"), '"x"') +assert.eq(repr(["x", 1]), '["x", 1]') + +# fail +--- +fail() ### `fail: $` +x = 1//0 # unreachable +--- +fail(1) ### `fail: 1` +--- +fail(1, 2, 3) ### `fail: 1 2 3` +--- +fail(1, 2, 3, sep="/") ### `fail: 1/2/3` diff --git a/starlark/testdata/bytes.star b/starlark/testdata/bytes.star new file mode 100644 index 0000000..d500403 --- /dev/null +++ b/starlark/testdata/bytes.star @@ -0,0 +1,159 @@ +# Tests of 'bytes' (immutable byte strings). 
+ +load("assert.star", "assert") + +# bytes(string) -- UTF-k to UTF-8 transcoding with U+FFFD replacement +hello = bytes("hello, 世界") +goodbye = bytes("goodbye") +empty = bytes("") +nonprinting = bytes("\t\n\x7F\u200D") # TAB, NEWLINE, DEL, ZERO_WIDTH_JOINER +assert.eq(bytes("hello, 世界"[:-1]), b"hello, 世��") + +# bytes(iterable of int) -- construct from numeric byte values +assert.eq(bytes([65, 66, 67]), b"ABC") +assert.eq(bytes((65, 66, 67)), b"ABC") +assert.eq(bytes([0xf0, 0x9f, 0x98, 0xbf]), b"😿") +assert.fails(lambda: bytes([300]), + "at index 0, 300 out of range .want value in unsigned 8-bit range") +assert.fails(lambda: bytes([b"a"]), + "at index 0, got bytes, want int") +assert.fails(lambda: bytes(1), "want string, bytes, or iterable of ints") + +# literals +assert.eq(b"hello, 世界", hello) +assert.eq(b"goodbye", goodbye) +assert.eq(b"", empty) +assert.eq(b"\t\n\x7F\u200D", nonprinting) +assert.ne("abc", b"abc") +assert.eq(b"\012\xff\u0400\U0001F63F", b"\n\xffЀ😿") # see scanner tests for more +assert.eq(rb"\r\n\t", b"\\r\\n\\t") # raw + +# type +assert.eq(type(hello), "bytes") + +# len +assert.eq(len(hello), 13) +assert.eq(len(goodbye), 7) +assert.eq(len(empty), 0) +assert.eq(len(b"A"), 1) +assert.eq(len(b"Ѐ"), 2) +assert.eq(len(b"世"), 3) +assert.eq(len(b"😿"), 4) + +# truth +assert.true(hello) +assert.true(goodbye) +assert.true(not empty) + +# str(bytes) does UTF-8 to UTF-k transcoding. +# TODO(adonovan): specify. 
+assert.eq(str(hello), "hello, 世界") +assert.eq(str(hello[:-1]), "hello, 世��") # incomplete UTF-8 encoding => U+FFFD +assert.eq(str(goodbye), "goodbye") +assert.eq(str(empty), "") +assert.eq(str(nonprinting), "\t\n\x7f\u200d") +assert.eq(str(b"\xED\xB0\x80"), "���") # UTF-8 encoding of unpaired surrogate => U+FFFD x 3 + +# repr +assert.eq(repr(hello), r'b"hello, 世界"') +assert.eq(repr(hello[:-1]), r'b"hello, 世\xe7\x95"') # (incomplete UTF-8 encoding ) +assert.eq(repr(goodbye), 'b"goodbye"') +assert.eq(repr(empty), 'b""') +assert.eq(repr(nonprinting), 'b"\\t\\n\\x7f\\u200d"') + +# equality +assert.eq(hello, hello) +assert.ne(hello, goodbye) +assert.eq(b"goodbye", goodbye) + +# ordered comparison +assert.lt(b"abc", b"abd") +assert.lt(b"abc", b"abcd") +assert.lt(b"\x7f", b"\x80") # bytes compare as uint8, not int8 + +# bytes are dict-hashable +dict = {hello: 1, goodbye: 2} +dict[b"goodbye"] = 3 +assert.eq(len(dict), 2) +assert.eq(dict[goodbye], 3) + +# hash(bytes) is 32-bit FNV-1a. +assert.eq(hash(b""), 0x811c9dc5) +assert.eq(hash(b"a"), 0xe40c292c) +assert.eq(hash(b"ab"), 0x4d2505ca) +assert.eq(hash(b"abc"), 0x1a47e90b) + +# indexing +assert.eq(goodbye[0], b"g") +assert.eq(goodbye[-1], b"e") +assert.fails(lambda: goodbye[100], "out of range") + +# slicing +assert.eq(goodbye[:4], b"good") +assert.eq(goodbye[4:], b"bye") +assert.eq(goodbye[::2], b"gobe") +assert.eq(goodbye[3:4], b"d") # special case: len=1 +assert.eq(goodbye[4:4], b"") # special case: len=0 + +# bytes in bytes +assert.eq(b"bc" in b"abcd", True) +assert.eq(b"bc" in b"dcab", False) +assert.fails(lambda: "bc" in b"dcab", "requires bytes or int as left operand, not string") + +# int in bytes +assert.eq(97 in b"abc", True) # 97='a' +assert.eq(100 in b"abc", False) # 100='d' +assert.fails(lambda: 256 in b"abc", "int in bytes: 256 out of range") +assert.fails(lambda: -1 in b"abc", "int in bytes: -1 out of range") + +# ord TODO(adonovan): specify +assert.eq(ord(b"a"), 97) +assert.fails(lambda: ord(b"ab"), "ord: 
bytes has length 2, want 1") +assert.fails(lambda: ord(b""), "ord: bytes has length 0, want 1") + +# repeat (bytes * int) +assert.eq(goodbye * 3, b"goodbyegoodbyegoodbye") +assert.eq(3 * goodbye, b"goodbyegoodbyegoodbye") + +# elems() returns an iterable value over 1-byte substrings. +assert.eq(type(hello.elems()), "bytes.elems") +assert.eq(str(hello.elems()), "b\"hello, 世界\".elems()") +assert.eq(list(hello.elems()), [104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]) +assert.eq(bytes([104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]), hello) +assert.eq(list(goodbye.elems()), [103, 111, 111, 100, 98, 121, 101]) +assert.eq(list(empty.elems()), []) +assert.eq(bytes(hello.elems()), hello) # bytes(iterable) is dual to bytes.elems() + +# x[i] = ... +def f(): + b"abc"[1] = b"B" + +assert.fails(f, "bytes.*does not support.*assignment") + +# TODO(adonovan): the specification is not finalized in many areas: +# - chr, ord functions +# - encoding/decoding bytes to string. +# - methods: find, index, split, etc. +# +# Summary of string operations (put this in spec). +# +# string to number: +# - bytes[i] returns numeric value of ith byte. +# - ord(string) returns numeric value of sole code point in string. +# - ord(string[i]) is not a useful operation: fails on non-ASCII; see below. +# Q. Perhaps ord should return the first (not sole) code point? Then it becomes a UTF-8 decoder. +# Perhaps ord(string, index=int) should apply the index and relax the len=1 check. +# - string.codepoint() iterates over 1-codepoint substrings. +# - string.codepoint_ords() iterates over numeric values of code points in string. +# - string.elems() iterates over 1-element (UTF-k code) substrings. +# - string.elem_ords() iterates over numeric UTF-k code values. +# - string.elem_ords()[i] returns numeric value of ith element (UTF-k code). +# - string.elems()[i] returns substring of a single element (UTF-k code). 
+# - int(string) parses string as decimal (or other) numeric literal. +# +# number to string: +# - chr(int) returns string, UTF-k encoding of Unicode code point (like Python). +# Redundant with '%c' % int (which Python2 calls 'unichr'.) +# - bytes(chr(int)) returns byte string containing UTF-8 encoding of one code point. +# - bytes([int]) returns 1-byte string (with regrettable list allocation). +# - str(int) - format number as decimal. diff --git a/starlark/testdata/control.star b/starlark/testdata/control.star new file mode 100644 index 0000000..554ab25 --- /dev/null +++ b/starlark/testdata/control.star @@ -0,0 +1,64 @@ +# Tests of Starlark control flow + +load("assert.star", "assert") + +def controlflow(): + # elif + x = 0 + if True: + x=1 + elif False: + assert.fail("else of true") + else: + assert.fail("else of else of true") + assert.true(x) + + x = 0 + if False: + assert.fail("then of false") + elif True: + x = 1 + else: + assert.fail("else of true") + assert.true(x) + + x = 0 + if False: + assert.fail("then of false") + elif False: + assert.fail("then of false") + else: + x = 1 + assert.true(x) +controlflow() + +def loops(): + y = "" + for x in [1, 2, 3, 4, 5]: + if x == 2: + continue + if x == 4: + break + y = y + str(x) + return y +assert.eq(loops(), "13") + +# return +g = 123 +def f(x): + for g in (1, 2, 3): + if g == x: + return g +assert.eq(f(2), 2) +assert.eq(f(4), None) # falling off end => return None +assert.eq(g, 123) # unchanged by local use of g in function + +# infinite sequences +def fib(n): + seq = [] + for x in fibonacci: # fibonacci is an infinite iterable defined in eval_test.go + if len(seq) == n: + break + seq.append(x) + return seq +assert.eq(fib(10), [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]) diff --git a/starlark/testdata/dict.star b/starlark/testdata/dict.star new file mode 100644 index 0000000..1aeb1e7 --- /dev/null +++ b/starlark/testdata/dict.star @@ -0,0 +1,248 @@ +# Tests of Starlark 'dict' + +load("assert.star", "assert", "freeze") + 
+# literals +assert.eq({}, {}) +assert.eq({"a": 1}, {"a": 1}) +assert.eq({"a": 1,}, {"a": 1}) + +# truth +assert.true({False: False}) +assert.true(not {}) + +# dict + dict is no longer supported. +assert.fails(lambda: {"a": 1} + {"b": 2}, 'unknown binary op: dict \\+ dict') + +# dict comprehension +assert.eq({x: x*x for x in range(3)}, {0: 0, 1: 1, 2: 4}) + +# dict.pop +x6 = {"a": 1, "b": 2} +assert.eq(x6.pop("a"), 1) +assert.eq(str(x6), '{"b": 2}') +assert.fails(lambda: x6.pop("c"), "pop: missing key") +assert.eq(x6.pop("c", 3), 3) +assert.eq(x6.pop("c", None), None) # default=None tests an edge case of UnpackArgs +assert.eq(x6.pop("b"), 2) +assert.eq(len(x6), 0) + +# dict.popitem +x7 = {"a": 1, "b": 2} +assert.eq([x7.popitem(), x7.popitem()], [("a", 1), ("b", 2)]) +assert.fails(x7.popitem, "empty dict") +assert.eq(len(x7), 0) + +# dict.keys, dict.values +x8 = {"a": 1, "b": 2} +assert.eq(x8.keys(), ["a", "b"]) +assert.eq(x8.values(), [1, 2]) + +# equality +assert.eq({"a": 1, "b": 2}, {"a": 1, "b": 2}) +assert.eq({"a": 1, "b": 2,}, {"a": 1, "b": 2}) +assert.eq({"a": 1, "b": 2}, {"b": 2, "a": 1}) + +# insertion order is preserved +assert.eq(dict([("a", 0), ("b", 1), ("c", 2), ("b", 3)]).keys(), ["a", "b", "c"]) +assert.eq(dict([("b", 0), ("a", 1), ("b", 2), ("c", 3)]).keys(), ["b", "a", "c"]) +assert.eq(dict([("b", 0), ("a", 1), ("b", 2), ("c", 3)])["b"], 2) +# ...even after rehashing (which currently occurs after key 'i'): +small = dict([("a", 0), ("b", 1), ("c", 2)]) +small.update([("d", 4), ("e", 5), ("f", 6), ("g", 7), ("h", 8), ("i", 9), ("j", 10), ("k", 11)]) +assert.eq(small.keys(), ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]) + +# Duplicate keys are not permitted in dictionary expressions (see b/35698444). +# (Nor in keyword args to function calls---checked by resolver.) +assert.fails(lambda: {"aa": 1, "bb": 2, "cc": 3, "bb": 4}, 'duplicate key: "bb"') + +# Check that even with many positional args, keyword collisions are detected. 
+assert.fails(lambda: dict({'b': 3}, a=4, **dict(a=5)), 'dict: duplicate keyword arg: "a"') +assert.fails(lambda: dict({'a': 2, 'b': 3}, a=4, **dict(a=5)), 'dict: duplicate keyword arg: "a"') +# positional/keyword arg key collisions are ok +assert.eq(dict((['a', 2], ), a=4), {'a': 4}) +assert.eq(dict((['a', 2], ['a', 3]), a=4), {'a': 4}) + +# index +def setIndex(d, k, v): + d[k] = v + +x9 = {} +assert.fails(lambda: x9["a"], 'key "a" not in dict') +x9["a"] = 1 +assert.eq(x9["a"], 1) +assert.eq(x9, {"a": 1}) +assert.fails(lambda: setIndex(x9, [], 2), 'unhashable type: list') +freeze(x9) +assert.fails(lambda: setIndex(x9, "a", 3), 'cannot insert into frozen hash table') + +x9a = {} +x9a[1, 2] = 3 # unparenthesized tuple is allowed here +assert.eq(x9a.keys()[0], (1, 2)) + +# dict.get +x10 = {"a": 1} +assert.eq(x10.get("a"), 1) +assert.eq(x10.get("b"), None) +assert.eq(x10.get("a", 2), 1) +assert.eq(x10.get("b", 2), 2) + +# dict.clear +x11 = {"a": 1} +assert.contains(x11, "a") +assert.eq(x11["a"], 1) +x11.clear() +assert.fails(lambda: x11["a"], 'key "a" not in dict') +assert.true("a" not in x11) +freeze(x11) +assert.fails(x11.clear, "cannot clear frozen hash table") + +# dict.setdefault +x12 = {"a": 1} +assert.eq(x12.setdefault("a"), 1) +assert.eq(x12["a"], 1) +assert.eq(x12.setdefault("b"), None) +assert.eq(x12["b"], None) +assert.eq(x12.setdefault("c", 2), 2) +assert.eq(x12["c"], 2) +assert.eq(x12.setdefault("c", 3), 2) +assert.eq(x12["c"], 2) +freeze(x12) +assert.eq(x12.setdefault("a", 1), 1) # no change, no error +assert.fails(lambda: x12.setdefault("d", 1), "cannot insert into frozen hash table") + +# dict.update +x13 = {"a": 1} +x13.update(a=2, b=3) +assert.eq(x13, {"a": 2, "b": 3}) +x13.update([("b", 4), ("c", 5)]) +assert.eq(x13, {"a": 2, "b": 4, "c": 5}) +x13.update({"c": 6, "d": 7}) +assert.eq(x13, {"a": 2, "b": 4, "c": 6, "d": 7}) +freeze(x13) +assert.fails(lambda: x13.update({"a": 8}), "cannot insert into frozen hash table") + +# dict as a sequence +# +# for 
loop +x14 = {1:2, 3:4} +def keys(dict): + keys = [] + for k in dict: keys.append(k) + return keys +assert.eq(keys(x14), [1, 3]) +# +# comprehension +assert.eq([x for x in x14], [1, 3]) +# +# varargs +def varargs(*args): return args +x15 = {"one": 1} +assert.eq(varargs(*x15), ("one",)) + +# kwargs parameter does not alias the **kwargs dict +def kwargs(**kwargs): return kwargs +x16 = kwargs(**x15) +assert.eq(x16, x15) +x15["two"] = 2 # mutate +assert.ne(x16, x15) + +# iterator invalidation +def iterator1(): + dict = {1:1, 2:1} + for k in dict: + dict[2*k] = dict[k] +assert.fails(iterator1, "insert.*during iteration") + +def iterator2(): + dict = {1:1, 2:1} + for k in dict: + dict.pop(k) +assert.fails(iterator2, "delete.*during iteration") + +def iterator3(): + def f(d): + d[3] = 3 + dict = {1:1, 2:1} + _ = [f(dict) for x in dict] +assert.fails(iterator3, "insert.*during iteration") + +# This assignment is not a modification-during-iteration: +# the sequence x should be completely iterated before +# the assignment occurs. +def f(): + x = {1:2, 2:4} + a, x[0] = x + assert.eq(a, 1) + assert.eq(x, {1: 2, 2: 4, 0: 2}) +f() + +# Regression test for a bug in hashtable.delete +def test_delete(): + d = {} + + # delete tail first + d["one"] = 1 + d["two"] = 2 + assert.eq(str(d), '{"one": 1, "two": 2}') + d.pop("two") + assert.eq(str(d), '{"one": 1}') + d.pop("one") + assert.eq(str(d), '{}') + + # delete head first + d["one"] = 1 + d["two"] = 2 + assert.eq(str(d), '{"one": 1, "two": 2}') + d.pop("one") + assert.eq(str(d), '{"two": 2}') + d.pop("two") + assert.eq(str(d), '{}') + + # delete middle + d["one"] = 1 + d["two"] = 2 + d["three"] = 3 + assert.eq(str(d), '{"one": 1, "two": 2, "three": 3}') + d.pop("two") + assert.eq(str(d), '{"one": 1, "three": 3}') + d.pop("three") + assert.eq(str(d), '{"one": 1}') + d.pop("one") + assert.eq(str(d), '{}') + +test_delete() + +# Regression test for github.com/google/starlark-go/issues/128. 
+assert.fails(lambda: dict(None), 'got NoneType, want iterable') +assert.fails(lambda: {}.update(None), 'got NoneType, want iterable') + +--- +# Verify position of an "unhashable key" error in a dict literal. + +_ = { + "one": 1, + ["two"]: 2, ### "unhashable type: list" + "three": 3, +} + +--- +# Verify position of a "duplicate key" error in a dict literal. + +_ = { + "one": 1, + "one": 1, ### `duplicate key: "one"` + "three": 3, +} + +--- +# Verify position of an "unhashable key" error in a dict comprehension. + +_ = { + k: v ### "unhashable type: list" + for k, v in [ + ("one", 1), + (["two"], 2), + ("three", 3), + ] +} diff --git a/starlark/testdata/float.star b/starlark/testdata/float.star new file mode 100644 index 0000000..b4df38d --- /dev/null +++ b/starlark/testdata/float.star @@ -0,0 +1,504 @@ +# Tests of Starlark 'float' +# option:set + +load("assert.star", "assert") + +# TODO(adonovan): more tests: +# - precision +# - limits + +# type +assert.eq(type(0.0), "float") + +# truth +assert.true(123.0) +assert.true(-1.0) +assert.true(not 0.0) +assert.true(-1.0e-45) +assert.true(float("NaN")) + +# not iterable +assert.fails(lambda: len(0.0), 'has no len') +assert.fails(lambda: [x for x in 0.0], 'float value is not iterable') + +# literals +assert.eq(type(1.234), "float") +assert.eq(type(1e10), "float") +assert.eq(type(1e+10), "float") +assert.eq(type(1e-10), "float") +assert.eq(type(1.234e10), "float") +assert.eq(type(1.234e+10), "float") +assert.eq(type(1.234e-10), "float") + +# int/float equality +assert.eq(0.0, 0) +assert.eq(0, 0.0) +assert.eq(1.0, 1) +assert.eq(1, 1.0) +assert.true(1.23e45 != 1229999999999999973814869011019624571608236031) +assert.true(1.23e45 == 1229999999999999973814869011019624571608236032) +assert.true(1.23e45 != 1229999999999999973814869011019624571608236033) +assert.true(1229999999999999973814869011019624571608236031 != 1.23e45) +assert.true(1229999999999999973814869011019624571608236032 == 1.23e45) 
+assert.true(1229999999999999973814869011019624571608236033 != 1.23e45) + +# loss of precision +p53 = 1<<53 +assert.eq(float(p53-1), p53-1) +assert.eq(float(p53+0), p53+0) +assert.eq(float(p53+1), p53+0) # +assert.eq(float(p53+2), p53+2) +assert.eq(float(p53+3), p53+4) # +assert.eq(float(p53+4), p53+4) +assert.eq(float(p53+5), p53+4) # +assert.eq(float(p53+6), p53+6) +assert.eq(float(p53+7), p53+8) # +assert.eq(float(p53+8), p53+8) + +assert.true(float(p53+1) != p53+1) # comparisons are exact +assert.eq(float(p53+1) - (p53+1), 0) # arithmetic entails rounding + +assert.fails(lambda: {123.0: "f", 123: "i"}, "duplicate key: 123") + +# equal int/float values have same hash +d = {123.0: "x"} +d[123] = "y" +assert.eq(len(d), 1) +assert.eq(d[123.0], "y") + +# literals (mostly covered by scanner tests) +assert.eq(str(0.), "0.0") +assert.eq(str(.0), "0.0") +assert.true(5.0 != 4.999999999999999) +assert.eq(5.0, 4.9999999999999999) # both literals denote 5.0 +assert.eq(1.23e45, 1.23 * 1000000000000000000000000000000000000000000000) +assert.eq(str(1.23e-45 - (1.23 / 1000000000000000000000000000000000000000000000)), "-1.5557538194652854e-61") + +nan = float("NaN") +inf = float("+Inf") +neginf = float("-Inf") +negzero = (-1e-323 / 10) + +# -- arithmetic -- + +# +float, -float +assert.eq(+(123.0), 123.0) +assert.eq(-(123.0), -123.0) +assert.eq(-(-(123.0)), 123.0) +assert.eq(+(inf), inf) +assert.eq(-(inf), neginf) +assert.eq(-(neginf), inf) +assert.eq(str(-(nan)), "nan") +# + +assert.eq(1.2e3 + 5.6e7, 5.60012e+07) +assert.eq(1.2e3 + 1, 1201) +assert.eq(1 + 1.2e3, 1201) +assert.eq(str(1.2e3 + nan), "nan") +assert.eq(inf + 0, inf) +assert.eq(inf + 1, inf) +assert.eq(inf + inf, inf) +assert.eq(str(inf + neginf), "nan") +# - +assert.eq(1.2e3 - 5.6e7, -5.59988e+07) +assert.eq(1.2e3 - 1, 1199) +assert.eq(1 - 1.2e3, -1199) +assert.eq(str(1.2e3 - nan), "nan") +assert.eq(inf - 0, inf) +assert.eq(inf - 1, inf) +assert.eq(str(inf - inf), "nan") +assert.eq(inf - neginf, inf) +# * 
+assert.eq(1.5e6 * 2.2e3, 3.3e9) +assert.eq(1.5e6 * 123, 1.845e+08) +assert.eq(123 * 1.5e6, 1.845e+08) +assert.eq(str(1.2e3 * nan), "nan") +assert.eq(str(inf * 0), "nan") +assert.eq(inf * 1, inf) +assert.eq(inf * inf, inf) +assert.eq(inf * neginf, neginf) +# % +assert.eq(100.0 % 7.0, 2) +assert.eq(100.0 % -7.0, -5) # NB: different from Go / Java +assert.eq(-100.0 % 7.0, 5) # NB: different from Go / Java +assert.eq(-100.0 % -7.0, -2) +assert.eq(-100.0 % 7, 5) +assert.eq(100 % 7.0, 2) +assert.eq(str(1.2e3 % nan), "nan") +assert.eq(str(inf % 1), "nan") +assert.eq(str(inf % inf), "nan") +assert.eq(str(inf % neginf), "nan") +# / +assert.eq(str(100.0 / 7.0), "14.285714285714286") +assert.eq(str(100 / 7.0), "14.285714285714286") +assert.eq(str(100.0 / 7), "14.285714285714286") +assert.eq(str(100.0 / nan), "nan") +# // +assert.eq(100.0 // 7.0, 14) +assert.eq(100 // 7.0, 14) +assert.eq(100.0 // 7, 14) +assert.eq(100.0 // -7.0, -15) +assert.eq(100 // -7.0, -15) +assert.eq(100.0 // -7, -15) +assert.eq(str(1 // neginf), "-0.0") +assert.eq(str(100.0 // nan), "nan") + +# addition +assert.eq(0.0 + 1.0, 1.0) +assert.eq(1.0 + 1.0, 2.0) +assert.eq(1.25 + 2.75, 4.0) +assert.eq(5.0 + 7.0, 12.0) +assert.eq(5.1 + 7, 12.1) # float + int +assert.eq(7 + 5.1, 12.1) # int + float + +# subtraction +assert.eq(5.0 - 7.0, -2.0) +assert.eq(5.1 - 7.1, -2.0) +assert.eq(5.5 - 7, -1.5) +assert.eq(5 - 7.5, -2.5) +assert.eq(0.0 - 1.0, -1.0) + +# multiplication +assert.eq(5.0 * 7.0, 35.0) +assert.eq(5.5 * 2.5, 13.75) +assert.eq(5.5 * 7, 38.5) +assert.eq(5 * 7.1, 35.5) + +# real division (like Python 3) +# The / operator is available only when the 'fp' dialect option is enabled. 
+assert.eq(100.0 / 8.0, 12.5) +assert.eq(100.0 / -8.0, -12.5) +assert.eq(-100.0 / 8.0, -12.5) +assert.eq(-100.0 / -8.0, 12.5) +assert.eq(98.0 / 8.0, 12.25) +assert.eq(98.0 / -8.0, -12.25) +assert.eq(-98.0 / 8.0, -12.25) +assert.eq(-98.0 / -8.0, 12.25) +assert.eq(2.5 / 2.0, 1.25) +assert.eq(2.5 / 2, 1.25) +assert.eq(5 / 4.0, 1.25) +assert.eq(5 / 4, 1.25) +assert.fails(lambda: 1.0 / 0, "floating-point division by zero") +assert.fails(lambda: 1.0 / 0.0, "floating-point division by zero") +assert.fails(lambda: 1 / 0.0, "floating-point division by zero") + +# floored division +assert.eq(100.0 // 8.0, 12.0) +assert.eq(100.0 // -8.0, -13.0) +assert.eq(-100.0 // 8.0, -13.0) +assert.eq(-100.0 // -8.0, 12.0) +assert.eq(98.0 // 8.0, 12.0) +assert.eq(98.0 // -8.0, -13.0) +assert.eq(-98.0 // 8.0, -13.0) +assert.eq(-98.0 // -8.0, 12.0) +assert.eq(2.5 // 2.0, 1.0) +assert.eq(2.5 // 2, 1.0) +assert.eq(5 // 4.0, 1.0) +assert.eq(5 // 4, 1) +assert.eq(type(5 // 4), "int") +assert.fails(lambda: 1.0 // 0, "floored division by zero") +assert.fails(lambda: 1.0 // 0.0, "floored division by zero") +assert.fails(lambda: 1 // 0.0, "floored division by zero") + +# remainder +assert.eq(100.0 % 8.0, 4.0) +assert.eq(100.0 % -8.0, -4.0) +assert.eq(-100.0 % 8.0, 4.0) +assert.eq(-100.0 % -8.0, -4.0) +assert.eq(98.0 % 8.0, 2.0) +assert.eq(98.0 % -8.0, -6.0) +assert.eq(-98.0 % 8.0, 6.0) +assert.eq(-98.0 % -8.0, -2.0) +assert.eq(2.5 % 2.0, 0.5) +assert.eq(2.5 % 2, 0.5) +assert.eq(5 % 4.0, 1.0) +assert.fails(lambda: 1.0 % 0, "floating-point modulo by zero") +assert.fails(lambda: 1.0 % 0.0, "floating-point modulo by zero") +assert.fails(lambda: 1 % 0.0, "floating-point modulo by zero") + +# floats cannot be used as indices, even if integral +assert.fails(lambda: "abc"[1.0], "want int") +assert.fails(lambda: ["A", "B", "C"].insert(1.0, "D"), "want int") +assert.fails(lambda: range(3)[1.0], "got float, want int") + +# -- comparisons -- +# NaN +assert.true(nan == nan) # \ +assert.true(nan >= nan) # unlike 
Python +assert.true(nan <= nan) # / +assert.true(not (nan > nan)) +assert.true(not (nan < nan)) +assert.true(not (nan != nan)) # unlike Python +# Sort is stable: 0.0 and -0.0 are equal, but they are not permuted. +# Similarly 1 and 1.0. +assert.eq( + str(sorted([inf, neginf, nan, 1e300, -1e300, 1.0, -1.0, 1, -1, 1e-300, -1e-300, 0, 0.0, negzero, 1e-300, -1e-300])), + "[-inf, -1e+300, -1.0, -1, -1e-300, -1e-300, 0, 0.0, -0.0, 1e-300, 1e-300, 1.0, 1, 1e+300, +inf, nan]") + +# Sort is stable, and its result contains no adjacent x, y such that y > x. +# Note: Python's reverse sort is unstable; see https://bugs.python.org/issue36095. +assert.eq(str(sorted([7, 3, nan, 1, 9])), "[1, 3, 7, 9, nan]") +assert.eq(str(sorted([7, 3, nan, 1, 9], reverse=True)), "[nan, 9, 7, 3, 1]") + +# All NaN values compare equal. (Identical objects compare equal.) +nandict = {nan: 1} +nandict[nan] = 2 +assert.eq(len(nandict), 1) # (same as Python) +assert.eq(nandict[nan], 2) # (same as Python) +assert.fails(lambda: {nan: 1, nan: 2}, "duplicate key: nan") + +nandict[float('nan')] = 3 # a distinct NaN object +assert.eq(str(nandict), "{nan: 3}") # (Python: {nan: 2, nan: 3}) + +assert.eq(str({inf: 1, neginf: 2}), "{+inf: 1, -inf: 2}") + +# zero +assert.eq(0.0, negzero) + +# inf +assert.eq(+inf / +inf, nan) +assert.eq(+inf / -inf, nan) +assert.eq(-inf / +inf, nan) +assert.eq(0.0 / +inf, 0.0) +assert.eq(0.0 / -inf, 0.0) +assert.true(inf > -inf) +assert.eq(inf, -neginf) +# TODO(adonovan): assert inf > any finite number, etc. 
+ +# negative zero +negz = -0 +assert.eq(negz, 0) + +# min/max ordering with NaN (the greatest float value) +assert.eq(max([1, nan, 3]), nan) +assert.eq(max([nan, 2, 3]), nan) +assert.eq(min([1, nan, 3]), 1) +assert.eq(min([nan, 2, 3]), 2) + +# float/float comparisons +fltmax = 1.7976931348623157e+308 # approx +fltmin = 4.9406564584124654e-324 # approx +assert.lt(-inf, -fltmax) +assert.lt(-fltmax, -1.0) +assert.lt(-1.0, -fltmin) +assert.lt(-fltmin, 0.0) +assert.lt(0, fltmin) +assert.lt(fltmin, 1.0) +assert.lt(1.0, fltmax) +assert.lt(fltmax, inf) + +# int/float comparisons +assert.eq(0, 0.0) +assert.eq(1, 1.0) +assert.eq(-1, -1.0) +assert.ne(-1, -1.0 + 1e-7) +assert.lt(-2, -2 + 1e-15) + +# int conversion (rounds towards zero) +assert.eq(int(100.1), 100) +assert.eq(int(100.0), 100) +assert.eq(int(99.9), 99) +assert.eq(int(-99.9), -99) +assert.eq(int(-100.0), -100) +assert.eq(int(-100.1), -100) +assert.eq(int(1e100), int("10000000000000000159028911097599180468360808563945281389781327557747838772170381060813469985856815104")) +assert.fails(lambda: int(inf), "cannot convert.*infinity") +assert.fails(lambda: int(nan), "cannot convert.*NaN") + +# -- float() function -- +assert.eq(float(), 0.0) +# float(bool) +assert.eq(float(False), 0.0) +assert.eq(float(True), 1.0) +# float(int) +assert.eq(float(0), 0.0) +assert.eq(float(1), 1.0) +assert.eq(float(123), 123.0) +assert.eq(float(123 * 1000000 * 1000000 * 1000000 * 1000000 * 1000000), 1.23e+32) +# float(float) +assert.eq(float(1.1), 1.1) +assert.fails(lambda: float(None), "want number or string") +assert.ne(False, 0.0) # differs from Python +assert.ne(True, 1.0) +# float(string) +assert.eq(float("1.1"), 1.1) +assert.fails(lambda: float("1.1abc"), "invalid float literal") +assert.fails(lambda: float("1e100.0"), "invalid float literal") +assert.fails(lambda: float("1e1000"), "floating-point number too large") +assert.eq(float("-1.1"), -1.1) +assert.eq(float("+1.1"), +1.1) +assert.eq(float("+Inf"), inf) 
+assert.eq(float("-Inf"), neginf) +assert.eq(float("NaN"), nan) +assert.eq(float("NaN"), nan) +assert.eq(float("+NAN"), nan) +assert.eq(float("-nan"), nan) +assert.eq(str(float("Inf")), "+inf") +assert.eq(str(float("+INF")), "+inf") +assert.eq(str(float("-inf")), "-inf") +assert.eq(str(float("+InFiniTy")), "+inf") +assert.eq(str(float("-iNFiniTy")), "-inf") +assert.fails(lambda: float("one point two"), "invalid float literal: one point two") +assert.fails(lambda: float("1.2.3"), "invalid float literal: 1.2.3") +assert.fails(lambda: float(123 << 500 << 500 << 50), "int too large to convert to float") +assert.fails(lambda: float(-123 << 500 << 500 << 50), "int too large to convert to float") +assert.fails(lambda: float(str(-123 << 500 << 500 << 50)), "floating-point number too large") + +# -- implicit float(int) conversions -- +assert.fails(lambda: (1<<500<<500<<500) + 0.0, "int too large to convert to float") +assert.fails(lambda: 0.0 + (1<<500<<500<<500), "int too large to convert to float") +assert.fails(lambda: (1<<500<<500<<500) - 0.0, "int too large to convert to float") +assert.fails(lambda: 0.0 - (1<<500<<500<<500), "int too large to convert to float") +assert.fails(lambda: (1<<500<<500<<500) * 1.0, "int too large to convert to float") +assert.fails(lambda: 1.0 * (1<<500<<500<<500), "int too large to convert to float") +assert.fails(lambda: (1<<500<<500<<500) / 1.0, "int too large to convert to float") +assert.fails(lambda: 1.0 / (1<<500<<500<<500), "int too large to convert to float") +assert.fails(lambda: (1<<500<<500<<500) // 1.0, "int too large to convert to float") +assert.fails(lambda: 1.0 // (1<<500<<500<<500), "int too large to convert to float") +assert.fails(lambda: (1<<500<<500<<500) % 1.0, "int too large to convert to float") +assert.fails(lambda: 1.0 % (1<<500<<500<<500), "int too large to convert to float") + + +# -- int function -- +assert.eq(int(0.0), 0) +assert.eq(int(1.0), 1) +assert.eq(int(1.1), 1) +assert.eq(int(0.9), 0) 
+assert.eq(int(-1.1), -1.0) +assert.eq(int(-1.0), -1.0) +assert.eq(int(-0.9), 0.0) +assert.eq(int(1.23e+32), 123000000000000004979083645550592) +assert.eq(int(-1.23e-32), 0) +assert.eq(int(1.23e-32), 0) +assert.fails(lambda: int(float("+Inf")), "cannot convert float infinity to integer") +assert.fails(lambda: int(float("-Inf")), "cannot convert float infinity to integer") +assert.fails(lambda: int(float("NaN")), "cannot convert float NaN to integer") + + +# hash +# Check that equal float and int values have the same internal hash. +def checkhash(): + for a in [1.23e100, 1.23e10, 1.23e1, 1.23, + 1, 4294967295, 8589934591, 9223372036854775807]: + for b in [a, -a, 1/a, -1/a]: + f = float(b) + i = int(b) + if f == i: + fh = {f: None} + ih = {i: None} + if fh != ih: + assert.true(False, "{%v: None} != {%v: None}: hashes vary" % fh, ih) +checkhash() + +# string formatting + +# %d +assert.eq("%d" % 0, "0") +assert.eq("%d" % 0.0, "0") +assert.eq("%d" % 123, "123") +assert.eq("%d" % 123.0, "123") +assert.eq("%d" % 1.23e45, "1229999999999999973814869011019624571608236032") +# (see below for '%d' % NaN/Inf) +assert.eq("%d" % negzero, "0") +assert.fails(lambda: "%d" % float("NaN"), "cannot convert float NaN to integer") +assert.fails(lambda: "%d" % float("+Inf"), "cannot convert float infinity to integer") +assert.fails(lambda: "%d" % float("-Inf"), "cannot convert float infinity to integer") + +# %e +assert.eq("%e" % 0, "0.000000e+00") +assert.eq("%e" % 0.0, "0.000000e+00") +assert.eq("%e" % 123, "1.230000e+02") +assert.eq("%e" % 123.0, "1.230000e+02") +assert.eq("%e" % 1.23e45, "1.230000e+45") +assert.eq("%e" % -1.23e-45, "-1.230000e-45") +assert.eq("%e" % nan, "nan") +assert.eq("%e" % inf, "+inf") +assert.eq("%e" % neginf, "-inf") +assert.eq("%e" % negzero, "-0.000000e+00") +assert.fails(lambda: "%e" % "123", "requires float, not str") +# %f +assert.eq("%f" % 0, "0.000000") +assert.eq("%f" % 0.0, "0.000000") +assert.eq("%f" % 123, "123.000000") +assert.eq("%f" % 123.0, 
"123.000000") +# Note: Starlark/Java emits 1230000000000000000000000000000000000000000000.000000. Why? +assert.eq("%f" % 1.23e45, "1229999999999999973814869011019624571608236032.000000") +assert.eq("%f" % -1.23e-45, "-0.000000") +assert.eq("%f" % nan, "nan") +assert.eq("%f" % inf, "+inf") +assert.eq("%f" % neginf, "-inf") +assert.eq("%f" % negzero, "-0.000000") +assert.fails(lambda: "%f" % "123", "requires float, not str") +# %g +assert.eq("%g" % 0, "0.0") +assert.eq("%g" % 0.0, "0.0") +assert.eq("%g" % 123, "123.0") +assert.eq("%g" % 123.0, "123.0") +assert.eq("%g" % 1.110, "1.11") +assert.eq("%g" % 1e5, "100000.0") +assert.eq("%g" % 1e6, "1e+06") # Note: threshold of scientific notation is 1e17 in Starlark/Java +assert.eq("%g" % 1.23e45, "1.23e+45") +assert.eq("%g" % -1.23e-45, "-1.23e-45") +assert.eq("%g" % nan, "nan") +assert.eq("%g" % inf, "+inf") +assert.eq("%g" % neginf, "-inf") +assert.eq("%g" % negzero, "-0.0") +# str uses %g +assert.eq(str(0.0), "0.0") +assert.eq(str(123.0), "123.0") +assert.eq(str(1.23e45), "1.23e+45") +assert.eq(str(-1.23e-45), "-1.23e-45") +assert.eq(str(nan), "nan") +assert.eq(str(inf), "+inf") +assert.eq(str(neginf), "-inf") +assert.eq(str(negzero), "-0.0") +assert.fails(lambda: "%g" % "123", "requires float, not str") + +i0 = 1 +f0 = 1.0 +assert.eq(type(i0), "int") +assert.eq(type(f0), "float") + +ops = { + '+': lambda x, y: x + y, + '-': lambda x, y: x - y, + '*': lambda x, y: x * y, + '/': lambda x, y: x / y, + '//': lambda x, y: x // y, + '%': lambda x, y: x % y, +} + +# Check that if either argument is a float, so too is the result. 
+def checktypes(): + want = set(""" +int + int = int +int + float = float +float + int = float +float + float = float +int - int = int +int - float = float +float - int = float +float - float = float +int * int = int +int * float = float +float * int = float +float * float = float +int / int = float +int / float = float +float / int = float +float / float = float +int // int = int +int // float = float +float // int = float +float // float = float +int % int = int +int % float = float +float % int = float +float % float = float +"""[1:].splitlines()) + for opname in ("+", "-", "*", "/", "%"): + for x in [i0, f0]: + for y in [i0, f0]: + op = ops[opname] + got = "%s %s %s = %s" % (type(x), opname, type(y), type(op(x, y))) + assert.contains(want, got) +checktypes() diff --git a/starlark/testdata/function.star b/starlark/testdata/function.star new file mode 100644 index 0000000..737df26 --- /dev/null +++ b/starlark/testdata/function.star @@ -0,0 +1,323 @@ +# Tests of Starlark 'function' +# option:set + +# TODO(adonovan): +# - add some introspection functions for looking at function values +# and test that functions have correct position, free vars, names of locals, etc. +# - move the hard-coded tests of parameter passing from eval_test.go to here. 
+ +load("assert.star", "assert", "freeze") + +# Test lexical scope and closures: +def outer(x): + def inner(y): + return x + x + y # multiple occurrences of x should create only 1 freevar + return inner + +z = outer(3) +assert.eq(z(5), 11) +assert.eq(z(7), 13) +z2 = outer(4) +assert.eq(z2(5), 13) +assert.eq(z2(7), 15) +assert.eq(z(5), 11) +assert.eq(z(7), 13) + +# Function name +assert.eq(str(outer), '<function outer>') +assert.eq(str(z), '<function inner>') +assert.eq(str(str), '<built-in function str>') +assert.eq(str("".startswith), '<built-in method startswith of string value>') + +# Stateful closure +def squares(): + x = [0] + def f(): + x[0] += 1 + return x[0] * x[0] + return f + +sq = squares() +assert.eq(sq(), 1) +assert.eq(sq(), 4) +assert.eq(sq(), 9) +assert.eq(sq(), 16) + +# Freezing a closure +sq2 = freeze(sq) +assert.fails(sq2, "frozen list") + +# recursion detection, simple +def fib(x): + if x < 2: + return x + return fib(x-2) + fib(x-1) +assert.fails(lambda: fib(10), "function fib called recursively") + +# recursion detection, advanced +# +# A simplistic recursion check that looks for repeated calls to the +# same function value will not detect recursion using the Y +# combinator, which creates a new closure at each step of the +# recursion. To truly prohibit recursion, the dynamic check must look +# for repeated calls of the same syntactic function body. 
+Y = lambda f: (lambda x: x(x))(lambda y: f(lambda *args: y(y)(*args))) +fibgen = lambda fib: lambda x: (x if x<2 else fib(x-1)+fib(x-2)) +fib2 = Y(fibgen) +assert.fails(lambda: [fib2(x) for x in range(10)], "function lambda called recursively") + +# However, this stricter check outlaws many useful programs +# that are still bounded, and creates a hazard because +# helper functions such as map below cannot be used to +# call functions that themselves use map: +def map(f, seq): return [f(x) for x in seq] +def double(x): return x+x +assert.eq(map(double, [1, 2, 3]), [2, 4, 6]) +assert.eq(map(double, ["a", "b", "c"]), ["aa", "bb", "cc"]) +def mapdouble(x): return map(double, x) +assert.fails(lambda: map(mapdouble, ([1, 2, 3], ["a", "b", "c"])), + 'function map called recursively') +# With the -recursion option it would yield [[2, 4, 6], ["aa", "bb", "cc"]]. + +# call of function not through its name +# (regression test for parsing suffixes of primary expressions) +hf = hasfields() +hf.x = [len] +assert.eq(hf.x[0]("abc"), 3) +def f(): + return lambda: 1 +assert.eq(f()(), 1) +assert.eq(["abc"][0][0].upper(), "A") + +# functions may be recursively defined, +# so long as they don't dynamically recur. +calls = [] +def yin(x): + calls.append("yin") + if x: + yang(False) + +def yang(x): + calls.append("yang") + if x: + yin(False) + +yin(True) +assert.eq(calls, ["yin", "yang"]) + +calls.clear() +yang(True) +assert.eq(calls, ["yang", "yin"]) + + +# builtin_function_or_method use identity equivalence. +closures = set(["".count for _ in range(10)]) +assert.eq(len(closures), 10) + +--- +# Default values of function parameters are mutable. +load("assert.star", "assert", "freeze") + +def f(x=[0]): + return x + +assert.eq(f(), [0]) + +f().append(1) +assert.eq(f(), [0, 1]) + +# Freezing a function value freezes its parameter defaults. +freeze(f) +assert.fails(lambda: f().append(2), "cannot append to frozen list") + +--- +# This is a well known corner case of parsing in Python. 
+load("assert.star", "assert") + +f = lambda x: 1 if x else 0 +assert.eq(f(True), 1) +assert.eq(f(False), 0) + +x = True +f2 = (lambda x: 1) if x else 0 +assert.eq(f2(123), 1) + +tf = lambda: True, lambda: False +assert.true(tf[0]()) +assert.true(not tf[1]()) + +--- +# Missing parameters are correctly reported +# in functions of more than 64 parameters. +# (This tests a corner case of the implementation: +# we avoid a map allocation for <64 parameters) + +load("assert.star", "assert") + +def f(a, b, c, d, e, f, g, h, + i, j, k, l, m, n, o, p, + q, r, s, t, u, v, w, x, + y, z, A, B, C, D, E, F, + G, H, I, J, K, L, M, N, + O, P, Q, R, S, T, U, V, + W, X, Y, Z, aa, bb, cc, dd, + ee, ff, gg, hh, ii, jj, kk, ll, + mm): + pass + +assert.fails(lambda: f( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64), "missing 1 argument \\(mm\\)") + +assert.fails(lambda: f( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, + mm = 100), 'multiple values for parameter "mm"') + +--- +# Regression test for github.com/google/starlark-go/issues/21, +# which concerns dynamic checks. +# Related: https://github.com/bazelbuild/starlark/issues/21, +# which concerns static checks. 
+ +load("assert.star", "assert") + +def f(*args, **kwargs): + return args, kwargs + +assert.eq(f(x=1, y=2), ((), {"x": 1, "y": 2})) +assert.fails(lambda: f(x=1, **dict(x=2)), 'multiple values for parameter "x"') + +def g(x, y): + return x, y + +assert.eq(g(1, y=2), (1, 2)) +assert.fails(lambda: g(1, y=2, **{'y': 3}), 'multiple values for parameter "y"') + +--- +# Regression test for a bug in CALL_VAR_KW. + +load("assert.star", "assert") + +def f(a, b, x, y): + return a+b+x+y + +assert.eq(f(*("a", "b"), **dict(y="y", x="x")) + ".", 'abxy.') +--- +# Order of evaluation of function arguments. +# Regression test for github.com/google/skylark/issues/135. +load("assert.star", "assert") + +r = [] + +def id(x): + r.append(x) + return x + +def f(*args, **kwargs): + return (args, kwargs) + +y = f(id(1), id(2), x=id(3), *[id(4)], **dict(z=id(5))) +assert.eq(y, ((1, 2, 4), dict(x=3, z=5))) + +# This matches Python2 and Starlark-in-Java, but not Python3 [1 2 4 3 6]. +# *args and *kwargs are evaluated last. +# (Python[23] also allows keyword arguments after *args.) +# See github.com/bazelbuild/starlark#13 for spec change. 
+assert.eq(r, [1, 2, 3, 4, 5]) + +--- +# option:recursion +# See github.com/bazelbuild/starlark#170 +load("assert.star", "assert") + +def a(): + list = [] + def b(n): + list.append(n) + if n > 0: + b(n - 1) # recursive reference to b + + b(3) + return list + +assert.eq(a(), [3, 2, 1, 0]) + +def c(): + list = [] + x = 1 + def d(): + list.append(x) # this use of x observes both assignments + d() + x = 2 + d() + return list + +assert.eq(c(), [1, 2]) + +def e(): + def f(): + return x # forward reference ok: x is a closure cell + x = 1 + return f() + +assert.eq(e(), 1) + +--- +load("assert.star", "assert") + +def e(): + x = 1 + def f(): + print(x) # this reference to x fails + x = 3 # because this assignment makes x local to f + f() + +assert.fails(e, "local variable x referenced before assignment") + +def f(): + def inner(): + return x + if False: + x = 0 + return x # fails (x is an uninitialized cell of this function) + +assert.fails(f, "local variable x referenced before assignment") + +def g(): + def inner(): + return x # fails (x is an uninitialized cell of the enclosing function) + if False: + x = 0 + return inner() + +assert.fails(g, "local variable x referenced before assignment") + +--- +# A trailing comma is allowed in any function definition or call. +# This reduces the need to edit neighboring lines when editing defs +# or calls splayed across multiple lines. 
+ +def a(x,): pass +def b(x, y=None, ): pass +def c(x, y=None, *args, ): pass +def d(x, y=None, *args, z=None, ): pass +def e(x, y=None, *args, z=None, **kwargs, ): pass + +a(1,) +b(1, y=2, ) +#c(1, *[], ) +#d(1, *[], z=None, ) +#e(1, *[], z=None, *{}, ) diff --git a/starlark/testdata/int.star b/starlark/testdata/int.star new file mode 100644 index 0000000..46c0ad0 --- /dev/null +++ b/starlark/testdata/int.star @@ -0,0 +1,260 @@ +# Tests of Starlark 'int' + +load("assert.star", "assert") + +# basic arithmetic +assert.eq(0 - 1, -1) +assert.eq(0 + 1, +1) +assert.eq(1 + 1, 2) +assert.eq(5 + 7, 12) +assert.eq(5 * 7, 35) +assert.eq(5 - 7, -2) + +# int boundaries +maxint64 = (1 << 63) - 1 +minint64 = -1 << 63 +maxint32 = (1 << 31) - 1 +minint32 = -1 << 31 +assert.eq(maxint64, 9223372036854775807) +assert.eq(minint64, -9223372036854775808) +assert.eq(maxint32, 2147483647) +assert.eq(minint32, -2147483648) + +# truth +def truth(): + assert.true(not 0) + for m in [1, maxint32]: # Test small/big ranges + assert.true(123 * m) + assert.true(-1 * m) + +truth() + +# floored division +# (For real division, see float.star.) 
+def division(): + for m in [1, maxint32]: # Test small/big ranges + assert.eq((100 * m) // (7 * m), 14) + assert.eq((100 * m) // (-7 * m), -15) + assert.eq((-100 * m) // (7 * m), -15) # NB: different from Go/Java + assert.eq((-100 * m) // (-7 * m), 14) # NB: different from Go/Java + assert.eq((98 * m) // (7 * m), 14) + assert.eq((98 * m) // (-7 * m), -14) + assert.eq((-98 * m) // (7 * m), -14) + assert.eq((-98 * m) // (-7 * m), 14) + +division() + +# remainder +def remainder(): + for m in [1, maxint32]: # Test small/big ranges + assert.eq((100 * m) % (7 * m), 2 * m) + assert.eq((100 * m) % (-7 * m), -5 * m) # NB: different from Go/Java + assert.eq((-100 * m) % (7 * m), 5 * m) # NB: different from Go/Java + assert.eq((-100 * m) % (-7 * m), -2 * m) + assert.eq((98 * m) % (7 * m), 0) + assert.eq((98 * m) % (-7 * m), 0) + assert.eq((-98 * m) % (7 * m), 0) + assert.eq((-98 * m) % (-7 * m), 0) + +remainder() + +# compound assignment +def compound(): + x = 1 + x += 1 + assert.eq(x, 2) + x -= 3 + assert.eq(x, -1) + x *= 39 + assert.eq(x, -39) + x //= 4 + assert.eq(x, -10) + x /= -2 + assert.eq(x, 5) + x %= 3 + assert.eq(x, 2) + + # use resolve.AllowBitwise to enable the ops: + x = 2 + x &= 1 + assert.eq(x, 0) + x |= 2 + assert.eq(x, 2) + x ^= 3 + assert.eq(x, 1) + x <<= 2 + assert.eq(x, 4) + x >>= 2 + assert.eq(x, 1) + +compound() + +# int conversion +# See float.star for float-to-int conversions. +# We follow Python 3 here, but I can't see the method in its madness. 
+# int from bool/int/float +assert.fails(int, "missing argument") # int() +assert.eq(int(False), 0) +assert.eq(int(True), 1) +assert.eq(int(3), 3) +assert.eq(int(3.1), 3) +assert.fails(lambda: int(3, base = 10), "non-string with explicit base") +assert.fails(lambda: int(True, 10), "non-string with explicit base") + +# int from string, base implicitly 10 +assert.eq(int("100000000000000000000"), 10000000000 * 10000000000) +assert.eq(int("-100000000000000000000"), -10000000000 * 10000000000) +assert.eq(int("123"), 123) +assert.eq(int("-123"), -123) +assert.eq(int("0123"), 123) # not octal +assert.eq(int("-0123"), -123) +assert.fails(lambda: int("0x12"), "invalid literal with base 10") +assert.fails(lambda: int("-0x12"), "invalid literal with base 10") +assert.fails(lambda: int("0o123"), "invalid literal.*base 10") +assert.fails(lambda: int("-0o123"), "invalid literal.*base 10") + +# int from string, explicit base +assert.eq(int("0"), 0) +assert.eq(int("00"), 0) +assert.eq(int("0", base = 10), 0) +assert.eq(int("00", base = 10), 0) +assert.eq(int("0", base = 8), 0) +assert.eq(int("00", base = 8), 0) +assert.eq(int("-0"), 0) +assert.eq(int("-00"), 0) +assert.eq(int("-0", base = 10), 0) +assert.eq(int("-00", base = 10), 0) +assert.eq(int("-0", base = 8), 0) +assert.eq(int("-00", base = 8), 0) +assert.eq(int("+0"), 0) +assert.eq(int("+00"), 0) +assert.eq(int("+0", base = 10), 0) +assert.eq(int("+00", base = 10), 0) +assert.eq(int("+0", base = 8), 0) +assert.eq(int("+00", base = 8), 0) +assert.eq(int("11", base = 9), 10) +assert.eq(int("-11", base = 9), -10) +assert.eq(int("10011", base = 2), 19) +assert.eq(int("-10011", base = 2), -19) +assert.eq(int("123", 8), 83) +assert.eq(int("-123", 8), -83) +assert.eq(int("0123", 8), 83) # redundant zeros permitted +assert.eq(int("-0123", 8), -83) +assert.eq(int("00123", 8), 83) +assert.eq(int("-00123", 8), -83) +assert.eq(int("0o123", 8), 83) +assert.eq(int("-0o123", 8), -83) +assert.eq(int("123", 7), 66) # 1*7*7 + 2*7 + 3 
+assert.eq(int("-123", 7), -66) +assert.eq(int("12", 16), 18) +assert.eq(int("-12", 16), -18) +assert.eq(int("0x12", 16), 18) +assert.eq(int("-0x12", 16), -18) +assert.eq(0x1000000000000001 * 0x1000000000000001, 0x1000000000000002000000000000001) +assert.eq(int("1010", 2), 10) +assert.eq(int("111111101", 2), 509) +assert.eq(int("0b0101", 0), 5) +assert.eq(int("0b0101", 2), 5) # prefix is redundant with explicit base +assert.eq(int("0b00000", 0), 0) +assert.eq(1111111111111111 * 1111111111111111, 1234567901234567654320987654321) +assert.fails(lambda: int("0x123", 8), "invalid literal.*base 8") +assert.fails(lambda: int("-0x123", 8), "invalid literal.*base 8") +assert.fails(lambda: int("0o123", 16), "invalid literal.*base 16") +assert.fails(lambda: int("-0o123", 16), "invalid literal.*base 16") +assert.fails(lambda: int("0x110", 2), "invalid literal.*base 2") + +# Base prefix is honored only if base=0, or if the prefix matches the explicit base. +# See https://github.com/google/starlark-go/issues/337 +assert.fails(lambda: int("0b0"), "invalid literal.*base 10") +assert.eq(int("0b0", 0), 0) +assert.eq(int("0b0", 2), 0) +assert.eq(int("0b0", 16), 0xb0) +assert.eq(int("0x0b0", 16), 0xb0) +assert.eq(int("0x0b0", 0), 0xb0) +assert.eq(int("0x0b0101", 16), 0x0b0101) + +# int from string, auto detect base +assert.eq(int("123", 0), 123) +assert.eq(int("+123", 0), +123) +assert.eq(int("-123", 0), -123) +assert.eq(int("0x12", 0), 18) +assert.eq(int("+0x12", 0), +18) +assert.eq(int("-0x12", 0), -18) +assert.eq(int("0o123", 0), 83) +assert.eq(int("+0o123", 0), +83) +assert.eq(int("-0o123", 0), -83) +assert.fails(lambda: int("0123", 0), "invalid literal.*base 0") # valid in Python 2.7 +assert.fails(lambda: int("-0123", 0), "invalid literal.*base 0") + +# github.com/google/starlark-go/issues/108 +assert.fails(lambda: int("0Oxa", 8), "invalid literal with base 8: 0Oxa") + +# follow-on bugs to issue 108 +assert.fails(lambda: int("--4"), "invalid literal with base 10: --4") 
+assert.fails(lambda: int("++4"), "invalid literal with base 10: \\+\\+4") +assert.fails(lambda: int("+-4"), "invalid literal with base 10: \\+-4") +assert.fails(lambda: int("0x-4", 16), "invalid literal with base 16: 0x-4") + +# bitwise union (int|int), intersection (int&int), XOR (int^int), unary not (~int), +# left shift (int<<int), and right shift (int>>int). +# use resolve.AllowBitwise to enable the ops. +# TODO(adonovan): this is not yet in the Starlark spec, +# but there is consensus that it should be. +assert.eq(1 | 2, 3) +assert.eq(3 | 6, 7) +assert.eq((1 | 2) & (2 | 4), 2) +assert.eq(1 ^ 2, 3) +assert.eq(2 ^ 2, 0) +assert.eq(1 | 0 ^ 1, 1) # check | and ^ operators precedence +assert.eq(~1, -2) +assert.eq(~(-2), 1) +assert.eq(~0, -1) +assert.eq(1 << 2, 4) +assert.eq(2 >> 1, 1) +assert.fails(lambda: 2 << -1, "negative shift count") +assert.fails(lambda: 1 << 512, "shift count too large") + +# comparisons +# TODO(adonovan): test: < > == != etc +def comparisons(): + for m in [1, maxint32 / 2, maxint32]: # Test small/big ranges + assert.lt(-2 * m, -1 * m) + assert.lt(-1 * m, 0 * m) + assert.lt(0 * m, 1 * m) + assert.lt(1 * m, 2 * m) + assert.true(2 * m >= 2 * m) + assert.true(2 * m > 1 * m) + assert.true(1 * m >= 1 * m) + assert.true(1 * m > 0 * m) + assert.true(0 * m >= 0 * m) + assert.true(0 * m > -1 * m) + assert.true(-1 * m >= -1 * m) + assert.true(-1 * m > -2 * m) + +comparisons() + +# precision +assert.eq(str(maxint64), "9223372036854775807") +assert.eq(str(maxint64 + 1), "9223372036854775808") +assert.eq(str(minint64), "-9223372036854775808") +assert.eq(str(minint64 - 1), "-9223372036854775809") +assert.eq(str(minint64 * minint64), "85070591730234615865843651857942052864") +assert.eq(str(maxint32 + 1), "2147483648") +assert.eq(str(minint32 - 1), "-2147483649") +assert.eq(str(minint32 * minint32), "4611686018427387904") +assert.eq(str(minint32 | maxint32), "-1") +assert.eq(str(minint32 & minint32), "-2147483648") +assert.eq(str(minint32 ^ maxint32), 
"-1") +assert.eq(str(minint32 // -1), "2147483648") + +# string formatting +assert.eq("%o %x %d" % (0o755, 0xDEADBEEF, 42), "755 deadbeef 42") +nums = [-95, -1, 0, +1, +95] +assert.eq(" ".join(["%o" % x for x in nums]), "-137 -1 0 1 137") +assert.eq(" ".join(["%d" % x for x in nums]), "-95 -1 0 1 95") +assert.eq(" ".join(["%i" % x for x in nums]), "-95 -1 0 1 95") +assert.eq(" ".join(["%x" % x for x in nums]), "-5f -1 0 1 5f") +assert.eq(" ".join(["%X" % x for x in nums]), "-5F -1 0 1 5F") +assert.eq("%o %x %d" % (123, 123, 123), "173 7b 123") +assert.eq("%o %x %d" % (123.1, 123.1, 123.1), "173 7b 123") # non-int operands are acceptable +assert.fails(lambda: "%d" % True, "cannot convert bool to int") diff --git a/starlark/testdata/json.star b/starlark/testdata/json.star new file mode 100644 index 0000000..7c7b316 --- /dev/null +++ b/starlark/testdata/json.star @@ -0,0 +1,147 @@ +# Tests of json module. + +load("assert.star", "assert") +load("json.star", "json") + +assert.eq(dir(json), ["decode", "encode", "indent"]) + +# Some of these cases were inspired by github.com/nst/JSONTestSuite. 
+ +## json.encode + +assert.eq(json.encode(None), "null") +assert.eq(json.encode(True), "true") +assert.eq(json.encode(False), "false") +assert.eq(json.encode(-123), "-123") +assert.eq(json.encode(12345*12345*12345*12345*12345*12345), "3539537889086624823140625") +assert.eq(json.encode(float(12345*12345*12345*12345*12345*12345)), "3.539537889086625e+24") +assert.eq(json.encode(12.345e67), "1.2345e+68") +assert.eq(json.encode("hello"), '"hello"') +assert.eq(json.encode([1, 2, 3]), "[1,2,3]") +assert.eq(json.encode((1, 2, 3)), "[1,2,3]") +assert.eq(json.encode(range(3)), "[0,1,2]") # a built-in iterable +assert.eq(json.encode(dict(x = 1, y = "two")), '{"x":1,"y":"two"}') +assert.eq(json.encode(dict(y = "two", x = 1)), '{"x":1,"y":"two"}') # key, not insertion, order +assert.eq(json.encode(struct(x = 1, y = "two")), '{"x":1,"y":"two"}') # a user-defined HasAttrs +assert.eq(json.encode("😹"[:1]), '"\\ufffd"') # invalid UTF-8 -> replacement char + +def encode_error(expr, error): + assert.fails(lambda: json.encode(expr), error) + +encode_error(float("NaN"), "json.encode: cannot encode non-finite float nan") +encode_error({1: "two"}, "dict has int key, want string") +encode_error(len, "cannot encode builtin_function_or_method as JSON") +encode_error(struct(x=[1, {"x": len}]), # nested failure + 'in field .x: at list index 1: in dict key "x": cannot encode...') +encode_error(struct(x=[1, {"x": len}]), # nested failure + 'in field .x: at list index 1: in dict key "x": cannot encode...') +encode_error({1: 2}, 'dict has int key, want string') + +## json.decode + +assert.eq(json.decode("null"), None) +assert.eq(json.decode("true"), True) +assert.eq(json.decode("false"), False) +assert.eq(json.decode("-123"), -123) +assert.eq(json.decode("-0"), -0) +assert.eq(json.decode("3539537889086624823140625"), 3539537889086624823140625) +assert.eq(json.decode("3539537889086624823140625.0"), float(3539537889086624823140625)) +assert.eq(json.decode("3.539537889086625e+24"), 
3.539537889086625e+24) +assert.eq(json.decode("0e+1"), 0) +assert.eq(json.decode("-0.0"), -0.0) +assert.eq(json.decode( + "-0.000000000000000000000000000000000000000000000000000000000000000000000000000001"), + -0.000000000000000000000000000000000000000000000000000000000000000000000000000001) +assert.eq(json.decode('[]'), []) +assert.eq(json.decode('[1]'), [1]) +assert.eq(json.decode('[1,2,3]'), [1, 2, 3]) +assert.eq(json.decode('{"one": 1, "two": 2}'), dict(one=1, two=2)) +assert.eq(json.decode('{"foo\\u0000bar": 42}'), {"foo\x00bar": 42}) +assert.eq(json.decode('"\\ud83d\\ude39\\ud83d\\udc8d"'), "😹💍") +assert.eq(json.decode('"\\u0123"'), 'ģ') +assert.eq(json.decode('"\x7f"'), "\x7f") + +def decode_error(expr, error): + assert.fails(lambda: json.decode(expr), error) + +decode_error('truefalse', + "json.decode: at offset 4, unexpected character 'f' after value") + +decode_error('"abc', "unclosed string literal") +decode_error('"ab\\gc"', "invalid character 'g' in string escape code") +decode_error("'abc'", "unexpected character '\\\\''") + +decode_error("1.2.3", "invalid number: 1.2.3") +decode_error("+1", "unexpected character '\\+'") +decode_error("-abc", "invalid number: -") +decode_error("-", "invalid number: -") +decode_error("-00", "invalid number: -00") +decode_error("00", "invalid number: 00") +decode_error("--1", "invalid number: --1") +decode_error("-+1", "invalid number: -\\+1") +decode_error("1e1e1", "invalid number: 1e1e1") +decode_error("0123", "invalid number: 0123") +decode_error("000.123", "invalid number: 000.123") +decode_error("-0123", "invalid number: -0123") +decode_error("-000.123", "invalid number: -000.123") +decode_error("0x123", "unexpected character 'x' after value") + +decode_error('[1, 2 ', "unexpected end of file") +decode_error('[1, 2, ', "unexpected end of file") +decode_error('[1, 2, ]', "unexpected character ']'") +decode_error('[1, 2, }', "unexpected character '}'") +decode_error('[1, 2}', "got '}', want ',' or ']'") + 
+decode_error('{"one": 1', "unexpected end of file") +decode_error('{"one" 1', "after object key, got '1', want ':'") +decode_error('{"one": 1 "two": 2', "in object, got '\"', want ',' or '}'") +decode_error('{"one": 1,', "unexpected end of file") +decode_error('{"one": 1, }', "unexpected character '}'") +decode_error('{"one": 1]', "in object, got ']', want ',' or '}'") + +def codec(x): + return json.decode(json.encode(x)) + +# string round-tripping +strings = [ + "😿", # U+1F63F CRYING_CAT_FACE + "🐱👤", # CAT FACE + ZERO WIDTH JOINER + BUST IN SILHOUETTE +] +assert.eq(codec(strings), strings) + +# codepoints is a string with every 16-bit code point. +codepoints = ''.join(['%c' % c for c in range(65536)]) +assert.eq(codec(codepoints), codepoints) + +# number round-tripping +numbers = [ + 0, 1, -1, +1, 1.23e45, -1.23e-45, + 3539537889086624823140625, + float(3539537889086624823140625), +] +assert.eq(codec(numbers), numbers) + +## json.indent + +s = json.encode(dict(x = 1, y = ["one", "two"])) + +assert.eq(json.indent(s), '''{ + "x": 1, + "y": [ + "one", + "two" + ] +}''') + +assert.eq(json.decode(json.indent(s)), {"x": 1, "y": ["one", "two"]}) + +assert.eq(json.indent(s, prefix='¶', indent='–––'), '''{ +¶–––"x": 1, +¶–––"y": [ +¶––––––"one", +¶––––––"two" +¶–––] +¶}''') + +assert.fails(lambda: json.indent("!@#$%^& this is not json"), 'invalid character') +--- diff --git a/starlark/testdata/list.star b/starlark/testdata/list.star new file mode 100644 index 0000000..526a962 --- /dev/null +++ b/starlark/testdata/list.star @@ -0,0 +1,276 @@ +# Tests of Starlark 'list' + +load("assert.star", "assert", "freeze") + +# literals +assert.eq([], []) +assert.eq([1], [1]) +assert.eq([1], [1]) +assert.eq([1, 2], [1, 2]) +assert.ne([1, 2, 3], [1, 2, 4]) + +# truth +assert.true([0]) +assert.true(not []) + +# indexing, x[i] +abc = list("abc".elems()) +assert.fails(lambda: abc[-4], "list index -4 out of range \\[-3:2]") +assert.eq(abc[-3], "a") +assert.eq(abc[-2], "b") 
+assert.eq(abc[-1], "c") +assert.eq(abc[0], "a") +assert.eq(abc[1], "b") +assert.eq(abc[2], "c") +assert.fails(lambda: abc[3], "list index 3 out of range \\[-3:2]") + +# x[i] = ... +x3 = [0, 1, 2] +x3[1] = 2 +x3[2] += 3 +assert.eq(x3, [0, 2, 5]) + +def f2(): + x3[3] = 4 + +assert.fails(f2, "out of range") +freeze(x3) + +def f3(): + x3[0] = 0 + +assert.fails(f3, "cannot assign to element of frozen list") +assert.fails(x3.clear, "cannot clear frozen list") + +# list + list +assert.eq([1, 2, 3] + [3, 4, 5], [1, 2, 3, 3, 4, 5]) +assert.fails(lambda: [1, 2] + (3, 4), "unknown.*list \\+ tuple") +assert.fails(lambda: (1, 2) + [3, 4], "unknown.*tuple \\+ list") + +# list * int, int * list +assert.eq(abc * 0, []) +assert.eq(abc * -1, []) +assert.eq(abc * 1, abc) +assert.eq(abc * 3, ["a", "b", "c", "a", "b", "c", "a", "b", "c"]) +assert.eq(0 * abc, []) +assert.eq(-1 * abc, []) +assert.eq(1 * abc, abc) +assert.eq(3 * abc, ["a", "b", "c", "a", "b", "c", "a", "b", "c"]) + +# list comprehensions +assert.eq([2 * x for x in [1, 2, 3]], [2, 4, 6]) +assert.eq([2 * x for x in [1, 2, 3] if x > 1], [4, 6]) +assert.eq( + [(x, y) for x in [1, 2] for y in [3, 4]], + [(1, 3), (1, 4), (2, 3), (2, 4)], +) +assert.eq([(x, y) for x in [1, 2] if x == 2 for y in [3, 4]], [(2, 3), (2, 4)]) +assert.eq([2 * x for x in (1, 2, 3)], [2, 4, 6]) +assert.eq([x for x in "abc".elems()], ["a", "b", "c"]) +assert.eq([x for x in {"a": 1, "b": 2}], ["a", "b"]) +assert.eq([(y, x) for x, y in {1: 2, 3: 4}.items()], [(2, 1), (4, 3)]) + +# corner cases of parsing: +assert.eq([x for x in range(12) if x % 2 == 0 if x % 3 == 0], [0, 6]) +assert.eq([x for x in [1, 2] if lambda: None], [1, 2]) +assert.eq([x for x in [1, 2] if (lambda: 3 if True else 4)], [1, 2]) + +# list function +assert.eq(list(), []) +assert.eq(list("ab".elems()), ["a", "b"]) + +# A list comprehension defines a separate lexical block, +# whether at top-level... 
+a = [1, 2] +b = [a for a in [3, 4]] +assert.eq(a, [1, 2]) +assert.eq(b, [3, 4]) + +# ...or local to a function. +def listcompblock(): + c = [1, 2] + d = [c for c in [3, 4]] + assert.eq(c, [1, 2]) + assert.eq(d, [3, 4]) + +listcompblock() + +# list.pop +x4 = [1, 2, 3, 4, 5] +assert.fails(lambda: x4.pop(-6), "index -6 out of range \\[-5:4]") +assert.fails(lambda: x4.pop(6), "index 6 out of range \\[-5:4]") +assert.eq(x4.pop(), 5) +assert.eq(x4, [1, 2, 3, 4]) +assert.eq(x4.pop(1), 2) +assert.eq(x4, [1, 3, 4]) +assert.eq(x4.pop(0), 1) +assert.eq(x4, [3, 4]) +assert.eq(x4.pop(-2), 3) +assert.eq(x4, [4]) +assert.eq(x4.pop(-1), 4) +assert.eq(x4, []) + +# TODO(adonovan): test uses of list as sequence +# (for loop, comprehension, library functions). + +# x += y for lists is equivalent to x.extend(y). +# y may be a sequence. +# TODO: Test that side-effects of 'x' occur only once. +def list_extend(): + a = [1, 2, 3] + b = a + a = a + [4] # creates a new list + assert.eq(a, [1, 2, 3, 4]) + assert.eq(b, [1, 2, 3]) # b is unchanged + + a = [1, 2, 3] + b = a + a += [4] # updates a (and thus b) in place + assert.eq(a, [1, 2, 3, 4]) + assert.eq(b, [1, 2, 3, 4]) # alias observes the change + + a = [1, 2, 3] + b = a + a.extend([4]) # updates existing list + assert.eq(a, [1, 2, 3, 4]) + assert.eq(b, [1, 2, 3, 4]) # alias observes the change + +list_extend() + +# Unlike list.extend(iterable), list += iterable makes its LHS name local. 
+a_list = [] + +def f4(): + a_list += [1] # binding use => a_list is a local var + +assert.fails(f4, "local variable a_list referenced before assignment") + +# list += <not iterable> +def f5(): + x = [] + x += 1 + +assert.fails(f5, "unknown binary op: list \\+ int") + +# frozen list += iterable +def f6(): + x = [] + freeze(x) + x += [1] + +assert.fails(f6, "cannot apply \\+= to frozen list") + +# list += hasfields (hasfields is not iterable but defines list+hasfields) +def f7(): + x = [] + x += hasfields() + return x + +assert.eq(f7(), 42) # weird, but exercises a corner case in list+=x. + +# append +x5 = [1, 2, 3] +x5.append(4) +x5.append("abc") +assert.eq(x5, [1, 2, 3, 4, "abc"]) + +# extend +x5a = [1, 2, 3] +x5a.extend("abc".elems()) # string +x5a.extend((True, False)) # tuple +assert.eq(x5a, [1, 2, 3, "a", "b", "c", True, False]) + +# list.insert +def insert_at(index): + x = list(range(3)) + x.insert(index, 42) + return x + +assert.eq(insert_at(-99), [42, 0, 1, 2]) +assert.eq(insert_at(-2), [0, 42, 1, 2]) +assert.eq(insert_at(-1), [0, 1, 42, 2]) +assert.eq(insert_at(0), [42, 0, 1, 2]) +assert.eq(insert_at(1), [0, 42, 1, 2]) +assert.eq(insert_at(2), [0, 1, 42, 2]) +assert.eq(insert_at(3), [0, 1, 2, 42]) +assert.eq(insert_at(4), [0, 1, 2, 42]) + +# list.remove +def remove(v): + x = [3, 1, 4, 1] + x.remove(v) + return x + +assert.eq(remove(3), [1, 4, 1]) +assert.eq(remove(1), [3, 4, 1]) +assert.eq(remove(4), [3, 1, 1]) +assert.fails(lambda: [3, 1, 4, 1].remove(42), "remove: element not found") + +# list.index +bananas = list("bananas".elems()) +assert.eq(bananas.index("a"), 1) # bAnanas +assert.fails(lambda: bananas.index("d"), "value not in list") + +# start +assert.eq(bananas.index("a", -1000), 1) # bAnanas +assert.eq(bananas.index("a", 0), 1) # bAnanas +assert.eq(bananas.index("a", 1), 1) # bAnanas +assert.eq(bananas.index("a", 2), 3) # banAnas +assert.eq(bananas.index("a", 3), 3) # banAnas +assert.eq(bananas.index("b", 0), 0) # Bananas 
+assert.eq(bananas.index("n", -3), 4) # banaNas +assert.fails(lambda: bananas.index("n", -2), "value not in list") +assert.eq(bananas.index("s", -2), 6) # bananaS +assert.fails(lambda: bananas.index("b", 1), "value not in list") + +# start, end +assert.eq(bananas.index("s", -1000, 7), 6) # bananaS +assert.fails(lambda: bananas.index("s", -1000, 6), "value not in list") +assert.fails(lambda: bananas.index("d", -1000, 1000), "value not in list") + +# slicing, x[i:j:k] +assert.eq(bananas[6::-2], list("snnb".elems())) +assert.eq(bananas[5::-2], list("aaa".elems())) +assert.eq(bananas[4::-2], list("nnb".elems())) +assert.eq(bananas[99::-2], list("snnb".elems())) +assert.eq(bananas[100::-2], list("snnb".elems())) +# TODO(adonovan): many more tests + +# iterator invalidation +def iterator1(): + list = [0, 1, 2] + for x in list: + list[x] = 2 * x + return list + +assert.fails(iterator1, "assign to element.* during iteration") + +def iterator2(): + list = [0, 1, 2] + for x in list: + list.remove(x) + +assert.fails(iterator2, "remove.*during iteration") + +def iterator3(): + list = [0, 1, 2] + for x in list: + list.append(3) + +assert.fails(iterator3, "append.*during iteration") + +def iterator4(): + list = [0, 1, 2] + for x in list: + list.extend([3, 4]) + +assert.fails(iterator4, "extend.*during iteration") + +def iterator5(): + def f(x): + x.append(4) + + list = [1, 2, 3] + _ = [f(list) for x in list] + +assert.fails(iterator5, "append.*during iteration") diff --git a/starlark/testdata/misc.star b/starlark/testdata/misc.star new file mode 100644 index 0000000..e7e0c06 --- /dev/null +++ b/starlark/testdata/misc.star @@ -0,0 +1,139 @@ +# Miscellaneous tests of Starlark evaluation. +# This is a "chunked" file: each "---" effectively starts a new file. + +# TODO(adonovan): move these tests into more appropriate files. 
+# TODO(adonovan): test coverage: +# - stmts: pass; if cond fail; += and failures; +# for x fail; for x not iterable; for can't assign; for +# error in loop body +# - subassign fail +# - x[i]=x fail in both operands; frozen x; list index not int; boundscheck +# - x.f = ... +# - failure in list expr [...]; tuple expr; dict expr (bad key) +# - cond expr semantics; failures +# - x[i] failures in both args; dict and iterator key and range checks; +# unhandled operand types +# - +: list/list, int/int, string/string, tuple+tuple, dict/dict; +# - * and ** calls: various errors +# - call of non-function +# - slice x[ijk] +# - comprehension: unhashable dict key; +# scope of vars (local and toplevel); noniterable for clause +# - unknown unary op +# - ordering of values +# - freeze, transitivity of its effect. +# - add an application-defined type to the environment so we can test it. +# - even more: +# +# eval +# pass statement +# assign to tuple l-value -- illegal +# assign to list l-value -- illegal +# assign to field +# tuple + tuple +# call with *args, **kwargs +# slice with step +# tuple slice +# interpolate with %c, %% + +load("assert.star", "assert") + +# Ordered comparisons require values of the same type. 
+assert.fails(lambda: None < None, "not impl") +assert.fails(lambda: None < False, "not impl") +assert.fails(lambda: False < list, "not impl") +assert.fails(lambda: list < {}, "not impl") +assert.fails(lambda: {} < (lambda: None), "not impl") +assert.fails(lambda: (lambda: None) < 0, "not impl") +assert.fails(lambda: 0 < [], "not impl") +assert.fails(lambda: [] < "", "not impl") +assert.fails(lambda: "" < (), "not impl") +# Except int < float: +assert.lt(1, 2.0) +assert.lt(2.0, 3) + +--- +# cyclic data structures +load("assert.star", "assert") + +cyclic = [1, 2, 3] # list cycle +cyclic[1] = cyclic +assert.eq(str(cyclic), "[1, [...], 3]") +assert.fails(lambda: cyclic < cyclic, "maximum recursion") +assert.fails(lambda: cyclic == cyclic, "maximum recursion") +cyclic2 = [1, 2, 3] +cyclic2[1] = cyclic2 +assert.fails(lambda: cyclic2 == cyclic, "maximum recursion") + +cyclic3 = [1, [2, 3]] # list-list cycle +cyclic3[1][0] = cyclic3 +assert.eq(str(cyclic3), "[1, [[...], 3]]") +cyclic4 = {"x": 1} +cyclic4["x"] = cyclic4 +assert.eq(str(cyclic4), "{\"x\": {...}}") +cyclic5 = [0, {"x": 1}] # list-dict cycle +cyclic5[1]["x"] = cyclic5 +assert.eq(str(cyclic5), "[0, {\"x\": [...]}]") +assert.eq(str(cyclic5), "[0, {\"x\": [...]}]") +assert.fails(lambda: cyclic5 == cyclic5 ,"maximum recursion") +cyclic6 = [0, {"x": 1}] +cyclic6[1]["x"] = cyclic6 +assert.fails(lambda: cyclic5 == cyclic6, "maximum recursion") + +--- +# regression +load("assert.star", "assert") + +# was a parse error: +assert.eq(("ababab"[2:]).replace("b", "c"), "acac") +assert.eq("ababab"[2:].replace("b", "c"), "acac") + +# test parsing of line continuation, at toplevel and in expression. +three = 1 + \ + 2 +assert.eq(1 + \ + 2, three) + +--- +# A regression test for error position information. + +_ = {}.get(1, default=2) ### "get: unexpected keyword arguments" + +--- +# Load exposes explicitly declared globals from other modules. 
+load('assert.star', 'assert', 'freeze') +assert.eq(str(freeze), '<built-in function freeze>') + +--- +# Load does not expose pre-declared globals from other modules. +# See github.com/google/skylark/issues/75. +load('assert.star', 'assert', 'matches') ### "matches not found in module" + +--- +# Load does not expose universals accessible in other modules. +load('assert.star', 'len') ### "len not found in module" + + +--- +# Test plus folding optimization. +load('assert.star', 'assert') + +s = "s" +l = [4] +t = (4,) + +assert.eq("a" + "b" + "c", "abc") +assert.eq("a" + "b" + s + "c", "absc") +assert.eq(() + (1,) + (2, 3), (1, 2, 3)) +assert.eq(() + (1,) + t + (2, 3), (1, 4, 2, 3)) +assert.eq([] + [1] + [2, 3], [1, 2, 3]) +assert.eq([] + [1] + l + [2, 3], [1, 4, 2, 3]) + +assert.fails(lambda: "a" + "b" + 1 + "c", "unknown binary op: string \\+ int") +assert.fails(lambda: () + () + 1 + (), "unknown binary op: tuple \\+ int") +assert.fails(lambda: [] + [] + 1 + [], "unknown binary op: list \\+ int") + + + +--- +load('assert.star', 'froze') ### `name froze not found .*did you mean freeze` diff --git a/starlark/testdata/module.star b/starlark/testdata/module.star new file mode 100644 index 0000000..6aac2e2 --- /dev/null +++ b/starlark/testdata/module.star @@ -0,0 +1,17 @@ +# Tests of Module. 
+ +load("assert.star", "assert") + +assert.eq(type(assert), "module") +assert.eq(str(assert), '<module "assert">') +assert.eq(dir(assert), ["contains", "eq", "fail", "fails", "lt", "ne", "true"]) +assert.fails(lambda : {assert: None}, "unhashable: module") + +def assignfield(): + assert.foo = None + +assert.fails(assignfield, "can't assign to .foo field of module") + +# no such field +assert.fails(lambda : assert.nonesuch, "module has no .nonesuch field or method$") +assert.fails(lambda : assert.falls, "module has no .falls field or method .did you mean .fails\\?") diff --git a/starlark/testdata/paths.star b/starlark/testdata/paths.star new file mode 100644 index 0000000..cf8a3c4 --- /dev/null +++ b/starlark/testdata/paths.star @@ -0,0 +1,250 @@ +# Copyright 2017 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Skylib module containing file path manipulation functions. + +NOTE: The functions in this module currently only support paths with Unix-style +path separators (forward slash, "/"); they do not handle Windows-style paths +with backslash separators or drive letters. +""" + +# This file is in the Bazel build language dialect of Starlark, +# so declarations of 'fail' and 'struct' are required to make +# it compile in the core language. +def fail(msg): + print(msg) + +struct = dict + +def _basename(p): + """Returns the basename (i.e., the file portion) of a path. 
+ + Note that if `p` ends with a slash, this function returns an empty string. + This matches the behavior of Python's `os.path.basename`, but differs from + the Unix `basename` command (which would return the path segment preceding + the final slash). + + Args: + p: The path whose basename should be returned. + + Returns: + The basename of the path, which includes the extension. + """ + return p.rpartition("/")[-1] + +def _dirname(p): + """Returns the dirname of a path. + + The dirname is the portion of `p` up to but not including the file portion + (i.e., the basename). Any slashes immediately preceding the basename are not + included, unless omitting them would make the dirname empty. + + Args: + p: The path whose dirname should be returned. + + Returns: + The dirname of the path. + """ + prefix, sep, _ = p.rpartition("/") + if not prefix: + return sep + else: + # If there are multiple consecutive slashes, strip them all out as Python's + # os.path.dirname does. + return prefix.rstrip("/") + +def _is_absolute(path): + """Returns `True` if `path` is an absolute path. + + Args: + path: A path (which is a string). + + Returns: + `True` if `path` is an absolute path. + """ + return path.startswith("/") or (len(path) > 2 and path[1] == ":") + +def _join(path, *others): + """Joins one or more path components intelligently. + + This function mimics the behavior of Python's `os.path.join` function on POSIX + platform. It returns the concatenation of `path` and any members of `others`, + inserting directory separators before each component except the first. The + separator is not inserted if the path up until that point is either empty or + already ends in a separator. + + If any component is an absolute path, all previous components are discarded. + + Args: + path: A path segment. + *others: Additional path segments. + + Returns: + A string containing the joined paths. 
+ """ + result = path + + for p in others: + if _is_absolute(p): + result = p + elif not result or result.endswith("/"): + result += p + else: + result += "/" + p + + return result + +def _normalize(path): + """Normalizes a path, eliminating double slashes and other redundant segments. + + This function mimics the behavior of Python's `os.path.normpath` function on + POSIX platforms; specifically: + + - If the entire path is empty, "." is returned. + - All "." segments are removed, unless the path consists solely of a single + "." segment. + - Trailing slashes are removed, unless the path consists solely of slashes. + - ".." segments are removed as long as there are corresponding segments + earlier in the path to remove; otherwise, they are retained as leading ".." + segments. + - Single and double leading slashes are preserved, but three or more leading + slashes are collapsed into a single leading slash. + - Multiple adjacent internal slashes are collapsed into a single slash. + + Args: + path: A path. + + Returns: + The normalized path. + """ + if not path: + return "." + + if path.startswith("//") and not path.startswith("///"): + initial_slashes = 2 + elif path.startswith("/"): + initial_slashes = 1 + else: + initial_slashes = 0 + is_relative = (initial_slashes == 0) + + components = path.split("/") + new_components = [] + + for component in components: + if component in ("", "."): + continue + if component == "..": + if new_components and new_components[-1] != "..": + # Only pop the last segment if it isn't another "..". + new_components.pop() + elif is_relative: + # Preserve leading ".." segments for relative paths. + new_components.append(component) + else: + new_components.append(component) + + path = "/".join(new_components) + if not is_relative: + path = ("/" * initial_slashes) + path + + return path or "." + +def _relativize(path, start): + """Returns the portion of `path` that is relative to `start`. 
+ + Because we do not have access to the underlying file system, this + implementation differs slightly from Python's `os.path.relpath` in that it + will fail if `path` is not beneath `start` (rather than use parent segments to + walk up to the common file system root). + + Relativizing paths that start with parent directory references only works if + the path both start with the same initial parent references. + + Args: + path: The path to relativize. + start: The ancestor path against which to relativize. + + Returns: + The portion of `path` that is relative to `start`. + """ + segments = _normalize(path).split("/") + start_segments = _normalize(start).split("/") + if start_segments == ["."]: + start_segments = [] + start_length = len(start_segments) + + if (path.startswith("/") != start.startswith("/") or + len(segments) < start_length): + fail("Path '%s' is not beneath '%s'" % (path, start)) + + for ancestor_segment, segment in zip(start_segments, segments): + if ancestor_segment != segment: + fail("Path '%s' is not beneath '%s'" % (path, start)) + + length = len(segments) - start_length + result_segments = segments[-length:] + return "/".join(result_segments) + +def _replace_extension(p, new_extension): + """Replaces the extension of the file at the end of a path. + + If the path has no extension, the new extension is added to it. + + Args: + p: The path whose extension should be replaced. + new_extension: The new extension for the file. The new extension should + begin with a dot if you want the new filename to have one. + + Returns: + The path with the extension replaced (or added, if it did not have one). + """ + return _split_extension(p)[0] + new_extension + +def _split_extension(p): + """Splits the path `p` into a tuple containing the root and extension. + + Leading periods on the basename are ignored, so + `path.split_extension(".bashrc")` returns `(".bashrc", "")`. + + Args: + p: The path whose root and extension should be split. 
+ + Returns: + A tuple `(root, ext)` such that the root is the path without the file + extension, and `ext` is the file extension (which, if non-empty, contains + the leading dot). The returned tuple always satisfies the relationship + `root + ext == p`. + """ + b = _basename(p) + last_dot_in_basename = b.rfind(".") + + # If there is no dot or the only dot in the basename is at the front, then + # there is no extension. + if last_dot_in_basename <= 0: + return (p, "") + + dot_distance_from_end = len(b) - last_dot_in_basename + return (p[:-dot_distance_from_end], p[-dot_distance_from_end:]) + +paths = struct( + basename = _basename, + dirname = _dirname, + is_absolute = _is_absolute, + join = _join, + normalize = _normalize, + relativize = _relativize, + replace_extension = _replace_extension, + split_extension = _split_extension, +) diff --git a/starlark/testdata/recursion.star b/starlark/testdata/recursion.star new file mode 100644 index 0000000..3368614 --- /dev/null +++ b/starlark/testdata/recursion.star @@ -0,0 +1,43 @@ +# Tests of Starlark recursion and while statement. + +# This is a "chunked" file: each "---" effectively starts a new file. 
+ +# option:recursion + +load("assert.star", "assert") + +def sum(n): + r = 0 + while n > 0: + r += n + n -= 1 + return r + +def fib(n): + if n <= 1: + return 1 + return fib(n-1) + fib(n-2) + +def while_break(n): + r = 0 + while n > 0: + if n == 5: + break + r += n + n -= 1 + return r + +def while_continue(n): + r = 0 + while n > 0: + if n % 2 == 0: + n -= 1 + continue + r += n + n -= 1 + return r + +assert.eq(fib(5), 8) +assert.eq(sum(5), 5+4+3+2+1) +assert.eq(while_break(10), 40) +assert.eq(while_continue(10), 25) diff --git a/starlark/testdata/set.star b/starlark/testdata/set.star new file mode 100644 index 0000000..bca4144 --- /dev/null +++ b/starlark/testdata/set.star @@ -0,0 +1,118 @@ +# Tests of Starlark 'set' +# option:set + +# Sets are not a standard part of Starlark, so the features +# tested in this file must be enabled in the application by setting +# resolve.AllowSet. (All sets are created by calls to the 'set' +# built-in or derived from operations on existing sets.) +# The semantics are subject to change as the spec evolves. + +# TODO(adonovan): support set mutation: +# - del set[k] +# - set.remove +# - set.update +# - set.clear +# - set += iterable, perhaps? +# Test iterator invalidation. + +load("assert.star", "assert") + +# literals +# Parser does not currently support {1, 2, 3}. +# TODO(adonovan): add test to syntax/testdata/errors.star. + +# set comprehensions +# Parser does not currently support {x for x in y}. +# See syntax/testdata/errors.star. 
+ +# set constructor +assert.eq(type(set()), "set") +assert.eq(list(set()), []) +assert.eq(type(set([1, 3, 2, 3])), "set") +assert.eq(list(set([1, 3, 2, 3])), [1, 3, 2]) +assert.eq(type(set("hello".elems())), "set") +assert.eq(list(set("hello".elems())), ["h", "e", "l", "o"]) +assert.eq(list(set(range(3))), [0, 1, 2]) +assert.fails(lambda : set(1), "got int, want iterable") +assert.fails(lambda : set(1, 2, 3), "got 3 arguments") +assert.fails(lambda : set([1, 2, {}]), "unhashable type: dict") + +# truth +assert.true(not set()) +assert.true(set([False])) +assert.true(set([1, 2, 3])) + +x = set([1, 2, 3]) +y = set([3, 4, 5]) + +# set + any is not defined +assert.fails(lambda : x + y, "unknown.*: set \\+ set") + +# set | set (use resolve.AllowBitwise to enable it) +assert.eq(list(set("a".elems()) | set("b".elems())), ["a", "b"]) +assert.eq(list(set("ab".elems()) | set("bc".elems())), ["a", "b", "c"]) +assert.fails(lambda : set() | [], "unknown binary op: set | list") +assert.eq(type(x | y), "set") +assert.eq(list(x | y), [1, 2, 3, 4, 5]) +assert.eq(list(x | set([5, 1])), [1, 2, 3, 5]) +assert.eq(list(x | set((6, 5, 4))), [1, 2, 3, 6, 5, 4]) + +# set.union (allows any iterable for right operand) +assert.eq(list(set("a".elems()).union("b".elems())), ["a", "b"]) +assert.eq(list(set("ab".elems()).union("bc".elems())), ["a", "b", "c"]) +assert.eq(set().union([]), set()) +assert.eq(type(x.union(y)), "set") +assert.eq(list(x.union(y)), [1, 2, 3, 4, 5]) +assert.eq(list(x.union([5, 1])), [1, 2, 3, 5]) +assert.eq(list(x.union((6, 5, 4))), [1, 2, 3, 6, 5, 4]) +assert.fails(lambda : x.union([1, 2, {}]), "unhashable type: dict") + +# intersection, set & set (use resolve.AllowBitwise to enable it) +assert.eq(list(set("a".elems()) & set("b".elems())), []) +assert.eq(list(set("ab".elems()) & set("bc".elems())), ["b"]) + +# symmetric difference, set ^ set (use resolve.AllowBitwise to enable it) +assert.eq(set([1, 2, 3]) ^ set([4, 5, 3]), set([1, 2, 4, 5])) + +def 
test_set_augmented_assign(): + x = set([1, 2, 3]) + x &= set([2, 3]) + assert.eq(x, set([2, 3])) + x |= set([1]) + assert.eq(x, set([1, 2, 3])) + x ^= set([4, 5, 3]) + assert.eq(x, set([1, 2, 4, 5])) + +test_set_augmented_assign() + +# len +assert.eq(len(x), 3) +assert.eq(len(y), 3) +assert.eq(len(x | y), 5) + +# str +assert.eq(str(set([1])), "set([1])") +assert.eq(str(set([2, 3])), "set([2, 3])") +assert.eq(str(set([3, 2])), "set([3, 2])") + +# comparison +assert.eq(x, x) +assert.eq(y, y) +assert.true(x != y) +assert.eq(set([1, 2, 3]), set([3, 2, 1])) +assert.fails(lambda : x < y, "set < set not implemented") + +# iteration +assert.true(type([elem for elem in x]), "list") +assert.true(list([elem for elem in x]), [1, 2, 3]) + +def iter(): + list = [] + for elem in x: + list.append(elem) + return list + +assert.eq(iter(), [1, 2, 3]) + +# sets are not indexable +assert.fails(lambda : x[0], "unhandled.*operation") diff --git a/starlark/testdata/string.star b/starlark/testdata/string.star new file mode 100644 index 0000000..b317d1a --- /dev/null +++ b/starlark/testdata/string.star @@ -0,0 +1,472 @@ +# Tests of Starlark 'string' +# option:set + +load("assert.star", "assert") + +# raw string literals: +assert.eq(r"a\bc", "a\\bc") + +# truth +assert.true("abc") +assert.true(chr(0)) +assert.true(not "") + +# str + str +assert.eq("a" + "b" + "c", "abc") + +# str * int, int * str +assert.eq("abc" * 0, "") +assert.eq("abc" * -1, "") +assert.eq("abc" * 1, "abc") +assert.eq("abc" * 5, "abcabcabcabcabc") +assert.eq(0 * "abc", "") +assert.eq(-1 * "abc", "") +assert.eq(1 * "abc", "abc") +assert.eq(5 * "abc", "abcabcabcabcabc") +assert.fails(lambda: 1.0 * "abc", "unknown.*float \\* str") +assert.fails(lambda: "abc" * (1000000 * 1000000), "repeat count 1000000000000 too large") +assert.fails(lambda: "abc" * 1000000 * 1000000, "excessive repeat \\(3000000 \\* 1000000 elements") + +# len +assert.eq(len("Hello, 世界!"), 14) +assert.eq(len("𐐷"), 4) # U+10437 has a 4-byte UTF-8 encoding 
(and a 2-code UTF-16 encoding) + +# chr & ord +assert.eq(chr(65), "A") # 1-byte UTF-8 encoding +assert.eq(chr(1049), "Й") # 2-byte UTF-8 encoding +assert.eq(chr(0x1F63F), "😿") # 4-byte UTF-8 encoding +assert.fails(lambda: chr(-1), "Unicode code point -1 out of range \\(<0\\)") +assert.fails(lambda: chr(0x110000), "Unicode code point U\\+110000 out of range \\(>0x10FFFF\\)") +assert.eq(ord("A"), 0x41) +assert.eq(ord("Й"), 0x419) +assert.eq(ord("世"), 0x4e16) +assert.eq(ord("😿"), 0x1F63F) +assert.eq(ord("Й"[1:]), 0xFFFD) # = Unicode replacement character +assert.fails(lambda: ord("abc"), "string encodes 3 Unicode code points, want 1") +assert.fails(lambda: ord(""), "string encodes 0 Unicode code points, want 1") +assert.fails(lambda: ord("😿"[1:]), "string encodes 3 Unicode code points, want 1") # 3 x 0xFFFD + +# string.codepoint_ords +assert.eq(type("abcЙ😿".codepoint_ords()), "string.codepoints") +assert.eq(str("abcЙ😿".codepoint_ords()), '"abcЙ😿".codepoint_ords()') +assert.eq(list("abcЙ😿".codepoint_ords()), [97, 98, 99, 1049, 128575]) +assert.eq(list(("A" + "😿Z"[1:]).codepoint_ords()), [ord("A"), 0xFFFD, 0xFFFD, 0xFFFD, ord("Z")]) +assert.eq(list("".codepoint_ords()), []) +assert.fails(lambda: "abcЙ😿".codepoint_ords()[2], "unhandled index") # not indexable +assert.fails(lambda: len("abcЙ😿".codepoint_ords()), "no len") # unknown length + +# string.codepoints +assert.eq(type("abcЙ😿".codepoints()), "string.codepoints") +assert.eq(str("abcЙ😿".codepoints()), '"abcЙ😿".codepoints()') +assert.eq(list("abcЙ😿".codepoints()), ["a", "b", "c", "Й", "😿"]) +assert.eq(list(("A" + "😿Z"[1:]).codepoints()), ["A", "�", "�", "�", "Z"]) +assert.eq(list("".codepoints()), []) +assert.fails(lambda: "abcЙ😿".codepoints()[2], "unhandled index") # not indexable +assert.fails(lambda: len("abcЙ😿".codepoints()), "no len") # unknown length + +# string.elem_ords +assert.eq(type("abcЙ😿".elem_ords()), "string.elems") +assert.eq(str("abcЙ😿".elem_ords()), '"abcЙ😿".elem_ords()') 
+assert.eq(list("abcЙ😿".elem_ords()), [97, 98, 99, 208, 153, 240, 159, 152, 191]) +assert.eq(list(("A" + "😿Z"[1:]).elem_ords()), [65, 159, 152, 191, 90]) +assert.eq(list("".elem_ords()), []) +assert.eq("abcЙ😿".elem_ords()[2], 99) # indexable +assert.eq(len("abcЙ😿".elem_ords()), 9) # known length + +# string.elems (1-byte substrings, which are invalid text) +assert.eq(type("abcЙ😿".elems()), "string.elems") +assert.eq(str("abcЙ😿".elems()), '"abcЙ😿".elems()') +assert.eq( + repr(list("abcЙ😿".elems())), + r'["a", "b", "c", "\xd0", "\x99", "\xf0", "\x9f", "\x98", "\xbf"]', +) +assert.eq( + repr(list(("A" + "😿Z"[1:]).elems())), + r'["A", "\x9f", "\x98", "\xbf", "Z"]', +) +assert.eq(list("".elems()), []) +assert.eq("abcЙ😿".elems()[2], "c") # indexable +assert.eq(len("abcЙ😿".elems()), 9) # known length + +# indexing, x[i] +assert.eq("Hello, 世界!"[0], "H") +assert.eq(repr("Hello, 世界!"[7]), r'"\xe4"') # (invalid text) +assert.eq("Hello, 世界!"[13], "!") +assert.fails(lambda: "abc"[-4], "out of range") +assert.eq("abc"[-3], "a") +assert.eq("abc"[-2], "b") +assert.eq("abc"[-1], "c") +assert.eq("abc"[0], "a") +assert.eq("abc"[1], "b") +assert.eq("abc"[2], "c") +assert.fails(lambda: "abc"[4], "out of range") + +# x[i] = ... 
+def f(): + "abc"[1] = "B" + +assert.fails(f, "string.*does not support.*assignment") + +# slicing, x[i:j] +assert.eq("abc"[:], "abc") +assert.eq("abc"[-4:], "abc") +assert.eq("abc"[-3:], "abc") +assert.eq("abc"[-2:], "bc") +assert.eq("abc"[-1:], "c") +assert.eq("abc"[0:], "abc") +assert.eq("abc"[1:], "bc") +assert.eq("abc"[2:], "c") +assert.eq("abc"[3:], "") +assert.eq("abc"[4:], "") +assert.eq("abc"[:-4], "") +assert.eq("abc"[:-3], "") +assert.eq("abc"[:-2], "a") +assert.eq("abc"[:-1], "ab") +assert.eq("abc"[:0], "") +assert.eq("abc"[:1], "a") +assert.eq("abc"[:2], "ab") +assert.eq("abc"[:3], "abc") +assert.eq("abc"[:4], "abc") +assert.eq("abc"[1:2], "b") +assert.eq("abc"[2:1], "") +assert.eq(repr("😿"[:1]), r'"\xf0"') # (invalid text) + +# non-unit strides +assert.eq("abcd"[0:4:1], "abcd") +assert.eq("abcd"[::2], "ac") +assert.eq("abcd"[1::2], "bd") +assert.eq("abcd"[4:0:-1], "dcb") +assert.eq("banana"[7::-2], "aaa") +assert.eq("banana"[6::-2], "aaa") +assert.eq("banana"[5::-2], "aaa") +assert.eq("banana"[4::-2], "nnb") +assert.eq("banana"[::-1], "ananab") +assert.eq("banana"[None:None:-2], "aaa") +assert.fails(lambda: "banana"[1.0::], "invalid start index: got float, want int") +assert.fails(lambda: "banana"[:"":], "invalid end index: got string, want int") +assert.fails(lambda: "banana"[:"":True], "invalid slice step: got bool, want int") + +# in, not in +assert.true("oo" in "food") +assert.true("ox" not in "food") +assert.true("" in "food") +assert.true("" in "") +assert.fails(lambda: 1 in "", "requires string as left operand") +assert.fails(lambda: "" in 1, "unknown binary op: string in int") + +# ==, != +assert.eq("hello", "he" + "llo") +assert.ne("hello", "Hello") + +# hash must follow java.lang.String.hashCode. 
+wanthash = { + "": 0, + "\0" * 100: 0, + "hello": 99162322, + "world": 113318802, + "Hello, 世界!": 417292677, +} +gothash = {s: hash(s) for s in wanthash} +assert.eq(gothash, wanthash) + +# TODO(adonovan): ordered comparisons + +# string % tuple formatting +assert.eq("A %d %x Z" % (123, 456), "A 123 1c8 Z") +assert.eq("A %(foo)d %(bar)s Z" % {"foo": 123, "bar": "hi"}, "A 123 hi Z") +assert.eq("%s %r" % ("hi", "hi"), 'hi "hi"') # TODO(adonovan): use ''-quotation +assert.eq("%%d %d" % 1, "%d 1") +assert.fails(lambda: "%d %d" % 1, "not enough arguments for format string") +assert.fails(lambda: "%d %d" % (1, 2, 3), "too many arguments for format string") +assert.fails(lambda: "" % 1, "too many arguments for format string") + +# %c +assert.eq("%c" % 65, "A") +assert.eq("%c" % 0x3b1, "α") +assert.eq("%c" % "A", "A") +assert.eq("%c" % "α", "α") +assert.fails(lambda: "%c" % "abc", "requires a single-character string") +assert.fails(lambda: "%c" % "", "requires a single-character string") +assert.fails(lambda: "%c" % 65.0, "requires int or single-character string") +assert.fails(lambda: "%c" % 10000000, "requires a valid Unicode code point") +assert.fails(lambda: "%c" % -1, "requires a valid Unicode code point") +# TODO(adonovan): more tests + +# str.format +assert.eq("a{}b".format(123), "a123b") +assert.eq("a{}b{}c{}d{}".format(1, 2, 3, 4), "a1b2c3d4") +assert.eq("a{{b".format(), "a{b") +assert.eq("a}}b".format(), "a}b") +assert.eq("a{{b}}c".format(), "a{b}c") +assert.eq("a{x}b{y}c{}".format(1, x = 2, y = 3), "a2b3c1") +assert.fails(lambda: "a{z}b".format(x = 1), "keyword z not found") +assert.fails(lambda: "{-1}".format(1), "keyword -1 not found") +assert.fails(lambda: "{-0}".format(1), "keyword -0 not found") +assert.fails(lambda: "{+0}".format(1), "keyword \\+0 not found") +assert.fails(lambda: "{+1}".format(1), "keyword \\+1 not found") # starlark-go/issues/114 +assert.eq("{0000000000001}".format(0, 1), "1") +assert.eq("{012}".format(*range(100)), "12") # decimal, 
despite leading zeros +assert.fails(lambda: "{0,1} and {1}".format(1, 2), "keyword 0,1 not found") +assert.fails(lambda: "a{123}b".format(), "tuple index out of range") +assert.fails(lambda: "a{}b{}c".format(1), "tuple index out of range") +assert.eq("a{010}b".format(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), "a10b") # index is decimal +assert.fails(lambda: "a{}b{1}c".format(1, 2), "cannot switch from automatic field numbering to manual") +assert.eq("a{!s}c".format("b"), "abc") +assert.eq("a{!r}c".format("b"), r'a"b"c') +assert.eq("a{x!r}c".format(x = "b"), r'a"b"c') +assert.fails(lambda: "{x!}".format(x = 1), "unknown conversion") +assert.fails(lambda: "{x!:}".format(x = 1), "unknown conversion") +assert.fails(lambda: "{a.b}".format(1), "syntax x.y is not supported") +assert.fails(lambda: "{a[0]}".format(1), "syntax a\\[i\\] is not supported") +assert.fails(lambda: "{ {} }".format(1), "nested replacement fields not supported") +assert.fails(lambda: "{{}".format(1), "single '}' in format") +assert.fails(lambda: "{}}".format(1), "single '}' in format") +assert.fails(lambda: "}}{".format(1), "unmatched '{' in format") +assert.fails(lambda: "}{{".format(1), "single '}' in format") + +# str.split, str.rsplit +assert.eq("a.b.c.d".split("."), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".rsplit("."), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".split(".", -1), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".rsplit(".", -1), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".split(".", 0), ["a.b.c.d"]) +assert.eq("a.b.c.d".rsplit(".", 0), ["a.b.c.d"]) +assert.eq("a.b.c.d".split(".", 1), ["a", "b.c.d"]) +assert.eq("a.b.c.d".rsplit(".", 1), ["a.b.c", "d"]) +assert.eq("a.b.c.d".split(".", 2), ["a", "b", "c.d"]) +assert.eq("a.b.c.d".rsplit(".", 2), ["a.b", "c", "d"]) +assert.eq(" ".split("."), [" "]) +assert.eq(" ".rsplit("."), [" "]) + +# {,r}split on white space: +assert.eq(" a bc\n def \t ghi".split(), ["a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".split(None), ["a", "bc", "def", 
"ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 0), ["a bc\n def \t ghi"]) +assert.eq(" a bc\n def \t ghi".rsplit(None, 0), [" a bc\n def \t ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 1), ["a", "bc\n def \t ghi"]) +assert.eq(" a bc\n def \t ghi".rsplit(None, 1), [" a bc\n def", "ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 2), ["a", "bc", "def \t ghi"]) +assert.eq(" a bc\n def \t ghi".rsplit(None, 2), [" a bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 3), ["a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".rsplit(None, 3), [" a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 4), ["a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".rsplit(None, 4), ["a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".rsplit(None, 5), ["a", "bc", "def", "ghi"]) + +assert.eq(" a bc\n def \t ghi ".split(None, 0), ["a bc\n def \t ghi "]) +assert.eq(" a bc\n def \t ghi ".rsplit(None, 0), [" a bc\n def \t ghi"]) +assert.eq(" a bc\n def \t ghi ".split(None, 1), ["a", "bc\n def \t ghi "]) +assert.eq(" a bc\n def \t ghi ".rsplit(None, 1), [" a bc\n def", "ghi"]) + +# Observe the algorithmic difference when splitting on spaces versus other delimiters. 
+assert.eq("--aa--bb--cc--".split("-", 0), ["--aa--bb--cc--"]) # contrast this +assert.eq(" aa bb cc ".split(None, 0), ["aa bb cc "]) # with this +assert.eq("--aa--bb--cc--".rsplit("-", 0), ["--aa--bb--cc--"]) # ditto this +assert.eq(" aa bb cc ".rsplit(None, 0), [" aa bb cc"]) # and this + +# +assert.eq("--aa--bb--cc--".split("-", 1), ["", "-aa--bb--cc--"]) +assert.eq("--aa--bb--cc--".rsplit("-", 1), ["--aa--bb--cc-", ""]) +assert.eq(" aa bb cc ".split(None, 1), ["aa", "bb cc "]) +assert.eq(" aa bb cc ".rsplit(None, 1), [" aa bb", "cc"]) + +# +assert.eq("--aa--bb--cc--".split("-", -1), ["", "", "aa", "", "bb", "", "cc", "", ""]) +assert.eq("--aa--bb--cc--".rsplit("-", -1), ["", "", "aa", "", "bb", "", "cc", "", ""]) +assert.eq(" aa bb cc ".split(None, -1), ["aa", "bb", "cc"]) +assert.eq(" aa bb cc ".rsplit(None, -1), ["aa", "bb", "cc"]) +assert.eq(" ".split(None), []) +assert.eq(" ".rsplit(None), []) + +assert.eq("localhost:80".rsplit(":", 1)[-1], "80") + +# str.splitlines +assert.eq("\nabc\ndef".splitlines(), ["", "abc", "def"]) +assert.eq("\nabc\ndef".splitlines(True), ["\n", "abc\n", "def"]) +assert.eq("\nabc\ndef\n".splitlines(), ["", "abc", "def"]) +assert.eq("\nabc\ndef\n".splitlines(True), ["\n", "abc\n", "def\n"]) +assert.eq("".splitlines(), []) # +assert.eq("".splitlines(True), []) # +assert.eq("a".splitlines(), ["a"]) +assert.eq("a".splitlines(True), ["a"]) +assert.eq("\n".splitlines(), [""]) +assert.eq("\n".splitlines(True), ["\n"]) +assert.eq("a\n".splitlines(), ["a"]) +assert.eq("a\n".splitlines(True), ["a\n"]) +assert.eq("a\n\nb".splitlines(), ["a", "", "b"]) +assert.eq("a\n\nb".splitlines(True), ["a\n", "\n", "b"]) +assert.eq("a\nb\nc".splitlines(), ["a", "b", "c"]) +assert.eq("a\nb\nc".splitlines(True), ["a\n", "b\n", "c"]) +assert.eq("a\nb\nc\n".splitlines(), ["a", "b", "c"]) +assert.eq("a\nb\nc\n".splitlines(True), ["a\n", "b\n", "c\n"]) + +# str.{,l,r}strip +assert.eq(" \tfoo\n ".strip(), "foo") +assert.eq(" \tfoo\n ".lstrip(), "foo\n ") 
+assert.eq(" \tfoo\n ".rstrip(), " \tfoo") +assert.eq(" \tfoo\n ".strip(""), "foo") +assert.eq(" \tfoo\n ".lstrip(""), "foo\n ") +assert.eq(" \tfoo\n ".rstrip(""), " \tfoo") +assert.eq("blah.h".strip("b.h"), "la") +assert.eq("blah.h".lstrip("b.h"), "lah.h") +assert.eq("blah.h".rstrip("b.h"), "bla") + +# str.count +assert.eq("banana".count("a"), 3) +assert.eq("banana".count("a", 2), 2) +assert.eq("banana".count("a", -4, -2), 1) +assert.eq("banana".count("a", 1, 4), 2) +assert.eq("banana".count("a", 0, -100), 0) + +# str.{starts,ends}with +assert.true("foo".endswith("oo")) +assert.true(not "foo".endswith("x")) +assert.true("foo".startswith("fo")) +assert.true(not "foo".startswith("x")) +assert.fails(lambda: "foo".startswith(1), "got int.*want string") + +# +assert.true("abc".startswith(("a", "A"))) +assert.true("ABC".startswith(("a", "A"))) +assert.true(not "ABC".startswith(("b", "B"))) +assert.fails(lambda: "123".startswith((1, 2)), "got int, for element 0") +assert.fails(lambda: "123".startswith(["3"]), "got list") + +# +assert.true("abc".endswith(("c", "C"))) +assert.true("ABC".endswith(("c", "C"))) +assert.true(not "ABC".endswith(("b", "B"))) +assert.fails(lambda: "123".endswith((1, 2)), "got int, for element 0") +assert.fails(lambda: "123".endswith(["3"]), "got list") + +# start/end +assert.true("abc".startswith("bc", 1)) +assert.true(not "abc".startswith("b", 999)) +assert.true("abc".endswith("ab", None, -1)) +assert.true(not "abc".endswith("b", None, -999)) + +# str.replace +assert.eq("banana".replace("a", "o", 1), "bonana") +assert.eq("banana".replace("a", "o"), "bonono") +# TODO(adonovan): more tests + +# str.{,r}find +assert.eq("foofoo".find("oo"), 1) +assert.eq("foofoo".find("ox"), -1) +assert.eq("foofoo".find("oo", 2), 4) +assert.eq("foofoo".rfind("oo"), 4) +assert.eq("foofoo".rfind("ox"), -1) +assert.eq("foofoo".rfind("oo", 1, 4), 1) +assert.eq("foofoo".find(""), 0) +assert.eq("foofoo".rfind(""), 6) + +# str.{,r}partition 
+assert.eq("foo/bar/wiz".partition("/"), ("foo", "/", "bar/wiz")) +assert.eq("foo/bar/wiz".rpartition("/"), ("foo/bar", "/", "wiz")) +assert.eq("foo/bar/wiz".partition("."), ("foo/bar/wiz", "", "")) +assert.eq("foo/bar/wiz".rpartition("."), ("", "", "foo/bar/wiz")) +assert.fails(lambda: "foo/bar/wiz".partition(""), "empty separator") +assert.fails(lambda: "foo/bar/wiz".rpartition(""), "empty separator") + +assert.eq("?".join(["foo", "a/b/c.go".rpartition("/")[0]]), "foo?a/b") + +# str.is{alpha,...} +def test_predicates(): + predicates = ["alnum", "alpha", "digit", "lower", "space", "title", "upper"] + table = { + "Hello, World!": "title", + "hello, world!": "lower", + "base64": "alnum lower", + "HAL-9000": "upper", + "Catch-22": "title", + "": "", + "\n\t\r": "space", + "abc": "alnum alpha lower", + "ABC": "alnum alpha upper", + "123": "alnum digit", + "DŽLJ": "alnum alpha upper", + "DžLj": "alnum alpha", + "Dž Lj": "title", + "džlj": "alnum alpha lower", + } + for str, want in table.items(): + got = " ".join([name for name in predicates if getattr(str, "is" + name)()]) + if got != want: + assert.fail("%r matched [%s], want [%s]" % (str, got, want)) + +test_predicates() + +# Strings are not iterable. 
+# ok +assert.eq(len("abc"), 3) # len +assert.true("a" in "abc") # str in str +assert.eq("abc"[1], "b") # indexing + +# not ok +def for_string(): + for x in "abc": + pass + +def args(*args): + return args + +assert.fails(lambda: args(*"abc"), "must be iterable, not string") # varargs +assert.fails(lambda: list("abc"), "got string, want iterable") # list(str) +assert.fails(lambda: tuple("abc"), "got string, want iterable") # tuple(str) +assert.fails(lambda: set("abc"), "got string, want iterable") # set(str) +assert.fails(lambda: set() | "abc", "unknown binary op: set | string") # set union +assert.fails(lambda: enumerate("ab"), "got string, want iterable") # enumerate +assert.fails(lambda: sorted("abc"), "got string, want iterable") # sorted +assert.fails(lambda: [].extend("bc"), "got string, want iterable") # list.extend +assert.fails(lambda: ",".join("abc"), "got string, want iterable") # string.join +assert.fails(lambda: dict(["ab"]), "not iterable .*string") # dict +assert.fails(for_string, "string value is not iterable") # for loop +assert.fails(lambda: [x for x in "abc"], "string value is not iterable") # comprehension +assert.fails(lambda: all("abc"), "got string, want iterable") # all +assert.fails(lambda: any("abc"), "got string, want iterable") # any +assert.fails(lambda: reversed("abc"), "got string, want iterable") # reversed +assert.fails(lambda: zip("ab", "cd"), "not iterable: string") # zip + +# str.join +assert.eq(",".join([]), "") +assert.eq(",".join(["a"]), "a") +assert.eq(",".join(["a", "b"]), "a,b") +assert.eq(",".join(["a", "b", "c"]), "a,b,c") +assert.eq(",".join(("a", "b", "c")), "a,b,c") +assert.eq("".join(("a", "b", "c")), "abc") +assert.fails(lambda: "".join(None), "got NoneType, want iterable") +assert.fails(lambda: "".join(["one", 2]), "join: in list, want string, got int") + +# TODO(adonovan): tests for: {,r}index + +# str.capitalize +assert.eq("hElLo, WoRlD!".capitalize(), "Hello, world!") +assert.eq("por qué".capitalize(), "Por qué") 
+assert.eq("¿Por qué?".capitalize(), "¿por qué?") + +# str.lower +assert.eq("hElLo, WoRlD!".lower(), "hello, world!") +assert.eq("por qué".lower(), "por qué") +assert.eq("¿Por qué?".lower(), "¿por qué?") +assert.eq("LJUBOVIĆ".lower(), "ljubović") +assert.true("dženan ljubović".islower()) + +# str.upper +assert.eq("hElLo, WoRlD!".upper(), "HELLO, WORLD!") +assert.eq("por qué".upper(), "POR QUÉ") +assert.eq("¿Por qué?".upper(), "¿POR QUÉ?") +assert.eq("ljubović".upper(), "LJUBOVIĆ") +assert.true("DŽENAN LJUBOVIĆ".isupper()) + +# str.title +assert.eq("hElLo, WoRlD!".title(), "Hello, World!") +assert.eq("por qué".title(), "Por Qué") +assert.eq("¿Por qué?".title(), "¿Por Qué?") +assert.eq("ljubović".title(), "Ljubović") +assert.true("Dženan Ljubović".istitle()) +assert.true(not "DŽenan LJubović".istitle()) + +# method spell check +assert.fails(lambda: "".starts_with, "no .starts_with field.*did you mean .startswith") +assert.fails(lambda: "".StartsWith, "no .StartsWith field.*did you mean .startswith") +assert.fails(lambda: "".fin, "no .fin field.*.did you mean .find") diff --git a/starlark/testdata/tuple.star b/starlark/testdata/tuple.star new file mode 100644 index 0000000..f306133 --- /dev/null +++ b/starlark/testdata/tuple.star @@ -0,0 +1,55 @@ +# Tests of Starlark 'tuple' + +load("assert.star", "assert") + +# literal +assert.eq((), ()) +assert.eq((1), 1) +assert.eq((1,), (1,)) +assert.ne((1), (1,)) +assert.eq((1, 2), (1, 2)) +assert.eq((1, 2, 3, 4, 5), (1, 2, 3, 4, 5)) +assert.ne((1, 2, 3), (1, 2, 4)) + +# truth +assert.true((False,)) +assert.true((False, False)) +assert.true(not ()) + +# indexing, x[i] +assert.eq(("a", "b")[0], "a") +assert.eq(("a", "b")[1], "b") + +# slicing, x[i:j] +assert.eq("abcd"[0:4:1], "abcd") +assert.eq("abcd"[::2], "ac") +assert.eq("abcd"[1::2], "bd") +assert.eq("abcd"[4:0:-1], "dcb") +banana = tuple("banana".elems()) +assert.eq(banana[7::-2], tuple("aaa".elems())) +assert.eq(banana[6::-2], tuple("aaa".elems())) +assert.eq(banana[5::-2], 
tuple("aaa".elems())) +assert.eq(banana[4::-2], tuple("nnb".elems())) + +# tuple +assert.eq(tuple(), ()) +assert.eq(tuple("abc".elems()), ("a", "b", "c")) +assert.eq(tuple(["a", "b", "c"]), ("a", "b", "c")) +assert.eq(tuple([1]), (1,)) +assert.fails(lambda: tuple(1), "got int, want iterable") + +# tuple * int, int * tuple +abc = tuple("abc".elems()) +assert.eq(abc * 0, ()) +assert.eq(abc * -1, ()) +assert.eq(abc * 1, abc) +assert.eq(abc * 3, ("a", "b", "c", "a", "b", "c", "a", "b", "c")) +assert.eq(0 * abc, ()) +assert.eq(-1 * abc, ()) +assert.eq(1 * abc, abc) +assert.eq(3 * abc, ("a", "b", "c", "a", "b", "c", "a", "b", "c")) +assert.fails(lambda: abc * (1000000 * 1000000), "repeat count 1000000000000 too large") +assert.fails(lambda: abc * 1000000 * 1000000, "excessive repeat \\(3000000 \\* 1000000 elements") + +# TODO(adonovan): test use of tuple as sequence +# (for loop, comprehension, library functions). diff --git a/starlark/unpack.go b/starlark/unpack.go new file mode 100644 index 0000000..1493c85 --- /dev/null +++ b/starlark/unpack.go @@ -0,0 +1,319 @@ +package starlark + +// This file defines the Unpack helper functions used by +// built-in functions to interpret their call arguments. + +import ( + "fmt" + "log" + "reflect" + "strings" +) + +// An Unpacker defines custom argument unpacking behavior. +// See UnpackArgs. +type Unpacker interface { + Unpack(v Value) error +} + +// UnpackArgs unpacks the positional and keyword arguments into the +// supplied parameter variables. pairs is an alternating list of names +// and pointers to variables. +// +// If the variable is a bool, integer, string, *List, *Dict, Callable, +// Iterable, or user-defined implementation of Value, +// UnpackArgs performs the appropriate type check. +// Predeclared Go integer types uses the AsInt check. +// If the parameter name ends with "?", +// it and all following parameters are optional. 
+// +// If the variable implements Unpacker, its Unpack argument +// is called with the argument value, allowing an application +// to define its own argument validation and conversion. +// +// If the variable implements Value, UnpackArgs may call +// its Type() method while constructing the error message. +// +// Examples: +// +// var ( +// a Value +// b = MakeInt(42) +// c Value = starlark.None +// ) +// +// // 1. mixed parameters, like def f(a, b=42, c=None). +// err := UnpackArgs("f", args, kwargs, "a", &a, "b?", &b, "c?", &c) +// +// // 2. keyword parameters only, like def f(*, a, b, c=None). +// if len(args) > 0 { +// return fmt.Errorf("f: unexpected positional arguments") +// } +// err := UnpackArgs("f", args, kwargs, "a", &a, "b?", &b, "c?", &c) +// +// // 3. positional parameters only, like def f(a, b=42, c=None, /) in Python 3.8. +// err := UnpackPositionalArgs("f", args, kwargs, 1, &a, &b, &c) +// +// More complex forms such as def f(a, b=42, *args, c, d=123, **kwargs) +// require additional logic, but their need in built-ins is exceedingly rare. +// +// In the examples above, the declaration of b with type Int causes UnpackArgs +// to require that b's argument value, if provided, is also an int. +// To allow arguments of any type, while retaining the default value of 42, +// declare b as a Value: +// +// var b Value = MakeInt(42) +// +// The zero value of a variable of type Value, such as 'a' in the +// examples above, is not a valid Starlark value, so if the parameter is +// optional, the caller must explicitly handle the default case by +// interpreting nil as None or some computed default. The same is true +// for the zero values of variables of type *List, *Dict, Callable, or +// Iterable. 
For example: +// +// // def myfunc(d=None, e=[], f={}) +// var ( +// d Value +// e *List +// f *Dict +// ) +// err := UnpackArgs("myfunc", args, kwargs, "d?", &d, "e?", &e, "f?", &f) +// if d == nil { d = None; } +// if e == nil { e = new(List); } +// if f == nil { f = new(Dict); } +// +func UnpackArgs(fnname string, args Tuple, kwargs []Tuple, pairs ...interface{}) error { + nparams := len(pairs) / 2 + var defined intset + defined.init(nparams) + + paramName := func(x interface{}) string { // (no free variables) + name := x.(string) + if name[len(name)-1] == '?' { + name = name[:len(name)-1] + } + return name + } + + // positional arguments + if len(args) > nparams { + return fmt.Errorf("%s: got %d arguments, want at most %d", + fnname, len(args), nparams) + } + for i, arg := range args { + defined.set(i) + if err := unpackOneArg(arg, pairs[2*i+1]); err != nil { + name := paramName(pairs[2*i]) + return fmt.Errorf("%s: for parameter %s: %s", fnname, name, err) + } + } + + // keyword arguments +kwloop: + for _, item := range kwargs { + name, arg := item[0].(String), item[1] + for i := 0; i < nparams; i++ { + if paramName(pairs[2*i]) == string(name) { + // found it + if defined.set(i) { + return fmt.Errorf("%s: got multiple values for keyword argument %s", + fnname, name) + } + ptr := pairs[2*i+1] + if err := unpackOneArg(arg, ptr); err != nil { + return fmt.Errorf("%s: for parameter %s: %s", fnname, name, err) + } + continue kwloop + } + } + return fmt.Errorf("%s: unexpected keyword argument %s", fnname, name) + } + + // Check that all non-optional parameters are defined. + // (We needn't check the first len(args).) + for i := len(args); i < nparams; i++ { + name := pairs[2*i].(string) + if strings.HasSuffix(name, "?") { + break // optional + } + if !defined.get(i) { + return fmt.Errorf("%s: missing argument for %s", fnname, name) + } + } + + return nil +} + +// UnpackPositionalArgs unpacks the positional arguments into +// corresponding variables. 
Each element of vars is a pointer; see +// UnpackArgs for allowed types and conversions. +// +// UnpackPositionalArgs reports an error if the number of arguments is +// less than min or greater than len(vars), if kwargs is nonempty, or if +// any conversion fails. +// +// See UnpackArgs for general comments. +func UnpackPositionalArgs(fnname string, args Tuple, kwargs []Tuple, min int, vars ...interface{}) error { + if len(kwargs) > 0 { + return fmt.Errorf("%s: unexpected keyword arguments", fnname) + } + max := len(vars) + if len(args) < min { + var atleast string + if min < max { + atleast = "at least " + } + return fmt.Errorf("%s: got %d arguments, want %s%d", fnname, len(args), atleast, min) + } + if len(args) > max { + var atmost string + if max > min { + atmost = "at most " + } + return fmt.Errorf("%s: got %d arguments, want %s%d", fnname, len(args), atmost, max) + } + for i, arg := range args { + if err := unpackOneArg(arg, vars[i]); err != nil { + return fmt.Errorf("%s: for parameter %d: %s", fnname, i+1, err) + } + } + return nil +} + +func unpackOneArg(v Value, ptr interface{}) error { + // On failure, don't clobber *ptr. 
+ switch ptr := ptr.(type) { + case Unpacker: + return ptr.Unpack(v) + case *Value: + *ptr = v + case *string: + s, ok := AsString(v) + if !ok { + return fmt.Errorf("got %s, want string", v.Type()) + } + *ptr = s + case *bool: + b, ok := v.(Bool) + if !ok { + return fmt.Errorf("got %s, want bool", v.Type()) + } + *ptr = bool(b) + case *int, *int8, *int16, *int32, *int64, + *uint, *uint8, *uint16, *uint32, *uint64, *uintptr: + return AsInt(v, ptr) + case *float64: + f, ok := v.(Float) + if !ok { + return fmt.Errorf("got %s, want float", v.Type()) + } + *ptr = float64(f) + case **List: + list, ok := v.(*List) + if !ok { + return fmt.Errorf("got %s, want list", v.Type()) + } + *ptr = list + case **Dict: + dict, ok := v.(*Dict) + if !ok { + return fmt.Errorf("got %s, want dict", v.Type()) + } + *ptr = dict + case *Callable: + f, ok := v.(Callable) + if !ok { + return fmt.Errorf("got %s, want callable", v.Type()) + } + *ptr = f + case *Iterable: + it, ok := v.(Iterable) + if !ok { + return fmt.Errorf("got %s, want iterable", v.Type()) + } + *ptr = it + default: + // v must have type *V, where V is some subtype of starlark.Value. + ptrv := reflect.ValueOf(ptr) + if ptrv.Kind() != reflect.Ptr { + log.Panicf("internal error: not a pointer: %T", ptr) + } + paramVar := ptrv.Elem() + if !reflect.TypeOf(v).AssignableTo(paramVar.Type()) { + // The value is not assignable to the variable. + + // Detect a possible bug in the Go program that called Unpack: + // If the variable *ptr is not a subtype of Value, + // no value of v can possibly work. + if !paramVar.Type().AssignableTo(reflect.TypeOf(new(Value)).Elem()) { + log.Panicf("pointer element type does not implement Value: %T", ptr) + } + + // Report Starlark dynamic type error. + // + // We prefer the Starlark Value.Type name over + // its Go reflect.Type name, but calling the + // Value.Type method on the variable is not safe + // in general. If the variable is an interface, + // the call will fail. 
Even if the variable has + // a concrete type, it might not be safe to call + // Type() on a zero instance. Thus we must use + // recover. + + // Default to Go reflect.Type name + paramType := paramVar.Type().String() + + // Attempt to call Value.Type method. + func() { + defer func() { recover() }() + paramType = paramVar.MethodByName("Type").Call(nil)[0].String() + }() + return fmt.Errorf("got %s, want %s", v.Type(), paramType) + } + paramVar.Set(reflect.ValueOf(v)) + } + return nil +} + +type intset struct { + small uint64 // bitset, used if n < 64 + large map[int]bool // set, used if n >= 64 +} + +func (is *intset) init(n int) { + if n >= 64 { + is.large = make(map[int]bool) + } +} + +func (is *intset) set(i int) (prev bool) { + if is.large == nil { + prev = is.small&(1<<uint(i)) != 0 + is.small |= 1 << uint(i) + } else { + prev = is.large[i] + is.large[i] = true + } + return +} + +func (is *intset) get(i int) bool { + if is.large == nil { + return is.small&(1<<uint(i)) != 0 + } + return is.large[i] +} + +func (is *intset) len() int { + if is.large == nil { + // Suboptimal, but used only for error reporting. + len := 0 + for i := 0; i < 64; i++ { + if is.small&(1<<uint(i)) != 0 { + len++ + } + } + return len + } + return len(is.large) +} diff --git a/starlark/value.go b/starlark/value.go new file mode 100644 index 0000000..81e29ed --- /dev/null +++ b/starlark/value.go @@ -0,0 +1,1431 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package starlark provides a Starlark interpreter. +// +// Starlark values are represented by the Value interface. 
+// The following built-in Value types are known to the evaluator: +// +// NoneType -- NoneType +// Bool -- bool +// Int -- int +// Float -- float +// String -- string +// *List -- list +// Tuple -- tuple +// *Dict -- dict +// *Set -- set +// *Function -- function (implemented in Starlark) +// *Builtin -- builtin_function_or_method (function or method implemented in Go) +// +// Client applications may define new data types that satisfy at least +// the Value interface. Such types may provide additional operations by +// implementing any of these optional interfaces: +// +// Callable -- value is callable like a function +// Comparable -- value defines its own comparison operations +// Iterable -- value is iterable using 'for' loops +// Sequence -- value is iterable sequence of known length +// Indexable -- value is sequence with efficient random access +// Mapping -- value maps from keys to values, like a dictionary +// HasBinary -- value defines binary operations such as * and + +// HasAttrs -- value has readable fields or methods x.f +// HasSetField -- value has settable fields x.f +// HasSetIndex -- value supports element update using x[i]=y +// HasSetKey -- value supports map update using x[k]=v +// HasUnary -- value defines unary operations such as + and - +// +// Client applications may also define domain-specific functions in Go +// and make them available to Starlark programs. Use NewBuiltin to +// construct a built-in value that wraps a Go function. The +// implementation of the Go function may use UnpackArgs to make sense of +// the positional and keyword arguments provided by the caller. +// +// Starlark's None value is not equal to Go's nil. Go's nil is not a legal +// Starlark value, but the compiler will not stop you from converting nil +// to Value. Be careful to avoid allowing Go nil values to leak into +// Starlark data structures. 
//
// The Compare operation requires two arguments of the same
// type, but this constraint cannot be expressed in Go's type system.
// (This is the classic "binary method problem".)
// So, each Value type's CompareSameType method is a partial function
// that compares a value only against others of the same type.
// Use the package's standalone Compare (or Equal) function to compare
// an arbitrary pair of values.
//
// To parse and evaluate a Starlark source file, use ExecFile. The Eval
// function evaluates a single expression. All evaluator functions
// require a Thread parameter which defines the "thread-local storage"
// of a Starlark thread and may be used to plumb application state
// through Starlark code and into callbacks. When evaluation fails it
// returns an EvalError from which the application may obtain a
// backtrace of active Starlark calls.
//
package starlark // import "go.starlark.net/starlark"

// This file defines the data types of Starlark and their basic operations.

import (
	"fmt"
	"math"
	"math/big"
	"reflect"
	"strconv"
	"strings"
	"unicode/utf8"

	"go.starlark.net/internal/compile"
	"go.starlark.net/syntax"
)

// Value is a value in the Starlark interpreter.
type Value interface {
	// String returns the string representation of the value.
	// Starlark string values are quoted as if by Python's repr.
	String() string

	// Type returns a short string describing the value's type.
	Type() string

	// Freeze causes the value, and all values transitively
	// reachable from it through collections and closures, to be
	// marked as frozen. All subsequent mutations to the data
	// structure through this API will fail dynamically, making the
	// data structure immutable and safe for publishing to other
	// Starlark interpreters running concurrently.
	Freeze()

	// Truth returns the truth value of an object.
	Truth() Bool

	// Hash returns a function of x such that Equal(x, y) => Hash(x) == Hash(y).
	// Hash may fail if the value's type is not hashable, or if the value
	// contains a non-hashable value. The hash is used only by dictionaries and
	// is not exposed to the Starlark program.
	Hash() (uint32, error)
}

// A Comparable is a value that defines its own equivalence relation and
// perhaps ordered comparisons.
type Comparable interface {
	Value
	// CompareSameType compares one value to another of the same Type().
	// The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE.
	// CompareSameType returns an error if an ordered comparison was
	// requested for a type that does not support it.
	//
	// Implementations that recursively compare subcomponents of
	// the value should use the CompareDepth function, not Compare, to
	// avoid infinite recursion on cyclic structures.
	//
	// The depth parameter is used to bound comparisons of cyclic
	// data structures. Implementations should decrement depth
	// before calling CompareDepth and should return an error if depth
	// < 1.
	//
	// Client code should not call this method. Instead, use the
	// standalone Compare or Equal functions, which are defined for
	// all pairs of operands.
	CompareSameType(op syntax.Token, y Value, depth int) (bool, error)
}

// Compile-time assertions that these built-in types satisfy Comparable.
var (
	_ Comparable = Int{}
	_ Comparable = False
	_ Comparable = Float(0)
	_ Comparable = String("")
	_ Comparable = (*Dict)(nil)
	_ Comparable = (*List)(nil)
	_ Comparable = Tuple(nil)
	_ Comparable = (*Set)(nil)
)

// A Callable value f may be the operand of a function call, f(x).
//
// Clients should use the Call function, never the CallInternal method.
type Callable interface {
	Value
	Name() string
	// CallInternal receives the positional arguments in args and the
	// keyword arguments in kwargs.
	// NOTE(review): each kwargs element is presumably a (name, value)
	// pair; confirm against the Call implementation.
	CallInternal(thread *Thread, args Tuple, kwargs []Tuple) (Value, error)
}

// A callableWithPosition is a Callable that can also report a
// source position.
type callableWithPosition interface {
	Callable
	Position() syntax.Position
}

// Compile-time assertions of interface satisfaction.
var (
	_ Callable             = (*Builtin)(nil)
	_ Callable             = (*Function)(nil)
	_ callableWithPosition = (*Function)(nil)
)

// An Iterable abstracts a sequence of values.
// An iterable value may be iterated over by a 'for' loop or used where
// any other Starlark iterable is allowed. Unlike a Sequence, the length
// of an Iterable is not necessarily known in advance of iteration.
type Iterable interface {
	Value
	Iterate() Iterator // must be followed by call to Iterator.Done
}

// A Sequence is a sequence of values of known length.
type Sequence interface {
	Iterable
	Len() int
}

// Compile-time assertions of interface satisfaction.
var (
	_ Sequence = (*Dict)(nil)
	_ Sequence = (*Set)(nil)
)

// An Indexable is a sequence of known length that supports efficient random access.
// It is not necessarily iterable.
type Indexable interface {
	Value
	Index(i int) Value // requires 0 <= i < Len()
	Len() int
}

// A Sliceable is a sequence that can be cut into pieces with the slice operator (x[i:j:step]).
//
// All native indexable objects are sliceable.
// This is a separate interface for backwards-compatibility.
type Sliceable interface {
	Indexable
	// For positive strides (step > 0), 0 <= start <= end <= n.
	// For negative strides (step < 0), -1 <= end <= start < n.
	// The caller must ensure that the start and end indices are valid
	// and that step is non-zero.
	Slice(start, end, step int) Value
}

// A HasSetIndex is an Indexable value whose elements may be assigned (x[i] = y).
//
// The implementation should not add Len to a negative index as the
// evaluator does this before the call.
type HasSetIndex interface {
	Indexable
	SetIndex(index int, v Value) error
}

// Compile-time assertions of interface satisfaction.
var (
	_ HasSetIndex = (*List)(nil)
	_ Indexable   = Tuple(nil)
	_ Indexable   = String("")
	_ Sliceable   = Tuple(nil)
	_ Sliceable   = String("")
	_ Sliceable   = (*List)(nil)
)

// An Iterator provides a sequence of values to the caller.
//
// The caller must call Done when the iterator is no longer needed.
// Operations that modify a sequence will fail if it has active iterators.
//
// Example usage:
//
//	iter := iterable.Iterate()
//	defer iter.Done()
//	var x Value
//	for iter.Next(&x) {
//		...
//	}
//
type Iterator interface {
	// If the iterator is exhausted, Next returns false.
	// Otherwise it sets *p to the current element of the sequence,
	// advances the iterator, and returns true.
	Next(p *Value) bool
	Done()
}

// A Mapping is a mapping from keys to values, such as a dictionary.
//
// If a type satisfies both Mapping and Iterable, the iterator yields
// the keys of the mapping.
type Mapping interface {
	Value
	// Get returns the value corresponding to the specified key,
	// or !found if the mapping does not contain the key.
	//
	// Get also defines the behavior of "v in mapping".
	// The 'in' operator reports the 'found' component, ignoring errors.
	Get(Value) (v Value, found bool, err error)
}

// An IterableMapping is a mapping that supports key enumeration.
type IterableMapping interface {
	Mapping
	Iterate() Iterator // see Iterable interface
	Items() []Tuple    // a new slice containing all key/value pairs
}

// Compile-time assertion that *Dict satisfies IterableMapping.
var _ IterableMapping = (*Dict)(nil)

// A HasSetKey supports map update using x[k]=v syntax, like a dictionary.
type HasSetKey interface {
	Mapping
	SetKey(k, v Value) error
}

// Compile-time assertion that *Dict satisfies HasSetKey.
var _ HasSetKey = (*Dict)(nil)

// A HasBinary value may be used as either operand of these binary operators:
//     + - * / // % in not in | & ^ << >>
//
// The Side argument indicates whether the receiver is the left or right operand.
//
// An implementation may decline to handle an operation by returning (nil, nil).
// For this reason, clients should always call the standalone Binary(op, x, y)
// function rather than calling the method directly.
type HasBinary interface {
	Value
	Binary(op syntax.Token, y Value, side Side) (Value, error)
}

// A Side tells a HasBinary.Binary implementation whether its receiver
// is the left or the right operand of the operator.
type Side bool

const (
	Left  Side = false // receiver is the left operand
	Right Side = true  // receiver is the right operand
)

// A HasUnary value may be used as the operand of these unary operators:
//     + - ~
//
// An implementation may decline to handle an operation by returning (nil, nil).
// For this reason, clients should always call the standalone Unary(op, x)
// function rather than calling the method directly.
type HasUnary interface {
	Value
	Unary(op syntax.Token) (Value, error)
}

// A HasAttrs value has fields or methods that may be read by a dot expression (y = x.f).
// Attribute names may be listed using the built-in 'dir' function.
//
// For implementation convenience, a result of (nil, nil) from Attr is
// interpreted as a "no such field or method" error. Implementations are
// free to return a more precise error.
type HasAttrs interface {
	Value
	Attr(name string) (Value, error) // returns (nil, nil) if attribute not present
	AttrNames() []string             // callers must not modify the result.
}

// Compile-time assertions of interface satisfaction.
var (
	_ HasAttrs = String("")
	_ HasAttrs = new(List)
	_ HasAttrs = new(Dict)
	_ HasAttrs = new(Set)
)

// A HasSetField value has fields that may be written by a dot expression (x.f = y).
//
// An implementation of SetField may return a NoSuchAttrError,
// in which case the runtime may augment the error message to
// warn of possible misspelling.
+type HasSetField interface { + HasAttrs + SetField(name string, val Value) error +} + +// A NoSuchAttrError may be returned by an implementation of +// HasAttrs.Attr or HasSetField.SetField to indicate that no such field +// exists. In that case the runtime may augment the error message to +// warn of possible misspelling. +type NoSuchAttrError string + +func (e NoSuchAttrError) Error() string { return string(e) } + +// NoneType is the type of None. Its only legal value is None. +// (We represent it as a number, not struct{}, so that None may be constant.) +type NoneType byte + +const None = NoneType(0) + +func (NoneType) String() string { return "None" } +func (NoneType) Type() string { return "NoneType" } +func (NoneType) Freeze() {} // immutable +func (NoneType) Truth() Bool { return False } +func (NoneType) Hash() (uint32, error) { return 0, nil } + +// Bool is the type of a Starlark bool. +type Bool bool + +const ( + False Bool = false + True Bool = true +) + +func (b Bool) String() string { + if b { + return "True" + } else { + return "False" + } +} +func (b Bool) Type() string { return "bool" } +func (b Bool) Freeze() {} // immutable +func (b Bool) Truth() Bool { return b } +func (b Bool) Hash() (uint32, error) { return uint32(b2i(bool(b))), nil } +func (x Bool) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(Bool) + return threeway(op, b2i(bool(x))-b2i(bool(y))), nil +} + +// Float is the type of a Starlark float. +type Float float64 + +func (f Float) String() string { + var buf strings.Builder + f.format(&buf, 'g') + return buf.String() +} + +func (f Float) format(buf *strings.Builder, conv byte) { + ff := float64(f) + if !isFinite(ff) { + if math.IsInf(ff, +1) { + buf.WriteString("+inf") + } else if math.IsInf(ff, -1) { + buf.WriteString("-inf") + } else { + buf.WriteString("nan") + } + return + } + + // %g is the default format used by str. 
+ // It uses the minimum precision to avoid ambiguity, + // and always includes a '.' or an 'e' so that the value + // is self-evidently a float, not an int. + if conv == 'g' || conv == 'G' { + s := strconv.FormatFloat(ff, conv, -1, 64) + buf.WriteString(s) + // Ensure result always has a decimal point if no exponent. + // "123" -> "123.0" + if strings.IndexByte(s, conv-'g'+'e') < 0 && strings.IndexByte(s, '.') < 0 { + buf.WriteString(".0") + } + return + } + + // %[eEfF] use 6-digit precision + buf.WriteString(strconv.FormatFloat(ff, conv, 6, 64)) +} + +func (f Float) Type() string { return "float" } +func (f Float) Freeze() {} // immutable +func (f Float) Truth() Bool { return f != 0.0 } +func (f Float) Hash() (uint32, error) { + // Equal float and int values must yield the same hash. + // TODO(adonovan): opt: if f is non-integral, and thus not equal + // to any Int, we can avoid the Int conversion and use a cheaper hash. + if isFinite(float64(f)) { + return finiteFloatToInt(f).Hash() + } + return 1618033, nil // NaN, +/-Inf +} + +func floor(f Float) Float { return Float(math.Floor(float64(f))) } + +// isFinite reports whether f represents a finite rational value. +// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0). +func isFinite(f float64) bool { + return math.Abs(f) <= math.MaxFloat64 +} + +func (x Float) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(Float) + return threeway(op, floatCmp(x, y)), nil +} + +// floatCmp performs a three-valued comparison on floats, +// which are totally ordered with NaN > +Inf. +func floatCmp(x, y Float) int { + if x > y { + return +1 + } else if x < y { + return -1 + } else if x == y { + return 0 + } + + // At least one operand is NaN. + if x == x { + return -1 // y is NaN + } else if y == y { + return +1 // x is NaN + } + return 0 // both NaN +} + +func (f Float) rational() *big.Rat { return new(big.Rat).SetFloat64(float64(f)) } + +// AsFloat returns the float64 value closest to x. 
+// The f result is undefined if x is not a float or Int. +// The result may be infinite if x is a very large Int. +func AsFloat(x Value) (f float64, ok bool) { + switch x := x.(type) { + case Float: + return float64(x), true + case Int: + return float64(x.Float()), true + } + return 0, false +} + +func (x Float) Mod(y Float) Float { + z := Float(math.Mod(float64(x), float64(y))) + if (x < 0) != (y < 0) && z != 0 { + z += y + } + return z +} + +// Unary implements the operations +float and -float. +func (f Float) Unary(op syntax.Token) (Value, error) { + switch op { + case syntax.MINUS: + return -f, nil + case syntax.PLUS: + return +f, nil + } + return nil, nil +} + +// String is the type of a Starlark text string. +// +// A String encapsulates an an immutable sequence of bytes, +// but strings are not directly iterable. Instead, iterate +// over the result of calling one of these four methods: +// codepoints, codepoint_ords, elems, elem_ords. +// +// Strings typically contain text; use Bytes for binary strings. +// The Starlark spec defines text strings as sequences of UTF-k +// codes that encode Unicode code points. In this Go implementation, +// k=8, whereas in a Java implementation, k=16. For portability, +// operations on strings should aim to avoid assumptions about +// the value of k. +// +// Warning: the contract of the Value interface's String method is that +// it returns the value printed in Starlark notation, +// so s.String() or fmt.Sprintf("%s", s) returns a quoted string. +// Use string(s) or s.GoString() or fmt.Sprintf("%#v", s) to obtain the raw contents +// of a Starlark string as a Go string. 
+type String string + +func (s String) String() string { return syntax.Quote(string(s), false) } +func (s String) GoString() string { return string(s) } +func (s String) Type() string { return "string" } +func (s String) Freeze() {} // immutable +func (s String) Truth() Bool { return len(s) > 0 } +func (s String) Hash() (uint32, error) { return hashString(string(s)), nil } +func (s String) Len() int { return len(s) } // bytes +func (s String) Index(i int) Value { return s[i : i+1] } + +func (s String) Slice(start, end, step int) Value { + if step == 1 { + return s[start:end] + } + + sign := signum(step) + var str []byte + for i := start; signum(end-i) == sign; i += step { + str = append(str, s[i]) + } + return String(str) +} + +func (s String) Attr(name string) (Value, error) { return builtinAttr(s, name, stringMethods) } +func (s String) AttrNames() []string { return builtinAttrNames(stringMethods) } + +func (x String) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(String) + return threeway(op, strings.Compare(string(x), string(y))), nil +} + +func AsString(x Value) (string, bool) { v, ok := x.(String); return string(v), ok } + +// A stringElems is an iterable whose iterator yields a sequence of +// elements (bytes), either numerically or as successive substrings. +// It is an indexable sequence. 
+type stringElems struct { + s String + ords bool +} + +var ( + _ Iterable = (*stringElems)(nil) + _ Indexable = (*stringElems)(nil) +) + +func (si stringElems) String() string { + if si.ords { + return si.s.String() + ".elem_ords()" + } else { + return si.s.String() + ".elems()" + } +} +func (si stringElems) Type() string { return "string.elems" } +func (si stringElems) Freeze() {} // immutable +func (si stringElems) Truth() Bool { return True } +func (si stringElems) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) } +func (si stringElems) Iterate() Iterator { return &stringElemsIterator{si, 0} } +func (si stringElems) Len() int { return len(si.s) } +func (si stringElems) Index(i int) Value { + if si.ords { + return MakeInt(int(si.s[i])) + } else { + // TODO(adonovan): opt: preallocate canonical 1-byte strings + // to avoid interface allocation. + return si.s[i : i+1] + } +} + +type stringElemsIterator struct { + si stringElems + i int +} + +func (it *stringElemsIterator) Next(p *Value) bool { + if it.i == len(it.si.s) { + return false + } + *p = it.si.Index(it.i) + it.i++ + return true +} + +func (*stringElemsIterator) Done() {} + +// A stringCodepoints is an iterable whose iterator yields a sequence of +// Unicode code points, either numerically or as successive substrings. +// It is not indexable. 
+type stringCodepoints struct { + s String + ords bool +} + +var _ Iterable = (*stringCodepoints)(nil) + +func (si stringCodepoints) String() string { + if si.ords { + return si.s.String() + ".codepoint_ords()" + } else { + return si.s.String() + ".codepoints()" + } +} +func (si stringCodepoints) Type() string { return "string.codepoints" } +func (si stringCodepoints) Freeze() {} // immutable +func (si stringCodepoints) Truth() Bool { return True } +func (si stringCodepoints) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) } +func (si stringCodepoints) Iterate() Iterator { return &stringCodepointsIterator{si, 0} } + +type stringCodepointsIterator struct { + si stringCodepoints + i int +} + +func (it *stringCodepointsIterator) Next(p *Value) bool { + s := it.si.s[it.i:] + if s == "" { + return false + } + r, sz := utf8.DecodeRuneInString(string(s)) + if !it.si.ords { + if r == utf8.RuneError { + *p = String(r) + } else { + *p = s[:sz] + } + } else { + *p = MakeInt(int(r)) + } + it.i += sz + return true +} + +func (*stringCodepointsIterator) Done() {} + +// A Function is a function defined by a Starlark def statement or lambda expression. +// The initialization behavior of a Starlark module is also represented by a Function. +type Function struct { + funcode *compile.Funcode + module *module + defaults Tuple + freevars Tuple +} + +// A module is the dynamic counterpart to a Program. +// All functions in the same program share a module. +type module struct { + program *compile.Program + predeclared StringDict + globals []Value + constants []Value +} + +// makeGlobalDict returns a new, unfrozen StringDict containing all global +// variables so far defined in the module. 
func (m *module) makeGlobalDict() StringDict {
	r := make(StringDict, len(m.program.Globals))
	for i, id := range m.program.Globals {
		// A nil slot is skipped, so only globals that currently
		// hold a value appear in the result.
		if v := m.globals[i]; v != nil {
			r[id.Name] = v
		}
	}
	return r
}

func (fn *Function) Name() string          { return fn.funcode.Name } // "lambda" for anonymous functions
func (fn *Function) Doc() string           { return fn.funcode.Doc }
func (fn *Function) Hash() (uint32, error) { return hashString(fn.funcode.Name), nil }
func (fn *Function) Freeze()               { fn.defaults.Freeze(); fn.freevars.Freeze() }
func (fn *Function) String() string        { return toString(fn) }
func (fn *Function) Type() string          { return "function" }
func (fn *Function) Truth() Bool           { return true }

// Globals returns a new, unfrozen StringDict containing all global
// variables so far defined in the function's module.
func (fn *Function) Globals() StringDict { return fn.module.makeGlobalDict() }

func (fn *Function) Position() syntax.Position { return fn.funcode.Pos }
func (fn *Function) NumParams() int            { return fn.funcode.NumParams }
func (fn *Function) NumKwonlyParams() int      { return fn.funcode.NumKwonlyParams }

// Param returns the name and position of the ith parameter,
// where 0 <= i < NumParams().
// The *args and **kwargs parameters are at the end
// even if there were optional parameters after *args.
// Param panics if i >= NumParams().
func (fn *Function) Param(i int) (string, syntax.Position) {
	if i >= fn.NumParams() {
		panic(i)
	}
	id := fn.funcode.Locals[i]
	return id.Name, id.Pos
}
func (fn *Function) HasVarargs() bool { return fn.funcode.HasVarargs }
func (fn *Function) HasKwargs() bool  { return fn.funcode.HasKwargs }

// A Builtin is a function implemented in Go.
type Builtin struct {
	name string
	fn   func(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error)
	recv Value // for bound methods (e.g. "".startswith)
}

func (b *Builtin) Name() string { return b.name }

// Freeze freezes the bound receiver, if there is one.
func (b *Builtin) Freeze() {
	if b.recv != nil {
		b.recv.Freeze()
	}
}

// Hash returns a hash of the builtin's name, modified when a receiver
// is bound so that bound and unbound forms hash differently.
func (b *Builtin) Hash() (uint32, error) {
	h := hashString(b.name)
	if b.recv != nil {
		h ^= 5521
	}
	return h, nil
}
func (b *Builtin) Receiver() Value { return b.recv }
func (b *Builtin) String() string  { return toString(b) }
func (b *Builtin) Type() string    { return "builtin_function_or_method" }

// CallInternal invokes the wrapped Go function, passing b itself as
// the fn argument.
func (b *Builtin) CallInternal(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) {
	return b.fn(thread, b, args, kwargs)
}
func (b *Builtin) Truth() Bool { return true }

// NewBuiltin returns a new 'builtin_function_or_method' value with the specified name
// and implementation. It compares unequal with all other values.
func NewBuiltin(name string, fn func(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error)) *Builtin {
	return &Builtin{name: name, fn: fn}
}

// BindReceiver returns a new Builtin value representing a method
// closure, that is, a built-in function bound to a receiver value.
//
// In the example below, the value of f is the string.index
// built-in method bound to the receiver value "abc":
//
//     f = "abc".index; f("a"); f("b")
//
// In the common case, the receiver is bound only during the call,
// but this still results in the creation of a temporary method closure:
//
//     "abc".index("a")
//
func (b *Builtin) BindReceiver(recv Value) *Builtin {
	return &Builtin{name: b.name, fn: b.fn, recv: recv}
}

// A *Dict represents a Starlark dictionary.
// The zero value of Dict is a valid empty dictionary.
// If you know the exact final number of entries,
// it is more efficient to call NewDict.
type Dict struct {
	ht hashtable
}

// NewDict returns a dictionary with initial space for
// at least size insertions before rehashing.
+func NewDict(size int) *Dict { + dict := new(Dict) + dict.ht.init(size) + return dict +} + +func (d *Dict) Clear() error { return d.ht.clear() } +func (d *Dict) Delete(k Value) (v Value, found bool, err error) { return d.ht.delete(k) } +func (d *Dict) Get(k Value) (v Value, found bool, err error) { return d.ht.lookup(k) } +func (d *Dict) Items() []Tuple { return d.ht.items() } +func (d *Dict) Keys() []Value { return d.ht.keys() } +func (d *Dict) Len() int { return int(d.ht.len) } +func (d *Dict) Iterate() Iterator { return d.ht.iterate() } +func (d *Dict) SetKey(k, v Value) error { return d.ht.insert(k, v) } +func (d *Dict) String() string { return toString(d) } +func (d *Dict) Type() string { return "dict" } +func (d *Dict) Freeze() { d.ht.freeze() } +func (d *Dict) Truth() Bool { return d.Len() > 0 } +func (d *Dict) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: dict") } + +func (d *Dict) Attr(name string) (Value, error) { return builtinAttr(d, name, dictMethods) } +func (d *Dict) AttrNames() []string { return builtinAttrNames(dictMethods) } + +func (x *Dict) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(*Dict) + switch op { + case syntax.EQL: + ok, err := dictsEqual(x, y, depth) + return ok, err + case syntax.NEQ: + ok, err := dictsEqual(x, y, depth) + return !ok, err + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } +} + +func dictsEqual(x, y *Dict, depth int) (bool, error) { + if x.Len() != y.Len() { + return false, nil + } + for _, xitem := range x.Items() { + key, xval := xitem[0], xitem[1] + + if yval, found, _ := y.Get(key); !found { + return false, nil + } else if eq, err := EqualDepth(xval, yval, depth-1); err != nil { + return false, err + } else if !eq { + return false, nil + } + } + return true, nil +} + +// A *List represents a Starlark list value. 
type List struct {
	elems     []Value
	frozen    bool
	itercount uint32 // number of active iterators (ignored if frozen)
}

// NewList returns a list containing the specified elements.
// Callers should not subsequently modify elems.
func NewList(elems []Value) *List { return &List{elems: elems} }

// Freeze marks the list as frozen and then freezes each element.
// The flag is set before descending into the elements, so a list
// that is already frozen is not traversed again.
func (l *List) Freeze() {
	if !l.frozen {
		l.frozen = true
		for _, elem := range l.elems {
			elem.Freeze()
		}
	}
}

// checkMutable reports an error if the list should not be mutated.
// verb+" list" should describe the operation.
func (l *List) checkMutable(verb string) error {
	if l.frozen {
		return fmt.Errorf("cannot %s frozen list", verb)
	}
	if l.itercount > 0 {
		return fmt.Errorf("cannot %s list during iteration", verb)
	}
	return nil
}

func (l *List) String() string        { return toString(l) }
func (l *List) Type() string          { return "list" }
func (l *List) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: list") }
func (l *List) Truth() Bool           { return l.Len() > 0 }
func (l *List) Len() int              { return len(l.elems) }
func (l *List) Index(i int) Value     { return l.elems[i] }

// Slice returns a new list holding the elements selected by start,
// end and step. The result never shares its backing array with l.
func (l *List) Slice(start, end, step int) Value {
	if step == 1 {
		// Copy the range so the result does not alias l.elems.
		elems := append([]Value{}, l.elems[start:end]...)
		return NewList(elems)
	}

	sign := signum(step)
	var list []Value
	for i := start; signum(end-i) == sign; i += step {
		list = append(list, l.elems[i])
	}
	return NewList(list)
}

func (l *List) Attr(name string) (Value, error) { return builtinAttr(l, name, listMethods) }
func (l *List) AttrNames() []string             { return builtinAttrNames(listMethods) }

// Iterate returns an iterator over the list. Unless the list is
// frozen, the count of active iterators is incremented; the matching
// decrement happens in the iterator's Done method.
func (l *List) Iterate() Iterator {
	if !l.frozen {
		l.itercount++
	}
	return &listIterator{l: l}
}

// CompareSameType compares two lists element by element using sliceCompare.
func (x *List) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
	y := y_.(*List)
	// It's tempting to check x == y as an optimization here,
	// but wrong because a list containing NaN is not equal to itself.
	return sliceCompare(op, x.elems, y.elems, depth)
}

// sliceCompare compares the sequences x and y under op. The result is
// decided by the first pair of unequal elements, or by the lengths if
// one sequence is a prefix of the other.
func sliceCompare(op syntax.Token, x, y []Value, depth int) (bool, error) {
	// Fast path: check length.
	if len(x) != len(y) && (op == syntax.EQL || op == syntax.NEQ) {
		return op == syntax.NEQ, nil
	}

	// Find first element that is not equal in both lists.
	for i := 0; i < len(x) && i < len(y); i++ {
		if eq, err := EqualDepth(x[i], y[i], depth-1); err != nil {
			return false, err
		} else if !eq {
			switch op {
			case syntax.EQL:
				return false, nil
			case syntax.NEQ:
				return true, nil
			default:
				// An ordered comparison is decided by the first differing pair.
				return CompareDepth(op, x[i], y[i], depth-1)
			}
		}
	}

	// All common elements are equal, so the lengths decide.
	return threeway(op, len(x)-len(y)), nil
}

type listIterator struct {
	l *List
	i int
}

// Next stores the next element in *p and advances, or returns false
// when the index has reached the list's current length.
func (it *listIterator) Next(p *Value) bool {
	if it.i < it.l.Len() {
		*p = it.l.elems[it.i]
		it.i++
		return true
	}
	return false
}

// Done decrements the list's active-iterator count unless the list is frozen.
func (it *listIterator) Done() {
	if !it.l.frozen {
		it.l.itercount--
	}
}

// SetIndex assigns v to element i, or returns an error if the list is
// frozen or being iterated over.
func (l *List) SetIndex(i int, v Value) error {
	if err := l.checkMutable("assign to element of"); err != nil {
		return err
	}
	l.elems[i] = v
	return nil
}

// Append adds v to the end of the list, or returns an error if the
// list is frozen or being iterated over.
func (l *List) Append(v Value) error {
	if err := l.checkMutable("append to"); err != nil {
		return err
	}
	l.elems = append(l.elems, v)
	return nil
}

// Clear removes all elements, or returns an error if the list is
// frozen or being iterated over. The backing array is retained.
func (l *List) Clear() error {
	if err := l.checkMutable("clear"); err != nil {
		return err
	}
	for i := range l.elems {
		l.elems[i] = nil // aid GC
	}
	l.elems = l.elems[:0]
	return nil
}

// A Tuple represents a Starlark tuple value.
+type Tuple []Value + +func (t Tuple) Len() int { return len(t) } +func (t Tuple) Index(i int) Value { return t[i] } + +func (t Tuple) Slice(start, end, step int) Value { + if step == 1 { + return t[start:end] + } + + sign := signum(step) + var tuple Tuple + for i := start; signum(end-i) == sign; i += step { + tuple = append(tuple, t[i]) + } + return tuple +} + +func (t Tuple) Iterate() Iterator { return &tupleIterator{elems: t} } +func (t Tuple) Freeze() { + for _, elem := range t { + elem.Freeze() + } +} +func (t Tuple) String() string { return toString(t) } +func (t Tuple) Type() string { return "tuple" } +func (t Tuple) Truth() Bool { return len(t) > 0 } + +func (x Tuple) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(Tuple) + return sliceCompare(op, x, y, depth) +} + +func (t Tuple) Hash() (uint32, error) { + // Use same algorithm as Python. + var x, mult uint32 = 0x345678, 1000003 + for _, elem := range t { + y, err := elem.Hash() + if err != nil { + return 0, err + } + x = x ^ y*mult + mult += 82520 + uint32(len(t)+len(t)) + } + return x, nil +} + +type tupleIterator struct{ elems Tuple } + +func (it *tupleIterator) Next(p *Value) bool { + if len(it.elems) > 0 { + *p = it.elems[0] + it.elems = it.elems[1:] + return true + } + return false +} + +func (it *tupleIterator) Done() {} + +// A Set represents a Starlark set value. +// The zero value of Set is a valid empty set. +// If you know the exact final number of elements, +// it is more efficient to call NewSet. +type Set struct { + ht hashtable // values are all None +} + +// NewSet returns a dictionary with initial space for +// at least size insertions before rehashing. 
+func NewSet(size int) *Set { + set := new(Set) + set.ht.init(size) + return set +} + +func (s *Set) Delete(k Value) (found bool, err error) { _, found, err = s.ht.delete(k); return } +func (s *Set) Clear() error { return s.ht.clear() } +func (s *Set) Has(k Value) (found bool, err error) { _, found, err = s.ht.lookup(k); return } +func (s *Set) Insert(k Value) error { return s.ht.insert(k, None) } +func (s *Set) Len() int { return int(s.ht.len) } +func (s *Set) Iterate() Iterator { return s.ht.iterate() } +func (s *Set) String() string { return toString(s) } +func (s *Set) Type() string { return "set" } +func (s *Set) elems() []Value { return s.ht.keys() } +func (s *Set) Freeze() { s.ht.freeze() } +func (s *Set) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: set") } +func (s *Set) Truth() Bool { return s.Len() > 0 } + +func (s *Set) Attr(name string) (Value, error) { return builtinAttr(s, name, setMethods) } +func (s *Set) AttrNames() []string { return builtinAttrNames(setMethods) } + +func (x *Set) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(*Set) + switch op { + case syntax.EQL: + ok, err := setsEqual(x, y, depth) + return ok, err + case syntax.NEQ: + ok, err := setsEqual(x, y, depth) + return !ok, err + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } +} + +func setsEqual(x, y *Set, depth int) (bool, error) { + if x.Len() != y.Len() { + return false, nil + } + for _, elem := range x.elems() { + if found, _ := y.Has(elem); !found { + return false, nil + } + } + return true, nil +} + +func (s *Set) Union(iter Iterator) (Value, error) { + set := new(Set) + for _, elem := range s.elems() { + set.Insert(elem) // can't fail + } + var x Value + for iter.Next(&x) { + if err := set.Insert(x); err != nil { + return nil, err + } + } + return set, nil +} + +// toString returns the string form of value v. +// It may be more efficient than v.String() for larger values. 
func toString(v Value) string {
	buf := new(strings.Builder)
	writeValue(buf, v, nil)
	return buf.String()
}

// writeValue writes x to out.
//
// path is used to detect cycles.
// It contains the list of *List and *Dict values we're currently printing.
// (These are the only potentially cyclic structures.)
// Callers should generally pass nil for path.
// It is safe to re-use the same path slice for multiple calls.
func writeValue(out *strings.Builder, x Value, path []Value) {
	switch x := x.(type) {
	case nil:
		out.WriteString("<nil>") // indicates a bug

	// These four cases are duplicates of T.String(), for efficiency.
	case NoneType:
		out.WriteString("None")

	case Int:
		out.WriteString(x.String())

	case Bool:
		if x {
			out.WriteString("True")
		} else {
			out.WriteString("False")
		}

	case String:
		out.WriteString(syntax.Quote(string(x), false))

	case *List:
		out.WriteByte('[')
		if pathContains(path, x) {
			out.WriteString("...") // list contains itself
		} else {
			for i, elem := range x.elems {
				if i > 0 {
					out.WriteString(", ")
				}
				// Extend the path with this list for the cycle check.
				writeValue(out, elem, append(path, x))
			}
		}
		out.WriteByte(']')

	case Tuple:
		out.WriteByte('(')
		for i, elem := range x {
			if i > 0 {
				out.WriteString(", ")
			}
			writeValue(out, elem, path)
		}
		// A 1-tuple is written with a trailing comma: (x,)
		if len(x) == 1 {
			out.WriteByte(',')
		}
		out.WriteByte(')')

	case *Function:
		fmt.Fprintf(out, "<function %s>", x.Name())

	case *Builtin:
		if x.recv != nil {
			fmt.Fprintf(out, "<built-in method %s of %s value>", x.Name(), x.recv.Type())
		} else {
			fmt.Fprintf(out, "<built-in function %s>", x.Name())
		}

	case *Dict:
		out.WriteByte('{')
		if pathContains(path, x) {
			out.WriteString("...") // dict contains itself
		} else {
			sep := ""
			for _, item := range x.Items() {
				k, v := item[0], item[1]
				out.WriteString(sep)
				writeValue(out, k, path)
				out.WriteString(": ")
				writeValue(out, v, append(path, x)) // cycle check
				sep = ", "
			}
		}
		out.WriteByte('}')

	case *Set:
		out.WriteString("set([")
		for i, elem := range x.elems() {
			if i > 0 {
				out.WriteString(", ")
			}
			writeValue(out, elem, path)
		}
		out.WriteString("])")

	default:
		// Any other type is rendered by its own String method.
		out.WriteString(x.String())
	}
}

// pathContains reports whether x compares equal (==) to any element of path.
func pathContains(path []Value, x Value) bool {
	for _, y := range path {
		if x == y {
			return true
		}
	}
	return false
}

// maxdepth is the recursion limit that Equal and Compare pass to
// EqualDepth and CompareDepth.
const maxdepth = 10

// Equal reports whether two Starlark values are equal.
func Equal(x, y Value) (bool, error) {
	if x, ok := x.(String); ok {
		return x == y, nil // fast path for an important special case
	}
	return EqualDepth(x, y, maxdepth)
}

// EqualDepth reports whether two Starlark values are equal.
//
// Recursive comparisons by implementations of Value.CompareSameType
// should use EqualDepth to prevent infinite recursion.
func EqualDepth(x, y Value, depth int) (bool, error) {
	return CompareDepth(syntax.EQL, x, y, depth)
}

// Compare compares two Starlark values.
// The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE.
// Compare returns an error if an ordered comparison was
// requested for a type that does not support it.
//
// Recursive comparisons by implementations of Value.CompareSameType
// should use CompareDepth to prevent infinite recursion.
func Compare(op syntax.Token, x, y Value) (bool, error) {
	return CompareDepth(op, x, y, maxdepth)
}

// CompareDepth compares two Starlark values.
// The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE.
// CompareDepth returns an error if an ordered comparison was
// requested for a pair of values that do not support it.
//
// The depth parameter limits the maximum depth of recursion
// in cyclic data structures.
+func CompareDepth(op syntax.Token, x, y Value, depth int) (bool, error) { + if depth < 1 { + return false, fmt.Errorf("comparison exceeded maximum recursion depth") + } + if sameType(x, y) { + if xcomp, ok := x.(Comparable); ok { + return xcomp.CompareSameType(op, y, depth) + } + + // use identity comparison + switch op { + case syntax.EQL: + return x == y, nil + case syntax.NEQ: + return x != y, nil + } + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } + + // different types + + // int/float ordered comparisons + switch x := x.(type) { + case Int: + if y, ok := y.(Float); ok { + var cmp int + if y != y { + cmp = -1 // y is NaN + } else if !math.IsInf(float64(y), 0) { + cmp = x.rational().Cmp(y.rational()) // y is finite + } else if y > 0 { + cmp = -1 // y is +Inf + } else { + cmp = +1 // y is -Inf + } + return threeway(op, cmp), nil + } + case Float: + if y, ok := y.(Int); ok { + var cmp int + if x != x { + cmp = +1 // x is NaN + } else if !math.IsInf(float64(x), 0) { + cmp = x.rational().Cmp(y.rational()) // x is finite + } else if x > 0 { + cmp = +1 // x is +Inf + } else { + cmp = -1 // x is -Inf + } + return threeway(op, cmp), nil + } + } + + // All other values of different types compare unequal. + switch op { + case syntax.EQL: + return false, nil + case syntax.NEQ: + return true, nil + } + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) +} + +func sameType(x, y Value) bool { + return reflect.TypeOf(x) == reflect.TypeOf(y) || x.Type() == y.Type() +} + +// threeway interprets a three-way comparison value cmp (-1, 0, +1) +// as a boolean comparison (e.g. x < y). 
+func threeway(op syntax.Token, cmp int) bool { + switch op { + case syntax.EQL: + return cmp == 0 + case syntax.NEQ: + return cmp != 0 + case syntax.LE: + return cmp <= 0 + case syntax.LT: + return cmp < 0 + case syntax.GE: + return cmp >= 0 + case syntax.GT: + return cmp > 0 + } + panic(op) +} + +func b2i(b bool) int { + if b { + return 1 + } else { + return 0 + } +} + +// Len returns the length of a string or sequence value, +// and -1 for all others. +// +// Warning: Len(x) >= 0 does not imply Iterate(x) != nil. +// A string has a known length but is not directly iterable. +func Len(x Value) int { + switch x := x.(type) { + case String: + return x.Len() + case Indexable: + return x.Len() + case Sequence: + return x.Len() + } + return -1 +} + +// Iterate returns a new iterator for the value if iterable, nil otherwise. +// If the result is non-nil, the caller must call Done when finished with it. +// +// Warning: Iterate(x) != nil does not imply Len(x) >= 0. +// Some iterables may have unknown length. +func Iterate(x Value) Iterator { + if x, ok := x.(Iterable); ok { + return x.Iterate() + } + return nil +} + +// Bytes is the type of a Starlark binary string. +// +// A Bytes encapsulates an immutable sequence of bytes. +// It is comparable, indexable, and sliceable, but not directly iterable; +// use bytes.elems() for an iterable view. +// +// In this Go implementation, the elements of 'string' and 'bytes' are +// both bytes, but in other implementations, notably Java, the elements +// of a 'string' are UTF-16 codes (Java chars). The spec abstracts text +// strings as sequences of UTF-k codes that encode Unicode code points, +// and operations that convert from text to binary incur UTF-k-to-UTF-8 +// transcoding; conversely, conversion from binary to text incurs +// UTF-8-to-UTF-k transcoding. Because k=8 for Go, these operations +// are the identity function, at least for valid encodings of text. 
+type Bytes string + +var ( + _ Comparable = Bytes("") + _ Sliceable = Bytes("") + _ Indexable = Bytes("") +) + +func (b Bytes) String() string { return syntax.Quote(string(b), true) } +func (b Bytes) Type() string { return "bytes" } +func (b Bytes) Freeze() {} // immutable +func (b Bytes) Truth() Bool { return len(b) > 0 } +func (b Bytes) Hash() (uint32, error) { return String(b).Hash() } +func (b Bytes) Len() int { return len(b) } +func (b Bytes) Index(i int) Value { return b[i : i+1] } + +func (b Bytes) Attr(name string) (Value, error) { return builtinAttr(b, name, bytesMethods) } +func (b Bytes) AttrNames() []string { return builtinAttrNames(bytesMethods) } + +func (b Bytes) Slice(start, end, step int) Value { + if step == 1 { + return b[start:end] + } + + sign := signum(step) + var str []byte + for i := start; signum(end-i) == sign; i += step { + str = append(str, b[i]) + } + return Bytes(str) +} + +func (x Bytes) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(Bytes) + return threeway(op, strings.Compare(string(x), string(y))), nil +} diff --git a/starlark/value_test.go b/starlark/value_test.go new file mode 100644 index 0000000..6420a95 --- /dev/null +++ b/starlark/value_test.go @@ -0,0 +1,46 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark_test + +// This file defines tests of the Value API. 
+ +import ( + "fmt" + "testing" + + "go.starlark.net/starlark" +) + +func TestStringMethod(t *testing.T) { + s := starlark.String("hello") + for i, test := range [][2]string{ + // quoted string: + {s.String(), `"hello"`}, + {fmt.Sprintf("%s", s), `"hello"`}, + {fmt.Sprintf("%+s", s), `"hello"`}, + {fmt.Sprintf("%v", s), `"hello"`}, + {fmt.Sprintf("%+v", s), `"hello"`}, + // unquoted: + {s.GoString(), `hello`}, + {fmt.Sprintf("%#v", s), `hello`}, + } { + got, want := test[0], test[1] + if got != want { + t.Errorf("#%d: got <<%s>>, want <<%s>>", i, got, want) + } + } +} + +func TestListAppend(t *testing.T) { + l := starlark.NewList(nil) + l.Append(starlark.String("hello")) + res, ok := starlark.AsString(l.Index(0)) + if !ok { + t.Errorf("failed list.Append() got: %s, want: starlark.String", l.Index(0).Type()) + } + if res != "hello" { + t.Errorf("failed list.Append() got: %+v, want: hello", res) + } +} diff --git a/starlarkjson/json.go b/starlarkjson/json.go new file mode 100644 index 0000000..fc5d53f --- /dev/null +++ b/starlarkjson/json.go @@ -0,0 +1,478 @@ +// Copyright 2020 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package starlarkjson defines utilities for converting Starlark values +// to/from JSON strings. The most recent IETF standard for JSON is +// https://www.ietf.org/rfc/rfc8259.txt. +package starlarkjson // import "go.starlark.net/starlarkjson" + +import ( + "bytes" + "encoding/json" + "fmt" + "log" + "math" + "math/big" + "sort" + "strconv" + "strings" + "unicode/utf8" + + "go.starlark.net/starlark" + "go.starlark.net/starlarkstruct" +) + +// Module json is a Starlark module of JSON-related functions. 
+// +// json = module( +// encode, +// decode, +// indent, +// ) +// +// def encode(x): +// +// The encode function accepts one required positional argument, +// which it converts to JSON by cases: +// - A Starlark value that implements Go's standard json.Marshaler +// interface defines its own JSON encoding. +// - None, True, and False are converted to null, true, and false, respectively. +// - Starlark int values, no matter how large, are encoded as decimal integers. +// Some decoders may not be able to decode very large integers. +// - Starlark float values are encoded using decimal point notation, +// even if the value is an integer. +// It is an error to encode a non-finite floating-point value. +// - Starlark strings are encoded as JSON strings, using UTF-16 escapes. +// - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object. +// It is an error if any key is not a string. +// - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array. +// - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object. +// If an application-defined type matches more than one of the cases described above +// (e.g. it implements both Iterable and HasAttrs), the first case takes precedence. +// Encoding any other value yields an error. +// +// def decode(x): +// +// The decode function accepts one positional parameter, a JSON string. +// It returns the Starlark value that the string denotes. +// - Numbers are parsed as int or float, depending on whether they +// contain a decimal point or an exponent. +// - JSON objects are parsed as new unfrozen Starlark dicts. +// - JSON arrays are parsed as new unfrozen Starlark lists. +// Decoding fails if x is not a valid JSON string. +// +// def indent(str, *, prefix="", indent="\t"): +// +// The indent function pretty-prints a valid JSON encoding, +// and returns a string containing the indented form. 
+// It accepts one required positional parameter, the JSON string, +// and two optional keyword-only string parameters, prefix and indent, +// that specify a prefix of each new line, and the unit of indentation. +// +var Module = &starlarkstruct.Module{ + Name: "json", + Members: starlark.StringDict{ + "encode": starlark.NewBuiltin("json.encode", encode), + "decode": starlark.NewBuiltin("json.decode", decode), + "indent": starlark.NewBuiltin("json.indent", indent), + }, +} + +func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var x starlark.Value + if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil { + return nil, err + } + + buf := new(bytes.Buffer) + + var quoteSpace [128]byte + quote := func(s string) { + // Non-trivial escaping is handled by Go's encoding/json. + if isPrintableASCII(s) { + buf.Write(strconv.AppendQuote(quoteSpace[:0], s)) + } else { + // TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON. + // Can we avoid this call? + data, _ := json.Marshal(s) + buf.Write(data) + } + } + + var emit func(x starlark.Value) error + emit = func(x starlark.Value) error { + switch x := x.(type) { + case json.Marshaler: + // Application-defined starlark.Value types + // may define their own JSON encoding. + data, err := x.MarshalJSON() + if err != nil { + return err + } + buf.Write(data) + + case starlark.NoneType: + buf.WriteString("null") + + case starlark.Bool: + if x { + buf.WriteString("true") + } else { + buf.WriteString("false") + } + + case starlark.Int: + fmt.Fprint(buf, x) + + case starlark.Float: + if !isFinite(float64(x)) { + return fmt.Errorf("cannot encode non-finite float %v", x) + } + fmt.Fprintf(buf, "%g", x) // always contains a decimal point + + case starlark.String: + quote(string(x)) + + case starlark.IterableMapping: + // e.g. 
dict (must have string keys) + buf.WriteByte('{') + items := x.Items() + for _, item := range items { + if _, ok := item[0].(starlark.String); !ok { + return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type()) + } + } + sort.Slice(items, func(i, j int) bool { + return items[i][0].(starlark.String) < items[j][0].(starlark.String) + }) + for i, item := range items { + if i > 0 { + buf.WriteByte(',') + } + k, _ := starlark.AsString(item[0]) + quote(k) + buf.WriteByte(':') + if err := emit(item[1]); err != nil { + return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err) + } + } + buf.WriteByte('}') + + case starlark.Iterable: + // e.g. tuple, list + buf.WriteByte('[') + iter := x.Iterate() + defer iter.Done() + var elem starlark.Value + for i := 0; iter.Next(&elem); i++ { + if i > 0 { + buf.WriteByte(',') + } + if err := emit(elem); err != nil { + return fmt.Errorf("at %s index %d: %v", x.Type(), i, err) + } + } + buf.WriteByte(']') + + case starlark.HasAttrs: + // e.g. struct + buf.WriteByte('{') + var names []string + names = append(names, x.AttrNames()...) + sort.Strings(names) + for i, name := range names { + v, err := x.Attr(name) + if err != nil || v == nil { + log.Fatalf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name) + } + if i > 0 { + buf.WriteByte(',') + } + quote(name) + buf.WriteByte(':') + if err := emit(v); err != nil { + return fmt.Errorf("in field .%s: %v", name, err) + } + } + buf.WriteByte('}') + + default: + return fmt.Errorf("cannot encode %s as JSON", x.Type()) + } + return nil + } + + if err := emit(x); err != nil { + return nil, fmt.Errorf("%s: %v", b.Name(), err) + } + return starlark.String(buf.String()), nil +} + +// isPrintableASCII reports whether s contains only printable ASCII. 
+func isPrintableASCII(s string) bool { + for i := 0; i < len(s); i++ { + b := s[i] + if b < 0x20 || b >= 0x80 { + return false + } + } + return true +} + +// isFinite reports whether f represents a finite rational value. +// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0). +func isFinite(f float64) bool { + return math.Abs(f) <= math.MaxFloat64 +} + +func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + prefix, indent := "", "\t" // keyword-only + if err := starlark.UnpackArgs(b.Name(), nil, kwargs, + "prefix?", &prefix, + "indent?", &indent, + ); err != nil { + return nil, err + } + var str string // positional-only + if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil { + return nil, err + } + + buf := new(bytes.Buffer) + if err := json.Indent(buf, []byte(str), prefix, indent); err != nil { + return nil, fmt.Errorf("%s: %v", b.Name(), err) + } + return starlark.String(buf.String()), nil +} + +func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (_ starlark.Value, err error) { + var s string + if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &s); err != nil { + return nil, err + } + + // The decoder necessarily makes certain representation choices + // such as list vs tuple, struct vs dict, int vs float. + // In principle, we could parameterize it to allow the caller to + // control the returned types, but there's no compelling need yet. + + // Use panic/recover with a distinguished type (failure) for error handling. + type failure string + fail := func(format string, args ...interface{}) { + panic(failure(fmt.Sprintf(format, args...))) + } + + i := 0 + + // skipSpace consumes leading spaces, and reports whether there is more input. 
+ skipSpace := func() bool { + for ; i < len(s); i++ { + b := s[i] + if b != ' ' && b != '\t' && b != '\n' && b != '\r' { + return true + } + } + return false + } + + // next consumes leading spaces and returns the first non-space. + // It panics if at EOF. + next := func() byte { + if skipSpace() { + return s[i] + } + fail("unexpected end of file") + panic("unreachable") + } + + // parse returns the next JSON value from the input. + // It consumes leading but not trailing whitespace. + // It panics on error. + var parse func() starlark.Value + parse = func() starlark.Value { + b := next() + switch b { + case '"': + // string + + // Find end of quotation. + // Also, record whether trivial unquoting is safe. + // Non-trivial unquoting is handled by Go's encoding/json. + safe := true + closed := false + j := i + 1 + for ; j < len(s); j++ { + b := s[j] + if b == '\\' { + safe = false + j++ // skip x in \x + } else if b == '"' { + closed = true + j++ // skip '"' + break + } else if b >= utf8.RuneSelf { + safe = false + } + } + if !closed { + fail("unclosed string literal") + } + + r := s[i:j] + i = j + + // unquote + if safe { + r = r[1 : len(r)-1] + } else if err := json.Unmarshal([]byte(r), &r); err != nil { + fail("%s", err) + } + return starlark.String(r) + + case 'n': + if strings.HasPrefix(s[i:], "null") { + i += len("null") + return starlark.None + } + + case 't': + if strings.HasPrefix(s[i:], "true") { + i += len("true") + return starlark.True + } + + case 'f': + if strings.HasPrefix(s[i:], "false") { + i += len("false") + return starlark.False + } + + case '[': + // array + var elems []starlark.Value + + i++ // '[' + b = next() + if b != ']' { + for { + elem := parse() + elems = append(elems, elem) + b = next() + if b != ',' { + if b != ']' { + fail("got %q, want ',' or ']'", b) + } + break + } + i++ // ',' + } + } + i++ // ']' + return starlark.NewList(elems) + + case '{': + // object + dict := new(starlark.Dict) + + i++ // '{' + b = next() + if b != '}' { + 
for { + key := parse() + if _, ok := key.(starlark.String); !ok { + fail("got %s for object key, want string", key.Type()) + } + b = next() + if b != ':' { + fail("after object key, got %q, want ':' ", b) + } + i++ // ':' + value := parse() + dict.SetKey(key, value) // can't fail + b = next() + if b != ',' { + if b != '}' { + fail("in object, got %q, want ',' or '}'", b) + } + break + } + i++ // ',' + } + } + i++ // '}' + return dict + + default: + // number? + if isdigit(b) || b == '-' { + // scan literal. Allow [0-9+-eE.] for now. + float := false + var j int + for j = i + 1; j < len(s); j++ { + b = s[j] + if isdigit(b) { + // ok + } else if b == '.' || + b == 'e' || + b == 'E' || + b == '+' || + b == '-' { + float = true + } else { + break + } + } + num := s[i:j] + i = j + + // Unlike most C-like languages, + // JSON disallows a leading zero before a digit. + digits := num + if num[0] == '-' { + digits = num[1:] + } + if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) { + fail("invalid number: %s", num) + } + + // parse literal + if float { + x, err := strconv.ParseFloat(num, 64) + if err != nil { + fail("invalid number: %s", num) + } + return starlark.Float(x) + } else { + x, ok := new(big.Int).SetString(num, 10) + if !ok { + fail("invalid number: %s", num) + } + return starlark.MakeBigInt(x) + } + } + } + fail("unexpected character %q", b) + panic("unreachable") + } + defer func() { + x := recover() + switch x := x.(type) { + case failure: + err = fmt.Errorf("json.decode: at offset %d, %s", i, x) + case nil: + // nop + default: + panic(x) // unexpected panic + } + }() + x := parse() + if skipSpace() { + fail("unexpected character %q after value", s[i]) + } + return x, nil +} + +func isdigit(b byte) bool { + return b >= '0' && b <= '9' +} diff --git a/starlarkstruct/module.go b/starlarkstruct/module.go new file mode 100644 index 0000000..735c98a --- /dev/null +++ b/starlarkstruct/module.go @@ -0,0 +1,43 @@ +package starlarkstruct + 
+import ( + "fmt" + + "go.starlark.net/starlark" +) + +// A Module is a named collection of values, +// typically a suite of functions imported by a load statement. +// +// It differs from Struct primarily in that its string representation +// does not enumerate its fields. +type Module struct { + Name string + Members starlark.StringDict +} + +var _ starlark.HasAttrs = (*Module)(nil) + +func (m *Module) Attr(name string) (starlark.Value, error) { return m.Members[name], nil } +func (m *Module) AttrNames() []string { return m.Members.Keys() } +func (m *Module) Freeze() { m.Members.Freeze() } +func (m *Module) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", m.Type()) } +func (m *Module) String() string { return fmt.Sprintf("<module %q>", m.Name) } +func (m *Module) Truth() starlark.Bool { return true } +func (m *Module) Type() string { return "module" } + +// MakeModule may be used as the implementation of a Starlark built-in +// function, module(name, **kwargs). It returns a new module with the +// specified name and members. +func MakeModule(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var name string + if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &name); err != nil { + return nil, err + } + members := make(starlark.StringDict, len(kwargs)) + for _, kwarg := range kwargs { + k := string(kwarg[0].(starlark.String)) + members[k] = kwarg[1] + } + return &Module{name, members}, nil +} diff --git a/starlarkstruct/struct.go b/starlarkstruct/struct.go new file mode 100644 index 0000000..1982cc0 --- /dev/null +++ b/starlarkstruct/struct.go @@ -0,0 +1,281 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package starlarkstruct defines the Starlark types 'struct' and +// 'module', both optional language extensions. 
+// +package starlarkstruct // import "go.starlark.net/starlarkstruct" + +// It is tempting to introduce a variant of Struct that is a wrapper +// around a Go struct value, for stronger typing guarantees and more +// efficient and convenient field lookup. However: +// 1) all fields of Starlark structs are optional, so we cannot represent +// them using more specific types such as String, Int, *Depset, and +// *File, as such types give no way to represent missing fields. +// 2) the efficiency gain of direct struct field access is rather +// marginal: finding the index of a field by binary searching on the +// sorted list of field names is quite fast compared to the other +// overheads. +// 3) the gains in compactness and spatial locality are also rather +// marginal: the array behind the []entry slice is (due to field name +// strings) only a factor of 2 larger than the corresponding Go struct +// would be, and, like the Go struct, requires only a single allocation. + +import ( + "fmt" + "sort" + "strings" + + "go.starlark.net/starlark" + "go.starlark.net/syntax" +) + +// Make is the implementation of a built-in function that instantiates +// an immutable struct from the specified keyword arguments. +// +// An application can add 'struct' to the Starlark environment like so: +// +// globals := starlark.StringDict{ +// "struct": starlark.NewBuiltin("struct", starlarkstruct.Make), +// } +// +func Make(_ *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + if len(args) > 0 { + return nil, fmt.Errorf("struct: unexpected positional arguments") + } + return FromKeywords(Default, kwargs), nil +} + +// FromKeywords returns a new struct instance whose fields are specified by the +// key/value pairs in kwargs. (Each kwargs[i][0] must be a starlark.String.) 
+func FromKeywords(constructor starlark.Value, kwargs []starlark.Tuple) *Struct { + if constructor == nil { + panic("nil constructor") + } + s := &Struct{ + constructor: constructor, + entries: make(entries, 0, len(kwargs)), + } + for _, kwarg := range kwargs { + k := string(kwarg[0].(starlark.String)) + v := kwarg[1] + s.entries = append(s.entries, entry{k, v}) + } + sort.Sort(s.entries) + return s +} + +// FromStringDict returns a new struct instance whose fields are those of d. +// The constructor parameter specifies the constructor; use Default for an ordinary struct. +func FromStringDict(constructor starlark.Value, d starlark.StringDict) *Struct { + if constructor == nil { + panic("nil constructor") + } + s := &Struct{ + constructor: constructor, + entries: make(entries, 0, len(d)), + } + for k, v := range d { + s.entries = append(s.entries, entry{k, v}) + } + sort.Sort(s.entries) + return s +} + +// Struct is an immutable Starlark type that maps field names to values. +// It is not iterable and does not support len. +// +// A struct has a constructor, a distinct value that identifies a class +// of structs, and which appears in the struct's string representation. +// +// Operations such as x+y fail if the constructors of the two operands +// are not equal. +// +// The default constructor, Default, is the string "struct", but +// clients may wish to 'brand' structs for their own purposes. +// The constructor value appears in the printed form of the value, +// and is accessible using the Constructor method. +// +// Use Attr to access its fields and AttrNames to enumerate them. +type Struct struct { + constructor starlark.Value + entries entries // sorted by name +} + +// Default is the default constructor for structs. +// It is merely the string "struct". 
+const Default = starlark.String("struct") + +type entries []entry + +func (a entries) Len() int { return len(a) } +func (a entries) Less(i, j int) bool { return a[i].name < a[j].name } +func (a entries) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +type entry struct { + name string + value starlark.Value +} + +var ( + _ starlark.HasAttrs = (*Struct)(nil) + _ starlark.HasBinary = (*Struct)(nil) +) + +// ToStringDict adds a name/value entry to d for each field of the struct. +func (s *Struct) ToStringDict(d starlark.StringDict) { + for _, e := range s.entries { + d[e.name] = e.value + } +} + +func (s *Struct) String() string { + buf := new(strings.Builder) + if s.constructor == Default { + // NB: The Java implementation always prints struct + // even for Bazel provider instances. + buf.WriteString("struct") // avoid String()'s quotation + } else { + buf.WriteString(s.constructor.String()) + } + buf.WriteByte('(') + for i, e := range s.entries { + if i > 0 { + buf.WriteString(", ") + } + buf.WriteString(e.name) + buf.WriteString(" = ") + buf.WriteString(e.value.String()) + } + buf.WriteByte(')') + return buf.String() +} + +// Constructor returns the constructor used to create this struct. +func (s *Struct) Constructor() starlark.Value { return s.constructor } + +func (s *Struct) Type() string { return "struct" } +func (s *Struct) Truth() starlark.Bool { return true } // even when empty +func (s *Struct) Hash() (uint32, error) { + // Same algorithm as Tuple.hash, but with different primes. 
+ var x, m uint32 = 8731, 9839 + for _, e := range s.entries { + namehash, _ := starlark.String(e.name).Hash() + x = x ^ 3*namehash + y, err := e.value.Hash() + if err != nil { + return 0, err + } + x = x ^ y*m + m += 7349 + } + return x, nil +} +func (s *Struct) Freeze() { + for _, e := range s.entries { + e.value.Freeze() + } +} + +func (x *Struct) Binary(op syntax.Token, y starlark.Value, side starlark.Side) (starlark.Value, error) { + if y, ok := y.(*Struct); ok && op == syntax.PLUS { + if side == starlark.Right { + x, y = y, x + } + + if eq, err := starlark.Equal(x.constructor, y.constructor); err != nil { + return nil, fmt.Errorf("in %s + %s: error comparing constructors: %v", + x.constructor, y.constructor, err) + } else if !eq { + return nil, fmt.Errorf("cannot add structs of different constructors: %s + %s", + x.constructor, y.constructor) + } + + z := make(starlark.StringDict, x.len()+y.len()) + for _, e := range x.entries { + z[e.name] = e.value + } + for _, e := range y.entries { + z[e.name] = e.value + } + + return FromStringDict(x.constructor, z), nil + } + return nil, nil // unhandled +} + +// Attr returns the value of the specified field. +func (s *Struct) Attr(name string) (starlark.Value, error) { + // Binary search the entries. + // This implementation is a specialization of + // sort.Search that avoids dynamic dispatch. + n := len(s.entries) + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) + if s.entries[h].name < name { + i = h + 1 + } else { + j = h + } + } + if i < n && s.entries[i].name == name { + return s.entries[i].value, nil + } + + var ctor string + if s.constructor != Default { + ctor = s.constructor.String() + " " + } + return nil, starlark.NoSuchAttrError( + fmt.Sprintf("%sstruct has no .%s attribute", ctor, name)) +} + +func (s *Struct) len() int { return len(s.entries) } + +// AttrNames returns a new sorted list of the struct fields. 
+func (s *Struct) AttrNames() []string { + names := make([]string, len(s.entries)) + for i, e := range s.entries { + names[i] = e.name + } + return names +} + +func (x *Struct) CompareSameType(op syntax.Token, y_ starlark.Value, depth int) (bool, error) { + y := y_.(*Struct) + switch op { + case syntax.EQL: + return structsEqual(x, y, depth) + case syntax.NEQ: + eq, err := structsEqual(x, y, depth) + return !eq, err + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } +} + +func structsEqual(x, y *Struct, depth int) (bool, error) { + if x.len() != y.len() { + return false, nil + } + + if eq, err := starlark.Equal(x.constructor, y.constructor); err != nil { + return false, fmt.Errorf("error comparing struct constructors %v and %v: %v", + x.constructor, y.constructor, err) + } else if !eq { + return false, nil + } + + for i, n := 0, x.len(); i < n; i++ { + if x.entries[i].name != y.entries[i].name { + return false, nil + } else if eq, err := starlark.EqualDepth(x.entries[i].value, y.entries[i].value, depth-1); err != nil { + return false, err + } else if !eq { + return false, nil + } + } + return true, nil +} diff --git a/starlarkstruct/struct_test.go b/starlarkstruct/struct_test.go new file mode 100644 index 0000000..4f103bd --- /dev/null +++ b/starlarkstruct/struct_test.go @@ -0,0 +1,69 @@ +// Copyright 2018 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package starlarkstruct_test + +import ( + "fmt" + "path/filepath" + "testing" + + "go.starlark.net/starlark" + "go.starlark.net/starlarkstruct" + "go.starlark.net/starlarktest" +) + +func Test(t *testing.T) { + testdata := starlarktest.DataFile("starlarkstruct", ".") + thread := &starlark.Thread{Load: load} + starlarktest.SetReporter(thread, t) + filename := filepath.Join(testdata, "testdata/struct.star") + predeclared := starlark.StringDict{ + "struct": starlark.NewBuiltin("struct", starlarkstruct.Make), + "gensym": starlark.NewBuiltin("gensym", gensym), + } + if _, err := starlark.ExecFile(thread, filename, nil, predeclared); err != nil { + if err, ok := err.(*starlark.EvalError); ok { + t.Fatal(err.Backtrace()) + } + t.Fatal(err) + } +} + +// load implements the 'load' operation as used in the evaluator tests. +func load(thread *starlark.Thread, module string) (starlark.StringDict, error) { + if module == "assert.star" { + return starlarktest.LoadAssertModule() + } + return nil, fmt.Errorf("load not implemented") +} + +// gensym is a built-in function that generates a unique symbol. +func gensym(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var name string + if err := starlark.UnpackArgs("gensym", args, kwargs, "name", &name); err != nil { + return nil, err + } + return &symbol{name: name}, nil +} + +// A symbol is a distinct value that acts as a constructor of "branded" +// struct instances, like a class symbol in Python or a "provider" in Bazel. 
+type symbol struct{ name string } + +var _ starlark.Callable = (*symbol)(nil) + +func (sym *symbol) Name() string { return sym.name } +func (sym *symbol) String() string { return sym.name } +func (sym *symbol) Type() string { return "symbol" } +func (sym *symbol) Freeze() {} // immutable +func (sym *symbol) Truth() starlark.Bool { return starlark.True } +func (sym *symbol) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", sym.Type()) } + +func (sym *symbol) CallInternal(thread *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + if len(args) > 0 { + return nil, fmt.Errorf("%s: unexpected positional arguments", sym) + } + return starlarkstruct.FromKeywords(sym, kwargs), nil +} diff --git a/starlarkstruct/testdata/struct.star b/starlarkstruct/testdata/struct.star new file mode 100644 index 0000000..e54fe04 --- /dev/null +++ b/starlarkstruct/testdata/struct.star @@ -0,0 +1,63 @@ +# Tests of Starlark 'struct' extension. +# This is not a standard feature and the Go and Starlark APIs may yet change. + +load("assert.star", "assert") + +assert.eq(str(struct), "<built-in function struct>") + +# struct is a constructor for "unbranded" structs. +s = struct(host = "localhost", port = 80) +assert.eq(s, s) +assert.eq(s, struct(host = "localhost", port = 80)) +assert.ne(s, struct(host = "localhost", port = 81)) +assert.eq(type(s), "struct") +assert.eq(str(s), 'struct(host = "localhost", port = 80)') +assert.eq(s.host, "localhost") +assert.eq(s.port, 80) +assert.fails(lambda : s.protocol, "struct has no .protocol attribute") +assert.eq(dir(s), ["host", "port"]) + +# Use gensym to create "branded" struct types. +hostport = gensym(name = "hostport") +assert.eq(type(hostport), "symbol") +assert.eq(str(hostport), "hostport") + +# Call the symbol to instantiate a new type. 
+http = hostport(host = "localhost", port = 80) +assert.eq(type(http), "struct") +assert.eq(str(http), 'hostport(host = "localhost", port = 80)') # includes name of constructor +assert.eq(http, http) +assert.eq(http, hostport(host = "localhost", port = 80)) +assert.ne(http, hostport(host = "localhost", port = 443)) +assert.eq(http.host, "localhost") +assert.eq(http.port, 80) +assert.fails(lambda : http.protocol, "hostport struct has no .protocol attribute") + +person = gensym(name = "person") +bob = person(name = "bob", age = 50) +alice = person(name = "alice", city = "NYC") +assert.ne(http, bob) # different constructor symbols +assert.ne(bob, alice) # different fields + +hostport2 = gensym(name = "hostport") +assert.eq(hostport, hostport) +assert.ne(hostport, hostport2) # same name, different symbol +assert.ne(http, hostport2(host = "localhost", port = 80)) # equal fields but different ctor symbols + +# dir +assert.eq(dir(alice), ["city", "name"]) +assert.eq(dir(bob), ["age", "name"]) +assert.eq(dir(http), ["host", "port"]) + +# hasattr, getattr +assert.true(hasattr(alice, "city")) +assert.eq(hasattr(alice, "ageaa"), False) +assert.eq(getattr(alice, "city"), "NYC") + +# + +assert.eq(bob + bob, bob) +assert.eq(bob + alice, person(age = 50, city = "NYC", name = "alice")) +assert.eq(alice + bob, person(age = 50, city = "NYC", name = "bob")) # not commutative! a misfeature +assert.fails(lambda : alice + 1, "struct \\+ int") +assert.eq(http + http, http) +assert.fails(lambda : http + bob, "different constructors: hostport \\+ person") diff --git a/starlarktest/assert.star b/starlarktest/assert.star new file mode 100644 index 0000000..c6e480f --- /dev/null +++ b/starlarktest/assert.star @@ -0,0 +1,51 @@ +# Predeclared built-ins for this module: +# +# error(msg): report an error in Go's test framework without halting execution. +# This is distinct from the built-in fail function, which halts execution. 
+# catch(f): evaluate f() and returns its evaluation error message, if any +# matches(str, pattern): report whether str matches regular expression pattern. +# module(**kwargs): a constructor for a module. +# _freeze(x): freeze the value x and everything reachable from it. +# +# Clients may use these functions to define their own testing abstractions. + +def _eq(x, y): + if x != y: + error("%r != %r" % (x, y)) + +def _ne(x, y): + if x == y: + error("%r == %r" % (x, y)) + +def _true(cond, msg = "assertion failed"): + if not cond: + error(msg) + +def _lt(x, y): + if not (x < y): + error("%s is not less than %s" % (x, y)) + +def _contains(x, y): + if y not in x: + error("%s does not contain %s" % (x, y)) + +def _fails(f, pattern): + "assert_fails asserts that evaluation of f() fails with the specified error." + msg = catch(f) + if msg == None: + error("evaluation succeeded unexpectedly (want error matching %r)" % pattern) + elif not matches(pattern, msg): + error("regular expression (%s) did not match error (%s)" % (pattern, msg)) + +freeze = _freeze # an exported global whose value is the built-in freeze function + +assert = module( + "assert", + fail = error, + eq = _eq, + ne = _ne, + true = _true, + lt = _lt, + contains = _contains, + fails = _fails, +) diff --git a/starlarktest/starlarktest.go b/starlarktest/starlarktest.go new file mode 100644 index 0000000..e449436 --- /dev/null +++ b/starlarktest/starlarktest.go @@ -0,0 +1,147 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package starlarktest defines utilities for testing Starlark programs. +// +// Clients can call LoadAssertModule to load a module that defines +// several functions useful for testing. See assert.star for its +// definition. 
+// +// The assert.error function, which reports errors to the current Go +// testing.T, requires that clients call SetReporter(thread, t) before use. +package starlarktest // import "go.starlark.net/starlarktest" + +import ( + "fmt" + "go/build" + "os" + "path/filepath" + "regexp" + "strings" + "sync" + + "go.starlark.net/starlark" + "go.starlark.net/starlarkstruct" +) + +const localKey = "Reporter" + +// A Reporter is a value to which errors may be reported. +// It is satisfied by *testing.T. +type Reporter interface { + Error(args ...interface{}) +} + +// SetReporter associates an error reporter (such as a testing.T in +// a Go test) with the Starlark thread so that Starlark programs may +// report errors to it. +func SetReporter(thread *starlark.Thread, r Reporter) { + thread.SetLocal(localKey, r) +} + +// GetReporter returns the Starlark thread's error reporter. +// It must be preceded by a call to SetReporter. +func GetReporter(thread *starlark.Thread) Reporter { + r, ok := thread.Local(localKey).(Reporter) + if !ok { + panic("internal error: starlarktest.SetReporter was not called") + } + return r +} + +var ( + once sync.Once + assert starlark.StringDict + assertErr error +) + +// LoadAssertModule loads the assert module. +// It is concurrency-safe and idempotent. +func LoadAssertModule() (starlark.StringDict, error) { + once.Do(func() { + predeclared := starlark.StringDict{ + "error": starlark.NewBuiltin("error", error_), + "catch": starlark.NewBuiltin("catch", catch), + "matches": starlark.NewBuiltin("matches", matches), + "module": starlark.NewBuiltin("module", starlarkstruct.MakeModule), + "_freeze": starlark.NewBuiltin("freeze", freeze), + } + filename := DataFile("starlarktest", "assert.star") + thread := new(starlark.Thread) + assert, assertErr = starlark.ExecFile(thread, filename, nil, predeclared) + }) + return assert, assertErr +} + +// catch(f) evaluates f() and returns its evaluation error message +// if it failed or None if it succeeded. 
+func catch(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var fn starlark.Callable + if err := starlark.UnpackArgs("catch", args, kwargs, "fn", &fn); err != nil { + return nil, err + } + if _, err := starlark.Call(thread, fn, nil, nil); err != nil { + return starlark.String(err.Error()), nil + } + return starlark.None, nil +} + +// matches(pattern, str) reports whether string str matches the regular expression pattern. +func matches(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + var pattern, str string + if err := starlark.UnpackArgs("matches", args, kwargs, "pattern", &pattern, "str", &str); err != nil { + return nil, err + } + ok, err := regexp.MatchString(pattern, str) + if err != nil { + return nil, fmt.Errorf("matches: %s", err) + } + return starlark.Bool(ok), nil +} + +// error(x) reports an error to the Go test framework. +func error_(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + if len(args) != 1 { + return nil, fmt.Errorf("error: got %d arguments, want 1", len(args)) + } + buf := new(strings.Builder) + stk := thread.CallStack() + stk.Pop() + fmt.Fprintf(buf, "%sError: ", stk) + if s, ok := starlark.AsString(args[0]); ok { + buf.WriteString(s) + } else { + buf.WriteString(args[0].String()) + } + GetReporter(thread).Error(buf.String()) + return starlark.None, nil +} + +// freeze(x) freezes its operand. 
+func freeze(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("freeze does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("freeze got %d arguments, wants 1", len(args)) + } + args[0].Freeze() + return args[0], nil +} + +// DataFile returns the effective filename of the specified +// test data resource. The function abstracts differences between +// 'go build', under which a test runs in its package directory, +// and Blaze, under which a test runs in the root of the tree. +var DataFile = func(pkgdir, filename string) string { + // Check if we're being run by Bazel and change directories if so. + // TEST_SRCDIR and TEST_WORKSPACE are set by the Bazel test runner, so that makes a decent check + testSrcdir := os.Getenv("TEST_SRCDIR") + testWorkspace := os.Getenv("TEST_WORKSPACE") + if testSrcdir != "" && testWorkspace != "" { + return filepath.Join(testSrcdir, "net_starlark_go", pkgdir, filename) + } + + return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename) +} diff --git a/syntax/grammar.txt b/syntax/grammar.txt new file mode 100644 index 0000000..7f5dfc8 --- /dev/null +++ b/syntax/grammar.txt @@ -0,0 +1,129 @@ + +Grammar of Starlark +================== + +File = {Statement | newline} eof . + +Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt . + +DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite . + +Parameters = Parameter {',' Parameter}. + +Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier . + +IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] . + +ForStmt = 'for' LoopVariables 'in' Expression ':' Suite . + +WhileStmt = 'while' Test ':' Suite . + +Suite = [newline indent {Statement} outdent] | SimpleStmt . + +SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' . 
+# NOTE: '\n' optional at EOF + +SmallStmt = ReturnStmt + | BreakStmt | ContinueStmt | PassStmt + | AssignStmt + | ExprStmt + | LoadStmt + . + +ReturnStmt = 'return' [Expression] . +BreakStmt = 'break' . +ContinueStmt = 'continue' . +PassStmt = 'pass' . +AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression . +ExprStmt = Expression . + +LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' . + +Test = LambdaExpr + | IfExpr + | PrimaryExpr + | UnaryExpr + | BinaryExpr + . + +LambdaExpr = 'lambda' [Parameters] ':' Test . + +IfExpr = Test 'if' Test 'else' Test . + +PrimaryExpr = Operand + | PrimaryExpr DotSuffix + | PrimaryExpr CallSuffix + | PrimaryExpr SliceSuffix + . + +Operand = identifier + | int | float | string + | ListExpr | ListComp + | DictExpr | DictComp + | '(' [Expression [',']] ')' + | ('-' | '+') PrimaryExpr + . + +DotSuffix = '.' identifier . +CallSuffix = '(' [Arguments [',']] ')' . +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . + +Arguments = Argument {',' Argument} . +Argument = Test | identifier '=' Test | '*' Test | '**' Test . + +ListExpr = '[' [Expression [',']] ']' . +ListComp = '[' Test {CompClause} ']'. + +DictExpr = '{' [Entries [',']] '}' . +DictComp = '{' Entry {CompClause} '}' . +Entries = Entry {',' Entry} . +Entry = Test ':' Test . + +CompClause = 'for' LoopVariables 'in' Test | 'if' Test . + +UnaryExpr = 'not' Test . + +BinaryExpr = Test {Binop Test} . + +Binop = 'or' + | 'and' + | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in' + | '|' + | '^' + | '&' + | '-' | '+' + | '*' | '%' | '/' | '//' + . + +Expression = Test {',' Test} . +# NOTE: trailing comma permitted only when within [...] or (...). + +LoopVariables = PrimaryExpr {',' PrimaryExpr} . + + +# Notation (similar to Go spec): +- lowercase and 'quoted' items are lexical tokens. +- Capitalized names denote grammar productions. +- (...) 
implies grouping +- x | y means either x or y. +- [x] means x is optional +- {x} means x is repeated zero or more times +- The end of each declaration is marked with a period. + +# Tokens +- spaces: newline, eof, indent, outdent. +- identifier. +- literals: string, int, float. +- plus all quoted tokens such as '+=', 'return'. + +# Notes: +- Ambiguity is resolved using operator precedence. +- The grammar does not enforce the legal order of params and args, + nor that the first compclause must be a 'for'. + +TODO: +- explain how the lexer generates indent, outdent, and newline tokens. +- why is unary NOT separated from unary - and +? +- the grammar is (mostly) in LL(1) style so, for example, + dot expressions are formed suffixes, not complete expressions, + which makes the spec harder to read. Reorganize into non-LL(1) form? diff --git a/syntax/parse.go b/syntax/parse.go new file mode 100644 index 0000000..f4c8fff --- /dev/null +++ b/syntax/parse.go @@ -0,0 +1,1028 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// This file defines a recursive-descent parser for Starlark. +// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7. +// +// TODO(adonovan): use syntax.Error more systematically throughout the +// package. Verify that error positions are correct using the +// chunkedfile mechanism. + +import "log" + +// Enable this flag to print the token stream and log.Fatal on the first error. +const debug = false + +// A Mode value is a set of flags (or 0) that controls optional parser functionality. +type Mode uint + +const ( + RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments +) + +// Parse parses the input data and returns the corresponding parse tree. 
+// +// If src != nil, ParseFile parses the source from src and the filename +// is only used when recording position information. +// The type of the argument for the src parameter must be string, +// []byte, io.Reader, or FilePortion. +// If src == nil, ParseFile parses the file specified by filename. +func Parse(filename string, src interface{}, mode Mode) (f *File, err error) { + in, err := newScanner(filename, src, mode&RetainComments != 0) + if err != nil { + return nil, err + } + p := parser{in: in} + defer p.in.recover(&err) + + p.nextToken() // read first lookahead token + f = p.parseFile() + if f != nil { + f.Path = filename + } + p.assignComments(f) + return f, nil +} + +// ParseCompoundStmt parses a single compound statement: +// a blank line, a def, for, while, or if statement, or a +// semicolon-separated list of simple statements followed +// by a newline. These are the units on which the REPL operates. +// ParseCompoundStmt does not consume any following input. +// The parser calls the readline function each +// time it needs a new line of input. +func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) { + in, err := newScanner(filename, readline, false) + if err != nil { + return nil, err + } + + p := parser{in: in} + defer p.in.recover(&err) + + p.nextToken() // read first lookahead token + + var stmts []Stmt + switch p.tok { + case DEF, IF, FOR, WHILE: + stmts = p.parseStmt(stmts) + case NEWLINE: + // blank line + default: + stmts = p.parseSimpleStmt(stmts, false) + // Require but don't consume newline, to avoid blocking again. + if p.tok != NEWLINE { + p.in.errorf(p.in.pos, "invalid syntax") + } + } + + return &File{Path: filename, Stmts: stmts}, nil +} + +// ParseExpr parses a Starlark expression. +// A comma-separated list of expressions is parsed as a tuple. +// See Parse for explanation of parameters. 
+func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) { + in, err := newScanner(filename, src, mode&RetainComments != 0) + if err != nil { + return nil, err + } + p := parser{in: in} + defer p.in.recover(&err) + + p.nextToken() // read first lookahead token + + // Use parseExpr, not parseTest, to permit an unparenthesized tuple. + expr = p.parseExpr(false) + + // A following newline (e.g. "f()\n") appears outside any brackets, + // on a non-blank line, and thus results in a NEWLINE token. + if p.tok == NEWLINE { + p.nextToken() + } + + if p.tok != EOF { + p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok) + } + p.assignComments(expr) + return expr, nil +} + +type parser struct { + in *scanner + tok Token + tokval tokenValue +} + +// nextToken advances the scanner and returns the position of the +// previous token. +func (p *parser) nextToken() Position { + oldpos := p.tokval.pos + p.tok = p.in.nextToken(&p.tokval) + // enable to see the token stream + if debug { + log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos) + } + return oldpos +} + +// file_input = (NEWLINE | stmt)* EOF +func (p *parser) parseFile() *File { + var stmts []Stmt + for p.tok != EOF { + if p.tok == NEWLINE { + p.nextToken() + continue + } + stmts = p.parseStmt(stmts) + } + return &File{Stmts: stmts} +} + +func (p *parser) parseStmt(stmts []Stmt) []Stmt { + if p.tok == DEF { + return append(stmts, p.parseDefStmt()) + } else if p.tok == IF { + return append(stmts, p.parseIfStmt()) + } else if p.tok == FOR { + return append(stmts, p.parseForStmt()) + } else if p.tok == WHILE { + return append(stmts, p.parseWhileStmt()) + } + return p.parseSimpleStmt(stmts, true) +} + +func (p *parser) parseDefStmt() Stmt { + defpos := p.nextToken() // consume DEF + id := p.parseIdent() + p.consume(LPAREN) + params := p.parseParams() + p.consume(RPAREN) + p.consume(COLON) + body := p.parseSuite() + return &DefStmt{ + Def: defpos, + Name: id, + Params: params, + 
Body: body, + } +} + +func (p *parser) parseIfStmt() Stmt { + ifpos := p.nextToken() // consume IF + cond := p.parseTest() + p.consume(COLON) + body := p.parseSuite() + ifStmt := &IfStmt{ + If: ifpos, + Cond: cond, + True: body, + } + tail := ifStmt + for p.tok == ELIF { + elifpos := p.nextToken() // consume ELIF + cond := p.parseTest() + p.consume(COLON) + body := p.parseSuite() + elif := &IfStmt{ + If: elifpos, + Cond: cond, + True: body, + } + tail.ElsePos = elifpos + tail.False = []Stmt{elif} + tail = elif + } + if p.tok == ELSE { + tail.ElsePos = p.nextToken() // consume ELSE + p.consume(COLON) + tail.False = p.parseSuite() + } + return ifStmt +} + +func (p *parser) parseForStmt() Stmt { + forpos := p.nextToken() // consume FOR + vars := p.parseForLoopVariables() + p.consume(IN) + x := p.parseExpr(false) + p.consume(COLON) + body := p.parseSuite() + return &ForStmt{ + For: forpos, + Vars: vars, + X: x, + Body: body, + } +} + +func (p *parser) parseWhileStmt() Stmt { + whilepos := p.nextToken() // consume WHILE + cond := p.parseTest() + p.consume(COLON) + body := p.parseSuite() + return &WhileStmt{ + While: whilepos, + Cond: cond, + Body: body, + } +} + +// Equivalent to 'exprlist' production in Python grammar. +// +// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA? +func (p *parser) parseForLoopVariables() Expr { + // Avoid parseExpr because it would consume the IN token + // following x in "for x in y: ...". + v := p.parsePrimaryWithSuffix() + if p.tok != COMMA { + return v + } + + list := []Expr{v} + for p.tok == COMMA { + p.nextToken() + if terminatesExprList(p.tok) { + break + } + list = append(list, p.parsePrimaryWithSuffix()) + } + return &TupleExpr{List: list} +} + +// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE +// In REPL mode, it does not consume the NEWLINE. 
+func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt { + for { + stmts = append(stmts, p.parseSmallStmt()) + if p.tok != SEMI { + break + } + p.nextToken() // consume SEMI + if p.tok == NEWLINE || p.tok == EOF { + break + } + } + // EOF without NEWLINE occurs in `if x: pass`, for example. + if p.tok != EOF && consumeNL { + p.consume(NEWLINE) + } + + return stmts +} + +// small_stmt = RETURN expr? +// | PASS | BREAK | CONTINUE +// | LOAD ... +// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign +// | expr +func (p *parser) parseSmallStmt() Stmt { + switch p.tok { + case RETURN: + pos := p.nextToken() // consume RETURN + var result Expr + if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { + result = p.parseExpr(false) + } + return &ReturnStmt{Return: pos, Result: result} + + case BREAK, CONTINUE, PASS: + tok := p.tok + pos := p.nextToken() // consume it + return &BranchStmt{Token: tok, TokenPos: pos} + + case LOAD: + return p.parseLoadStmt() + } + + // Assignment + x := p.parseExpr(false) + switch p.tok { + case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ: + op := p.tok + pos := p.nextToken() // consume op + rhs := p.parseExpr(false) + return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} + } + + // Expression statement (e.g. function call, doc string). + return &ExprStmt{X: x} +} + +// stmt = LOAD '(' STRING {',' (IDENT '=')? 
STRING} [','] ')' +func (p *parser) parseLoadStmt() *LoadStmt { + loadPos := p.nextToken() // consume LOAD + lparen := p.consume(LPAREN) + + if p.tok != STRING { + p.in.errorf(p.in.pos, "first operand of load statement must be a string literal") + } + module := p.parsePrimary().(*Literal) + + var from, to []*Ident + for p.tok != RPAREN && p.tok != EOF { + p.consume(COMMA) + if p.tok == RPAREN { + break // allow trailing comma + } + switch p.tok { + case STRING: + // load("module", "id") + // To name is same as original. + lit := p.parsePrimary().(*Literal) + id := &Ident{ + NamePos: lit.TokenPos.add(`"`), + Name: lit.Value.(string), + } + to = append(to, id) + from = append(from, id) + + case IDENT: + // load("module", to="from") + id := p.parseIdent() + to = append(to, id) + if p.tok != EQ { + p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name) + } + p.consume(EQ) + if p.tok != STRING { + p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name) + } + lit := p.parsePrimary().(*Literal) + from = append(from, &Ident{ + NamePos: lit.TokenPos.add(`"`), + Name: lit.Value.(string), + }) + + case RPAREN: + p.in.errorf(p.in.pos, "trailing comma in load statement") + + default: + p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok) + } + } + rparen := p.consume(RPAREN) + + if len(to) == 0 { + p.in.errorf(lparen, "load statement must import at least 1 symbol") + } + return &LoadStmt{ + Load: loadPos, + Module: module, + To: to, + From: from, + Rparen: rparen, + } +} + +// suite is typically what follows a COLON (e.g. after DEF or FOR). 
+// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT +func (p *parser) parseSuite() []Stmt { + if p.tok == NEWLINE { + p.nextToken() // consume NEWLINE + p.consume(INDENT) + var stmts []Stmt + for p.tok != OUTDENT && p.tok != EOF { + stmts = p.parseStmt(stmts) + } + p.consume(OUTDENT) + return stmts + } + + return p.parseSimpleStmt(nil, true) +} + +func (p *parser) parseIdent() *Ident { + if p.tok != IDENT { + p.in.error(p.in.pos, "not an identifier") + } + id := &Ident{ + NamePos: p.tokval.pos, + Name: p.tokval.raw, + } + p.nextToken() + return id +} + +func (p *parser) consume(t Token) Position { + if p.tok != t { + p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t) + } + return p.nextToken() +} + +// params = (param COMMA)* param COMMA? +// | +// +// param = IDENT +// | IDENT EQ test +// | STAR +// | STAR IDENT +// | STARSTAR IDENT +// +// parseParams parses a parameter list. The resulting expressions are of the form: +// +// *Ident x +// *Binary{Op: EQ, X: *Ident, Y: Expr} x=y +// *Unary{Op: STAR} * +// *Unary{Op: STAR, X: *Ident} *args +// *Unary{Op: STARSTAR, X: *Ident} **kwargs +func (p *parser) parseParams() []Expr { + var params []Expr + for p.tok != RPAREN && p.tok != COLON && p.tok != EOF { + if len(params) > 0 { + p.consume(COMMA) + } + if p.tok == RPAREN { + break + } + + // * or *args or **kwargs + if p.tok == STAR || p.tok == STARSTAR { + op := p.tok + pos := p.nextToken() + var x Expr + if op == STARSTAR || p.tok == IDENT { + x = p.parseIdent() + } + params = append(params, &UnaryExpr{ + OpPos: pos, + Op: op, + X: x, + }) + continue + } + + // IDENT + // IDENT = test + id := p.parseIdent() + if p.tok == EQ { // default value + eq := p.nextToken() + dflt := p.parseTest() + params = append(params, &BinaryExpr{ + X: id, + OpPos: eq, + Op: EQ, + Y: dflt, + }) + continue + } + + params = append(params, id) + } + return params +} + +// parseExpr parses an expression, possible consisting of a +// comma-separated list of 'test' expressions. 
+// +// In many cases we must use parseTest to avoid ambiguity such as +// f(x, y) vs. f((x, y)). +func (p *parser) parseExpr(inParens bool) Expr { + x := p.parseTest() + if p.tok != COMMA { + return x + } + + // tuple + exprs := p.parseExprs([]Expr{x}, inParens) + return &TupleExpr{List: exprs} +} + +// parseExprs parses a comma-separated list of expressions, starting with the comma. +// It is used to parse tuples and list elements. +// expr_list = (',' expr)* ','? +func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr { + for p.tok == COMMA { + pos := p.nextToken() + if terminatesExprList(p.tok) { + if !allowTrailingComma { + p.in.error(pos, "unparenthesized tuple with trailing comma") + } + break + } + exprs = append(exprs, p.parseTest()) + } + return exprs +} + +// parseTest parses a 'test', a single-component expression. +func (p *parser) parseTest() Expr { + if p.tok == LAMBDA { + return p.parseLambda(true) + } + + x := p.parseTestPrec(0) + + // conditional expression (t IF cond ELSE f) + if p.tok == IF { + ifpos := p.nextToken() + cond := p.parseTestPrec(0) + if p.tok != ELSE { + p.in.error(ifpos, "conditional expression without else clause") + } + elsepos := p.nextToken() + else_ := p.parseTest() + return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_} + } + + return x +} + +// parseTestNoCond parses a a single-component expression without +// consuming a trailing 'if expr else expr'. +func (p *parser) parseTestNoCond() Expr { + if p.tok == LAMBDA { + return p.parseLambda(false) + } + return p.parseTestPrec(0) +} + +// parseLambda parses a lambda expression. +// The allowCond flag allows the body to be an 'a if b else c' conditional. 
+func (p *parser) parseLambda(allowCond bool) Expr { + lambda := p.nextToken() + var params []Expr + if p.tok != COLON { + params = p.parseParams() + } + p.consume(COLON) + + var body Expr + if allowCond { + body = p.parseTest() + } else { + body = p.parseTestNoCond() + } + + return &LambdaExpr{ + Lambda: lambda, + Params: params, + Body: body, + } +} + +func (p *parser) parseTestPrec(prec int) Expr { + if prec >= len(preclevels) { + return p.parsePrimaryWithSuffix() + } + + // expr = NOT expr + if p.tok == NOT && prec == int(precedence[NOT]) { + pos := p.nextToken() + x := p.parseTestPrec(prec) + return &UnaryExpr{ + OpPos: pos, + Op: NOT, + X: x, + } + } + + return p.parseBinopExpr(prec) +} + +// expr = test (OP test)* +// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing. +func (p *parser) parseBinopExpr(prec int) Expr { + x := p.parseTestPrec(prec + 1) + for first := true; ; first = false { + if p.tok == NOT { + p.nextToken() // consume NOT + // In this context, NOT must be followed by IN. + // Replace NOT IN by a single NOT_IN token. + if p.tok != IN { + p.in.errorf(p.in.pos, "got %#v, want in", p.tok) + } + p.tok = NOT_IN + } + + // Binary operator of specified precedence? + opprec := int(precedence[p.tok]) + if opprec < prec { + return x + } + + // Comparisons are non-associative. + if !first && opprec == int(precedence[EQL]) { + p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)", + x.(*BinaryExpr).Op, p.tok) + } + + op := p.tok + pos := p.nextToken() + y := p.parseTestPrec(opprec + 1) + x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y} + } +} + +// precedence maps each operator to its precedence (0-7), or -1 for other tokens. +var precedence [maxToken]int8 + +// preclevels groups operators of equal precedence. +// Comparisons are nonassociative; other binary operators associate to the left. +// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary. 
+// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators +var preclevels = [...][]Token{ + {OR}, // or + {AND}, // and + {NOT}, // not (unary) + {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in + {PIPE}, // | + {CIRCUMFLEX}, // ^ + {AMP}, // & + {LTLT, GTGT}, // << >> + {MINUS, PLUS}, // - + {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / // +} + +func init() { + // populate precedence table + for i := range precedence { + precedence[i] = -1 + } + for level, tokens := range preclevels { + for _, tok := range tokens { + precedence[tok] = int8(level) + } + } +} + +// primary_with_suffix = primary +// | primary '.' IDENT +// | primary slice_suffix +// | primary call_suffix +func (p *parser) parsePrimaryWithSuffix() Expr { + x := p.parsePrimary() + for { + switch p.tok { + case DOT: + dot := p.nextToken() + id := p.parseIdent() + x = &DotExpr{Dot: dot, X: x, Name: id} + case LBRACK: + x = p.parseSliceSuffix(x) + case LPAREN: + x = p.parseCallSuffix(x) + default: + return x + } + } +} + +// slice_suffix = '[' expr? ':' expr? ':' expr? ']' +func (p *parser) parseSliceSuffix(x Expr) Expr { + lbrack := p.nextToken() + var lo, hi, step Expr + if p.tok != COLON { + y := p.parseExpr(false) + + // index x[y] + if p.tok == RBRACK { + rbrack := p.nextToken() + return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack} + } + + lo = y + } + + // slice or substring x[lo:hi:step] + if p.tok == COLON { + p.nextToken() + if p.tok != COLON && p.tok != RBRACK { + hi = p.parseTest() + } + } + if p.tok == COLON { + p.nextToken() + if p.tok != RBRACK { + step = p.parseTest() + } + } + rbrack := p.consume(RBRACK) + return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack} +} + +// call_suffix = '(' arg_list? 
')' +func (p *parser) parseCallSuffix(fn Expr) Expr { + lparen := p.consume(LPAREN) + var rparen Position + var args []Expr + if p.tok == RPAREN { + rparen = p.nextToken() + } else { + args = p.parseArgs() + rparen = p.consume(RPAREN) + } + return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen} +} + +// parseArgs parses a list of actual parameter values (arguments). +// It mirrors the structure of parseParams. +// arg_list = ((arg COMMA)* arg COMMA?)? +func (p *parser) parseArgs() []Expr { + var args []Expr + for p.tok != RPAREN && p.tok != EOF { + if len(args) > 0 { + p.consume(COMMA) + } + if p.tok == RPAREN { + break + } + + // *args or **kwargs + if p.tok == STAR || p.tok == STARSTAR { + op := p.tok + pos := p.nextToken() + x := p.parseTest() + args = append(args, &UnaryExpr{ + OpPos: pos, + Op: op, + X: x, + }) + continue + } + + // We use a different strategy from Bazel here to stay within LL(1). + // Instead of looking ahead two tokens (IDENT, EQ) we parse + // 'test = test' then check that the first was an IDENT. + x := p.parseTest() + + if p.tok == EQ { + // name = value + if _, ok := x.(*Ident); !ok { + p.in.errorf(p.in.pos, "keyword argument must have form name=expr") + } + eq := p.nextToken() + y := p.parseTest() + x = &BinaryExpr{ + X: x, + OpPos: eq, + Op: EQ, + Y: y, + } + } + + args = append(args, x) + } + return args +} + +// primary = IDENT +// | INT | FLOAT | STRING | BYTES +// | '[' ... // list literal or comprehension +// | '{' ... // dict literal or comprehension +// | '(' ... 
// tuple or parenthesized expression +// | ('-'|'+'|'~') primary_with_suffix +func (p *parser) parsePrimary() Expr { + switch p.tok { + case IDENT: + return p.parseIdent() + + case INT, FLOAT, STRING, BYTES: + var val interface{} + tok := p.tok + switch tok { + case INT: + if p.tokval.bigInt != nil { + val = p.tokval.bigInt + } else { + val = p.tokval.int + } + case FLOAT: + val = p.tokval.float + case STRING, BYTES: + val = p.tokval.string + } + raw := p.tokval.raw + pos := p.nextToken() + return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val} + + case LBRACK: + return p.parseList() + + case LBRACE: + return p.parseDict() + + case LPAREN: + lparen := p.nextToken() + if p.tok == RPAREN { + // empty tuple + rparen := p.nextToken() + return &TupleExpr{Lparen: lparen, Rparen: rparen} + } + e := p.parseExpr(true) // allow trailing comma + rparen := p.consume(RPAREN) + return &ParenExpr{ + Lparen: lparen, + X: e, + Rparen: rparen, + } + + case MINUS, PLUS, TILDE: // unary + tok := p.tok + pos := p.nextToken() + x := p.parsePrimaryWithSuffix() + return &UnaryExpr{ + OpPos: pos, + Op: tok, + X: x, + } + } + p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) + panic("unreachable") +} + +// list = '[' ']' +// | '[' expr ']' +// | '[' expr expr_list ']' +// | '[' expr (FOR loop_variables IN expr)+ ']' +func (p *parser) parseList() Expr { + lbrack := p.nextToken() + if p.tok == RBRACK { + // empty List + rbrack := p.nextToken() + return &ListExpr{Lbrack: lbrack, Rbrack: rbrack} + } + + x := p.parseTest() + + if p.tok == FOR { + // list comprehension + return p.parseComprehensionSuffix(lbrack, x, RBRACK) + } + + exprs := []Expr{x} + if p.tok == COMMA { + // multi-item list literal + exprs = p.parseExprs(exprs, true) // allow trailing comma + } + + rbrack := p.consume(RBRACK) + return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack} +} + +// dict = '{' '}' +// | '{' dict_entry_list '}' +// | '{' dict_entry FOR loop_variables IN expr '}' +func (p 
*parser) parseDict() Expr { + lbrace := p.nextToken() + if p.tok == RBRACE { + // empty dict + rbrace := p.nextToken() + return &DictExpr{Lbrace: lbrace, Rbrace: rbrace} + } + + x := p.parseDictEntry() + + if p.tok == FOR { + // dict comprehension + return p.parseComprehensionSuffix(lbrace, x, RBRACE) + } + + entries := []Expr{x} + for p.tok == COMMA { + p.nextToken() + if p.tok == RBRACE { + break + } + entries = append(entries, p.parseDictEntry()) + } + + rbrace := p.consume(RBRACE) + return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace} +} + +// dict_entry = test ':' test +func (p *parser) parseDictEntry() *DictEntry { + k := p.parseTest() + colon := p.consume(COLON) + v := p.parseTest() + return &DictEntry{Key: k, Colon: colon, Value: v} +} + +// comp_suffix = FOR loopvars IN expr comp_suffix +// | IF expr comp_suffix +// | ']' or ')' (end) +// +// There can be multiple FOR/IF clauses; the first is always a FOR. +func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr { + var clauses []Node + for p.tok != endBrace { + if p.tok == FOR { + pos := p.nextToken() + vars := p.parseForLoopVariables() + in := p.consume(IN) + // Following Python 3, the operand of IN cannot be: + // - a conditional expression ('x if y else z'), + // due to conflicts in Python grammar + // ('if' is used by the comprehension); + // - a lambda expression + // - an unparenthesized tuple. 
+ x := p.parseTestPrec(0) + clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x}) + } else if p.tok == IF { + pos := p.nextToken() + cond := p.parseTestNoCond() + clauses = append(clauses, &IfClause{If: pos, Cond: cond}) + } else { + p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace) + } + } + rbrace := p.nextToken() + + return &Comprehension{ + Curly: endBrace == RBRACE, + Lbrack: lbrace, + Body: body, + Clauses: clauses, + Rbrack: rbrace, + } +} + +func terminatesExprList(tok Token) bool { + switch tok { + case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI: + return true + } + return false +} + +// Comment assignment. +// We build two lists of all subnodes, preorder and postorder. +// The preorder list is ordered by start location, with outer nodes first. +// The postorder list is ordered by end location, with outer nodes last. +// We use the preorder list to assign each whole-line comment to the syntax +// immediately following it, and we use the postorder list to assign each +// end-of-line comment to the syntax immediately preceding it. + +// flattenAST returns the list of AST nodes, both in prefix order and in postfix +// order. +func flattenAST(root Node) (pre, post []Node) { + stack := []Node{} + Walk(root, func(n Node) bool { + if n != nil { + pre = append(pre, n) + stack = append(stack, n) + } else { + post = append(post, stack[len(stack)-1]) + stack = stack[:len(stack)-1] + } + return true + }) + return pre, post +} + +// assignComments attaches comments to nearby syntax. +func (p *parser) assignComments(n Node) { + // Leave early if there are no comments + if len(p.in.lineComments)+len(p.in.suffixComments) == 0 { + return + } + + pre, post := flattenAST(n) + + // Assign line comments to syntax immediately following. 
+ line := p.in.lineComments + for _, x := range pre { + start, _ := x.Span() + + switch x.(type) { + case *File: + continue + } + + for len(line) > 0 && !start.isBefore(line[0].Start) { + x.AllocComments() + x.Comments().Before = append(x.Comments().Before, line[0]) + line = line[1:] + } + } + + // Remaining line comments go at end of file. + if len(line) > 0 { + n.AllocComments() + n.Comments().After = append(n.Comments().After, line...) + } + + // Assign suffix comments to syntax immediately before. + suffix := p.in.suffixComments + for i := len(post) - 1; i >= 0; i-- { + x := post[i] + + // Do not assign suffix comments to file + switch x.(type) { + case *File: + continue + } + + _, end := x.Span() + if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) { + x.AllocComments() + x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1]) + suffix = suffix[:len(suffix)-1] + } + } +} diff --git a/syntax/parse_test.go b/syntax/parse_test.go new file mode 100644 index 0000000..fedbb3e --- /dev/null +++ b/syntax/parse_test.go @@ -0,0 +1,487 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package syntax_test + +import ( + "bufio" + "bytes" + "fmt" + "go/build" + "io/ioutil" + "path/filepath" + "reflect" + "strings" + "testing" + + "go.starlark.net/internal/chunkedfile" + "go.starlark.net/starlarktest" + "go.starlark.net/syntax" +) + +func TestExprParseTrees(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {`print(1)`, + `(CallExpr Fn=print Args=(1))`}, + {"print(1)\n", + `(CallExpr Fn=print Args=(1))`}, + {`x + 1`, + `(BinaryExpr X=x Op=+ Y=1)`}, + {`[x for x in y]`, + `(Comprehension Body=x Clauses=((ForClause Vars=x X=y)))`}, + {`[x for x in (a if b else c)]`, + `(Comprehension Body=x Clauses=((ForClause Vars=x X=(ParenExpr X=(CondExpr Cond=b True=a False=c)))))`}, + {`x[i].f(42)`, + `(CallExpr Fn=(DotExpr X=(IndexExpr X=x Y=i) Name=f) Args=(42))`}, + {`x.f()`, + `(CallExpr Fn=(DotExpr X=x Name=f))`}, + {`x+y*z`, + `(BinaryExpr X=x Op=+ Y=(BinaryExpr X=y Op=* Y=z))`}, + {`x%y-z`, + `(BinaryExpr X=(BinaryExpr X=x Op=% Y=y) Op=- Y=z)`}, + {`a + b not in c`, + `(BinaryExpr X=(BinaryExpr X=a Op=+ Y=b) Op=not in Y=c)`}, + {`lambda x, *args, **kwargs: None`, + `(LambdaExpr Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=None)`}, + {`{"one": 1}`, + `(DictExpr List=((DictEntry Key="one" Value=1)))`}, + {`a[i]`, + `(IndexExpr X=a Y=i)`}, + {`a[i:]`, + `(SliceExpr X=a Lo=i)`}, + {`a[:j]`, + `(SliceExpr X=a Hi=j)`}, + {`a[::]`, + `(SliceExpr X=a)`}, + {`a[::k]`, + `(SliceExpr X=a Step=k)`}, + {`[]`, + `(ListExpr)`}, + {`[1]`, + `(ListExpr List=(1))`}, + {`[1,]`, + `(ListExpr List=(1))`}, + {`[1, 2]`, + `(ListExpr List=(1 2))`}, + {`()`, + `(TupleExpr)`}, + {`(4,)`, + `(ParenExpr X=(TupleExpr List=(4)))`}, + {`(4)`, + `(ParenExpr X=4)`}, + {`(4, 5)`, + `(ParenExpr X=(TupleExpr List=(4 5)))`}, + {`1, 2, 3`, + `(TupleExpr List=(1 2 3))`}, + {`1, 2,`, + `unparenthesized tuple with trailing comma`}, + {`{}`, + `(DictExpr)`}, + {`{"a": 1}`, + `(DictExpr List=((DictEntry Key="a" Value=1)))`}, + {`{"a": 1,}`, + 
`(DictExpr List=((DictEntry Key="a" Value=1)))`}, + {`{"a": 1, "b": 2}`, + `(DictExpr List=((DictEntry Key="a" Value=1) (DictEntry Key="b" Value=2)))`}, + {`{x: y for (x, y) in z}`, + `(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=(ParenExpr X=(TupleExpr List=(x y))) X=z)))`}, + {`{x: y for a in b if c}`, + `(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=a X=b) (IfClause Cond=c)))`}, + {`-1 + +2`, + `(BinaryExpr X=(UnaryExpr Op=- X=1) Op=+ Y=(UnaryExpr Op=+ X=2))`}, + {`"foo" + "bar"`, + `(BinaryExpr X="foo" Op=+ Y="bar")`}, + {`-1 * 2`, // prec(unary -) > prec(binary *) + `(BinaryExpr X=(UnaryExpr Op=- X=1) Op=* Y=2)`}, + {`-x[i]`, // prec(unary -) < prec(x[i]) + `(UnaryExpr Op=- X=(IndexExpr X=x Y=i))`}, + {`a | b & c | d`, // prec(|) < prec(&) + `(BinaryExpr X=(BinaryExpr X=a Op=| Y=(BinaryExpr X=b Op=& Y=c)) Op=| Y=d)`}, + {`a or b and c or d`, + `(BinaryExpr X=(BinaryExpr X=a Op=or Y=(BinaryExpr X=b Op=and Y=c)) Op=or Y=d)`}, + {`a and b or c and d`, + `(BinaryExpr X=(BinaryExpr X=a Op=and Y=b) Op=or Y=(BinaryExpr X=c Op=and Y=d))`}, + {`f(1, x=y)`, + `(CallExpr Fn=f Args=(1 (BinaryExpr X=x Op== Y=y)))`}, + {`f(*args, **kwargs)`, + `(CallExpr Fn=f Args=((UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)))`}, + {`lambda *args, *, x=1, **kwargs: 0`, + `(LambdaExpr Params=((UnaryExpr Op=* X=args) (UnaryExpr Op=*) (BinaryExpr X=x Op== Y=1) (UnaryExpr Op=** X=kwargs)) Body=0)`}, + {`lambda *, a, *b: 0`, + `(LambdaExpr Params=((UnaryExpr Op=*) a (UnaryExpr Op=* X=b)) Body=0)`}, + {`a if b else c`, + `(CondExpr Cond=b True=a False=c)`}, + {`a and not b`, + `(BinaryExpr X=a Op=and Y=(UnaryExpr Op=not X=b))`}, + {`[e for x in y if cond1 if cond2]`, + `(Comprehension Body=e Clauses=((ForClause Vars=x X=y) (IfClause Cond=cond1) (IfClause Cond=cond2)))`}, // github.com/google/skylark/issues/53 + } { + e, err := syntax.ParseExpr("foo.star", test.input, 0) + var got string + if err != nil { + got = stripPos(err) + 
} else { + got = treeString(e) + } + if test.want != got { + t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want) + } + } +} + +func TestStmtParseTrees(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {`print(1)`, + `(ExprStmt X=(CallExpr Fn=print Args=(1)))`}, + {`return 1, 2`, + `(ReturnStmt Result=(TupleExpr List=(1 2)))`}, + {`return`, + `(ReturnStmt)`}, + {`for i in "abc": break`, + `(ForStmt Vars=i X="abc" Body=((BranchStmt Token=break)))`}, + {`for i in "abc": continue`, + `(ForStmt Vars=i X="abc" Body=((BranchStmt Token=continue)))`}, + {`for x, y in z: pass`, + `(ForStmt Vars=(TupleExpr List=(x y)) X=z Body=((BranchStmt Token=pass)))`}, + {`if True: pass`, + `(IfStmt Cond=True True=((BranchStmt Token=pass)))`}, + {`if True: break`, + `(IfStmt Cond=True True=((BranchStmt Token=break)))`}, + {`if True: continue`, + `(IfStmt Cond=True True=((BranchStmt Token=continue)))`}, + {`if True: pass +else: + pass`, + `(IfStmt Cond=True True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`}, + {"if a: pass\nelif b: pass\nelse: pass", + `(IfStmt Cond=a True=((BranchStmt Token=pass)) False=((IfStmt Cond=b True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))))`}, + {`x, y = 1, 2`, + `(AssignStmt Op== LHS=(TupleExpr List=(x y)) RHS=(TupleExpr List=(1 2)))`}, + {`x[i] = 1`, + `(AssignStmt Op== LHS=(IndexExpr X=x Y=i) RHS=1)`}, + {`x.f = 1`, + `(AssignStmt Op== LHS=(DotExpr X=x Name=f) RHS=1)`}, + {`(x, y) = 1`, + `(AssignStmt Op== LHS=(ParenExpr X=(TupleExpr List=(x y))) RHS=1)`}, + {`load("", "a", b="c")`, + `(LoadStmt Module="" From=(a c) To=(a b))`}, + {`if True: load("", "a", b="c")`, // load needn't be at toplevel + `(IfStmt Cond=True True=((LoadStmt Module="" From=(a c) To=(a b))))`}, + {`def f(x, *args, **kwargs): + pass`, + `(DefStmt Name=f Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=((BranchStmt Token=pass)))`}, + {`def f(**kwargs, *args): pass`, + `(DefStmt Name=f 
Params=((UnaryExpr Op=** X=kwargs) (UnaryExpr Op=* X=args)) Body=((BranchStmt Token=pass)))`}, + {`def f(a, b, c=d): pass`, + `(DefStmt Name=f Params=(a b (BinaryExpr X=c Op== Y=d)) Body=((BranchStmt Token=pass)))`}, + {`def f(a, b=c, d): pass`, + `(DefStmt Name=f Params=(a (BinaryExpr X=b Op== Y=c) d) Body=((BranchStmt Token=pass)))`}, // TODO(adonovan): fix this + {`def f(): + def g(): + pass + pass +def h(): + pass`, + `(DefStmt Name=f Body=((DefStmt Name=g Body=((BranchStmt Token=pass))) (BranchStmt Token=pass)))`}, + {"f();g()", + `(ExprStmt X=(CallExpr Fn=f))`}, + {"f();", + `(ExprStmt X=(CallExpr Fn=f))`}, + {"f();g()\n", + `(ExprStmt X=(CallExpr Fn=f))`}, + {"f();\n", + `(ExprStmt X=(CallExpr Fn=f))`}, + } { + f, err := syntax.Parse("foo.star", test.input, 0) + if err != nil { + t.Errorf("parse `%s` failed: %v", test.input, stripPos(err)) + continue + } + if got := treeString(f.Stmts[0]); test.want != got { + t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want) + } + } +} + +// TestFileParseTrees tests sequences of statements, and particularly +// handling of indentation, newlines, line continuations, and blank lines. 
+func TestFileParseTrees(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {`x = 1 +print(x)`, + `(AssignStmt Op== LHS=x RHS=1) +(ExprStmt X=(CallExpr Fn=print Args=(x)))`}, + {"if cond:\n\tpass", + `(IfStmt Cond=cond True=((BranchStmt Token=pass)))`}, + {"if cond:\n\tpass\nelse:\n\tpass", + `(IfStmt Cond=cond True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`}, + {`def f(): + pass +pass + +pass`, + `(DefStmt Name=f Body=((BranchStmt Token=pass))) +(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {`pass; pass`, + `(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {"pass\npass", + `(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {"pass\n\npass", + `(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {`x = (1 + +2)`, + `(AssignStmt Op== LHS=x RHS=(ParenExpr X=(BinaryExpr X=1 Op=+ Y=2)))`}, + {`x = 1 \ ++ 2`, + `(AssignStmt Op== LHS=x RHS=(BinaryExpr X=1 Op=+ Y=2))`}, + } { + f, err := syntax.Parse("foo.star", test.input, 0) + if err != nil { + t.Errorf("parse `%s` failed: %v", test.input, stripPos(err)) + continue + } + var buf bytes.Buffer + for i, stmt := range f.Stmts { + if i > 0 { + buf.WriteByte('\n') + } + writeTree(&buf, reflect.ValueOf(stmt)) + } + if got := buf.String(); test.want != got { + t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want) + } + } +} + +// TestCompoundStmt tests handling of REPL-style compound statements. 
+func TestCompoundStmt(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + // blank lines + {"\n", + ``}, + {" \n", + ``}, + {"# comment\n", + ``}, + // simple statement + {"1\n", + `(ExprStmt X=1)`}, + {"print(1)\n", + `(ExprStmt X=(CallExpr Fn=print Args=(1)))`}, + {"1;2;3;\n", + `(ExprStmt X=1)(ExprStmt X=2)(ExprStmt X=3)`}, + {"f();g()\n", + `(ExprStmt X=(CallExpr Fn=f))(ExprStmt X=(CallExpr Fn=g))`}, + {"f();\n", + `(ExprStmt X=(CallExpr Fn=f))`}, + {"f(\n\n\n\n\n\n\n)\n", + `(ExprStmt X=(CallExpr Fn=f))`}, + // complex statements + {"def f():\n pass\n\n", + `(DefStmt Name=f Body=((BranchStmt Token=pass)))`}, + {"if cond:\n pass\n\n", + `(IfStmt Cond=cond True=((BranchStmt Token=pass)))`}, + // Even as a 1-liner, the following blank line is required. + {"if cond: pass\n\n", + `(IfStmt Cond=cond True=((BranchStmt Token=pass)))`}, + // github.com/google/starlark-go/issues/121 + {"a; b; c\n", + `(ExprStmt X=a)(ExprStmt X=b)(ExprStmt X=c)`}, + {"a; b c\n", + `invalid syntax`}, + } { + + // Fake readline input from string. + // The ! suffix, which would cause a parse error, + // tests that the parser doesn't read more than necessary. + sc := bufio.NewScanner(strings.NewReader(test.input + "!")) + readline := func() ([]byte, error) { + if sc.Scan() { + return []byte(sc.Text() + "\n"), nil + } + return nil, sc.Err() + } + + var got string + f, err := syntax.ParseCompoundStmt("foo.star", readline) + if err != nil { + got = stripPos(err) + } else { + for _, stmt := range f.Stmts { + got += treeString(stmt) + } + } + if test.want != got { + t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want) + } + } +} + +func stripPos(err error) string { + s := err.Error() + if i := strings.Index(s, ": "); i >= 0 { + s = s[i+len(": "):] // strip file:line:col + } + return s +} + +// treeString prints a syntax node as a parenthesized tree. +// Idents are printed as foo and Literals as "foo" or 42. +// Structs are printed as (type name=value ...). 
+// Only non-empty fields are shown. +func treeString(n syntax.Node) string { + var buf bytes.Buffer + writeTree(&buf, reflect.ValueOf(n)) + return buf.String() +} + +func writeTree(out *bytes.Buffer, x reflect.Value) { + switch x.Kind() { + case reflect.String, reflect.Int, reflect.Bool: + fmt.Fprintf(out, "%v", x.Interface()) + case reflect.Ptr, reflect.Interface: + if elem := x.Elem(); elem.Kind() == 0 { + out.WriteString("nil") + } else { + writeTree(out, elem) + } + case reflect.Struct: + switch v := x.Interface().(type) { + case syntax.Literal: + switch v.Token { + case syntax.STRING: + fmt.Fprintf(out, "%q", v.Value) + case syntax.BYTES: + fmt.Fprintf(out, "b%q", v.Value) + case syntax.INT: + fmt.Fprintf(out, "%d", v.Value) + } + return + case syntax.Ident: + out.WriteString(v.Name) + return + } + fmt.Fprintf(out, "(%s", strings.TrimPrefix(x.Type().String(), "syntax.")) + for i, n := 0, x.NumField(); i < n; i++ { + f := x.Field(i) + if f.Type() == reflect.TypeOf(syntax.Position{}) { + continue // skip positions + } + name := x.Type().Field(i).Name + if name == "commentsRef" { + continue // skip comments fields + } + if f.Type() == reflect.TypeOf(syntax.Token(0)) { + fmt.Fprintf(out, " %s=%s", name, f.Interface()) + continue + } + + switch f.Kind() { + case reflect.Slice: + if n := f.Len(); n > 0 { + fmt.Fprintf(out, " %s=(", name) + for i := 0; i < n; i++ { + if i > 0 { + out.WriteByte(' ') + } + writeTree(out, f.Index(i)) + } + out.WriteByte(')') + } + continue + case reflect.Ptr, reflect.Interface: + if f.IsNil() { + continue + } + case reflect.Int: + if f.Int() != 0 { + fmt.Fprintf(out, " %s=%d", name, f.Int()) + } + continue + case reflect.Bool: + if f.Bool() { + fmt.Fprintf(out, " %s", name) + } + continue + } + fmt.Fprintf(out, " %s=", name) + writeTree(out, f) + } + fmt.Fprintf(out, ")") + default: + fmt.Fprintf(out, "%T", x.Interface()) + } +} + +func TestParseErrors(t *testing.T) { + filename := starlarktest.DataFile("syntax", 
"testdata/errors.star") + for _, chunk := range chunkedfile.Read(filename, t) { + _, err := syntax.Parse(filename, chunk.Source, 0) + switch err := err.(type) { + case nil: + // ok + case syntax.Error: + chunk.GotError(int(err.Pos.Line), err.Msg) + default: + t.Error(err) + } + chunk.Done() + } +} + +func TestFilePortion(t *testing.T) { + // Imagine that the Starlark file or expression print(x.f) is extracted + // from the middle of a file in some hypothetical template language; + // see https://github.com/google/starlark-go/issues/346. For example: + // -- + // {{loop x seq}} + // {{print(x.f)}} + // {{end}} + // -- + fp := syntax.FilePortion{Content: []byte("print(x.f)"), FirstLine: 2, FirstCol: 4} + file, err := syntax.Parse("foo.template", fp, 0) + if err != nil { + t.Fatal(err) + } + span := fmt.Sprint(file.Stmts[0].Span()) + want := "foo.template:2:4 foo.template:2:14" + if span != want { + t.Errorf("wrong span: got %q, want %q", span, want) + } +} + +// dataFile is the same as starlarktest.DataFile. +// We make a copy to avoid a dependency cycle. +var dataFile = func(pkgdir, filename string) string { + return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename) +} + +func BenchmarkParse(b *testing.B) { + filename := dataFile("syntax", "testdata/scan.star") + b.StopTimer() + data, err := ioutil.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + _, err := syntax.Parse(filename, data, 0) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/syntax/quote.go b/syntax/quote.go new file mode 100644 index 0000000..741e106 --- /dev/null +++ b/syntax/quote.go @@ -0,0 +1,309 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// Starlark quoted string utilities. 
// unesc maps single-letter chars following \ to their actual values.
var unesc = [256]byte{
	'a':  '\a',
	'b':  '\b',
	'f':  '\f',
	'n':  '\n',
	'r':  '\r',
	't':  '\t',
	'v':  '\v',
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
}

// esc maps escape-worthy bytes to the char that should follow \.
var esc = [256]byte{
	'\a': 'a',
	'\b': 'b',
	'\f': 'f',
	'\n': 'n',
	'\r': 'r',
	'\t': 't',
	'\v': 'v',
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
}

// unquote unquotes the quoted string, returning the actual
// string value, whether the original was triple-quoted,
// whether it was a byte string, and an error describing invalid input.
//
// The accepted prefixes are "r" (raw: backslashes are not interpreted),
// "b" (bytes literal) and "rb". Carriage returns (and CR LF pairs) in
// the literal are normalized to a single newline, even in raw mode.
// In a non-bytes literal, octal and hex escapes above 127 are rejected;
// \u and \U escapes must denote a valid, non-surrogate code point.
func unquote(quoted string) (s string, triple, isByte bool, err error) {
	// Check for raw prefix: means don't interpret the inner \.
	raw := false
	if strings.HasPrefix(quoted, "r") {
		raw = true
		quoted = quoted[1:]
	}
	// Check for bytes prefix.
	if strings.HasPrefix(quoted, "b") {
		isByte = true
		quoted = quoted[1:]
	}

	if len(quoted) < 2 {
		err = fmt.Errorf("string literal too short")
		return
	}

	if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
		err = fmt.Errorf("string literal has invalid quotes")
		return
	}

	// Check for triple quoted string.
	quote := quoted[0]
	if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
		triple = true
		quoted = quoted[3 : len(quoted)-3]
	} else {
		quoted = quoted[1 : len(quoted)-1]
	}

	// Now quoted is the quoted data, but no quotes.
	// If we're in raw mode or there are no escapes or
	// carriage returns, we're done.
	var unquoteChars string
	if raw {
		unquoteChars = "\r"
	} else {
		unquoteChars = "\\\r"
	}
	if !strings.ContainsAny(quoted, unquoteChars) {
		s = quoted
		return
	}

	// Otherwise process quoted string.
	// Each iteration processes one escape sequence along with the
	// plain text leading up to it.
	buf := new(strings.Builder)
	for {
		// Remove prefix before escape sequence.
		i := strings.IndexAny(quoted, unquoteChars)
		if i < 0 {
			i = len(quoted)
		}
		buf.WriteString(quoted[:i])
		quoted = quoted[i:]

		if len(quoted) == 0 {
			break
		}

		// Process carriage return.
		if quoted[0] == '\r' {
			buf.WriteByte('\n')
			if len(quoted) > 1 && quoted[1] == '\n' {
				quoted = quoted[2:]
			} else {
				quoted = quoted[1:]
			}
			continue
		}

		// Process escape sequence.
		if len(quoted) == 1 {
			err = fmt.Errorf(`truncated escape sequence \`)
			return
		}

		switch quoted[1] {
		default:
			// In Starlark, like Go, a backslash must escape something.
			// (Python still treats unnecessary backslashes literally,
			// but since 3.6 has emitted a deprecation warning.)
			err = fmt.Errorf("invalid escape sequence \\%c", quoted[1])
			return

		case '\n':
			// Ignore the escape and the line break.
			quoted = quoted[2:]

		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
			// One-char escape.
			// Escapes are allowed for both kinds of quotation
			// mark, not just the kind in use.
			buf.WriteByte(unesc[quoted[1]])
			quoted = quoted[2:]

		case '0', '1', '2', '3', '4', '5', '6', '7':
			// Octal escape, up to 3 digits, \OOO.
			n := int(quoted[1] - '0')
			quoted = quoted[2:]
			for i := 1; i < 3; i++ {
				if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
					break
				}
				n = n*8 + int(quoted[0]-'0')
				quoted = quoted[1:]
			}
			if !isByte && n > 127 {
				err = fmt.Errorf(`non-ASCII octal escape \%o (use \u%04X for the UTF-8 encoding of U+%04X)`, n, n, n)
				return
			}
			if n >= 256 {
				// NOTE: Python silently discards the high bit,
				// so that '\541' == '\141' == 'a'.
				// Let's see if we can avoid doing that in BUILD files.
				err = fmt.Errorf(`invalid escape sequence \%03o`, n)
				return
			}
			buf.WriteByte(byte(n))

		case 'x':
			// Hexadecimal escape, exactly 2 digits, \xXX. [0-127]
			if len(quoted) < 4 {
				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
				return
			}
			n, err1 := strconv.ParseUint(quoted[2:4], 16, 0)
			if err1 != nil {
				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
				return
			}
			if !isByte && n > 127 {
				err = fmt.Errorf(`non-ASCII hex escape %s (use \u%04X for the UTF-8 encoding of U+%04X)`,
					quoted[:4], n, n)
				return
			}
			buf.WriteByte(byte(n))
			quoted = quoted[4:]

		case 'u', 'U':
			// Unicode code point, 4 (\uXXXX) or 8 (\UXXXXXXXX) hex digits.
			sz := 6
			if quoted[1] == 'U' {
				sz = 10
			}
			if len(quoted) < sz {
				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
				return
			}
			n, err1 := strconv.ParseUint(quoted[2:sz], 16, 0)
			if err1 != nil {
				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:sz])
				return
			}
			if n > unicode.MaxRune {
				// Report the actual upper bound, not the rejected value.
				err = fmt.Errorf(`code point out of range: %s (max \U%08x)`,
					quoted[:sz], unicode.MaxRune)
				return
			}
			// As in Go, surrogates are disallowed.
			if 0xD800 <= n && n < 0xE000 {
				err = fmt.Errorf(`invalid Unicode code point U+%04X`, n)
				return
			}
			buf.WriteRune(rune(n))
			quoted = quoted[sz:]
		}
	}

	s = buf.String()
	return
}

// indexByte returns the index of the first instance of b in s, or else -1.
func indexByte(s string, b byte) int {
	return strings.IndexByte(s, b)
}

// Quote returns a Starlark literal that denotes s.
// If b, it returns a bytes literal.
func Quote(s string, b bool) string {
	const hexDigits = "0123456789abcdef"
	var encoded [utf8.UTFMax]byte

	// Start with room for 1.5x the input; escapes may still grow it.
	out := make([]byte, 0, 3*len(s)/2)
	if b {
		out = append(out, 'b')
	}
	out = append(out, '"')

	for len(s) > 0 {
		// Decode one rune. ASCII bytes bypass the UTF-8 decoder.
		first := s[0]
		r, size := rune(first), 1
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeRuneInString(s)
		}
		s = s[size:]

		switch {
		case size == 1 && r == utf8.RuneError:
			// String (!b) literals accept \xXX escapes only for ASCII,
			// but we must use them here to represent invalid bytes.
			// The result is not a legal literal.
			out = append(out, '\\', 'x', hexDigits[first>>4], hexDigits[first&0xF])

		case r == '"' || r == '\\':
			// The delimiter and the backslash are always escaped.
			out = append(out, '\\', byte(r))

		case strconv.IsPrint(r):
			// Printable runes are emitted verbatim as UTF-8.
			n := utf8.EncodeRune(encoded[:], r)
			out = append(out, encoded[:n]...)

		case r == '\a':
			out = append(out, '\\', 'a')
		case r == '\b':
			out = append(out, '\\', 'b')
		case r == '\f':
			out = append(out, '\\', 'f')
		case r == '\n':
			out = append(out, '\\', 'n')
		case r == '\r':
			out = append(out, '\\', 'r')
		case r == '\t':
			out = append(out, '\\', 't')
		case r == '\v':
			out = append(out, '\\', 'v')

		case r < ' ' || r == 0x7f:
			// Remaining ASCII control characters use a two-digit hex escape.
			out = append(out, '\\', 'x', hexDigits[byte(r)>>4], hexDigits[byte(r)&0xF])

		default:
			// Non-printable, non-ASCII rune: \uXXXX when it fits in
			// 16 bits, otherwise \UXXXXXXXX.
			if r > utf8.MaxRune {
				r = 0xFFFD
			}
			marker, topShift := byte('U'), 28
			if r < 0x10000 {
				marker, topShift = 'u', 12
			}
			out = append(out, '\\', marker)
			for shift := topShift; shift >= 0; shift -= 4 {
				out = append(out, hexDigits[r>>uint(shift)&0xF])
			}
		}
	}

	out = append(out, '"')
	return string(out)
}
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "strings" + "testing" +) + +var quoteTests = []struct { + q string // quoted + s string // unquoted (actual string) + std bool // q is standard form for s +}{ + {`""`, "", true}, + {`''`, "", false}, + {`"hello"`, `hello`, true}, + {`'hello'`, `hello`, false}, + {`"quote\"here"`, `quote"here`, true}, + {`'quote"here'`, `quote"here`, false}, + {`"quote'here"`, `quote'here`, true}, + {`'quote\'here'`, `quote'here`, false}, + + {`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", true}, + {`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false}, + {`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false}, + {`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", true}, + {`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false}, + {`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false}, + {`"\a\b\f\n\r\t\v\x00\x7f\"\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"\\\x03", false}, + { + `"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ \x27\\1\x27,/g' >> $@; "`, + "cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ", + false, + }, + { + `"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; "`, + "cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ", + true, + }, +} + +func TestQuote(t *testing.T) { + for _, tt := range quoteTests { + if !tt.std { + continue + } + q := Quote(tt.s, false) + if q != tt.q { + t.Errorf("quote(%#q) = %s, want %s", tt.s, q, tt.q) + } + } +} + +func TestUnquote(t *testing.T) { + for _, tt := range quoteTests { + s, triple, _, err := unquote(tt.q) + wantTriple := strings.HasPrefix(tt.q, `"""`) || strings.HasPrefix(tt.q, `'''`) + if s != tt.s || triple != wantTriple || err != nil { + 
t.Errorf("unquote(%s) = %#q, %v, %v want %#q, %v, nil", tt.q, s, triple, err, tt.s, wantTriple) + } + } +} diff --git a/syntax/scan.go b/syntax/scan.go new file mode 100644 index 0000000..bb4165e --- /dev/null +++ b/syntax/scan.go @@ -0,0 +1,1123 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// A lexical scanner for Starlark. + +import ( + "fmt" + "io" + "io/ioutil" + "log" + "math/big" + "os" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// A Token represents a Starlark lexical token. +type Token int8 + +const ( + ILLEGAL Token = iota + EOF + + NEWLINE + INDENT + OUTDENT + + // Tokens with values + IDENT // x + INT // 123 + FLOAT // 1.23e45 + STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo" + BYTES // b"foo", etc + + // Punctuation + PLUS // + + MINUS // - + STAR // * + SLASH // / + SLASHSLASH // // + PERCENT // % + AMP // & + PIPE // | + CIRCUMFLEX // ^ + LTLT // << + GTGT // >> + TILDE // ~ + DOT // . + COMMA // , + EQ // = + SEMI // ; + COLON // : + LPAREN // ( + RPAREN // ) + LBRACK // [ + RBRACK // ] + LBRACE // { + RBRACE // } + LT // < + GT // > + GE // >= + LE // <= + EQL // == + NEQ // != + PLUS_EQ // += (keep order consistent with PLUS..GTGT) + MINUS_EQ // -= + STAR_EQ // *= + SLASH_EQ // /= + SLASHSLASH_EQ // //= + PERCENT_EQ // %= + AMP_EQ // &= + PIPE_EQ // |= + CIRCUMFLEX_EQ // ^= + LTLT_EQ // <<= + GTGT_EQ // >>= + STARSTAR // ** + + // Keywords + AND + BREAK + CONTINUE + DEF + ELIF + ELSE + FOR + IF + IN + LAMBDA + LOAD + NOT + NOT_IN // synthesized by parser from NOT IN + OR + PASS + RETURN + WHILE + + maxToken +) + +func (tok Token) String() string { return tokenNames[tok] } + +// GoString is like String but quotes punctuation tokens. +// Use Sprintf("%#v", tok) when constructing error messages. 
+func (tok Token) GoString() string { + if tok >= PLUS && tok <= STARSTAR { + return "'" + tokenNames[tok] + "'" + } + return tokenNames[tok] +} + +var tokenNames = [...]string{ + ILLEGAL: "illegal token", + EOF: "end of file", + NEWLINE: "newline", + INDENT: "indent", + OUTDENT: "outdent", + IDENT: "identifier", + INT: "int literal", + FLOAT: "float literal", + STRING: "string literal", + PLUS: "+", + MINUS: "-", + STAR: "*", + SLASH: "/", + SLASHSLASH: "//", + PERCENT: "%", + AMP: "&", + PIPE: "|", + CIRCUMFLEX: "^", + LTLT: "<<", + GTGT: ">>", + TILDE: "~", + DOT: ".", + COMMA: ",", + EQ: "=", + SEMI: ";", + COLON: ":", + LPAREN: "(", + RPAREN: ")", + LBRACK: "[", + RBRACK: "]", + LBRACE: "{", + RBRACE: "}", + LT: "<", + GT: ">", + GE: ">=", + LE: "<=", + EQL: "==", + NEQ: "!=", + PLUS_EQ: "+=", + MINUS_EQ: "-=", + STAR_EQ: "*=", + SLASH_EQ: "/=", + SLASHSLASH_EQ: "//=", + PERCENT_EQ: "%=", + AMP_EQ: "&=", + PIPE_EQ: "|=", + CIRCUMFLEX_EQ: "^=", + LTLT_EQ: "<<=", + GTGT_EQ: ">>=", + STARSTAR: "**", + AND: "and", + BREAK: "break", + CONTINUE: "continue", + DEF: "def", + ELIF: "elif", + ELSE: "else", + FOR: "for", + IF: "if", + IN: "in", + LAMBDA: "lambda", + LOAD: "load", + NOT: "not", + NOT_IN: "not in", + OR: "or", + PASS: "pass", + RETURN: "return", + WHILE: "while", +} + +// A FilePortion describes the content of a portion of a file. +// Callers may provide a FilePortion for the src argument of Parse +// when the desired initial line and column numbers are not (1, 1), +// such as when an expression is parsed from within larger file. +type FilePortion struct { + Content []byte + FirstLine, FirstCol int32 +} + +// A Position describes the location of a rune of input. +type Position struct { + file *string // filename (indirect for compactness) + Line int32 // 1-based line number; 0 if line unknown + Col int32 // 1-based column (rune) number; 0 if column unknown +} + +// IsValid reports whether the position is valid. 
+func (p Position) IsValid() bool { return p.file != nil } + +// Filename returns the name of the file containing this position. +func (p Position) Filename() string { + if p.file != nil { + return *p.file + } + return "<invalid>" +} + +// MakePosition returns position with the specified components. +func MakePosition(file *string, line, col int32) Position { return Position{file, line, col} } + +// add returns the position at the end of s, assuming it starts at p. +func (p Position) add(s string) Position { + if n := strings.Count(s, "\n"); n > 0 { + p.Line += int32(n) + s = s[strings.LastIndex(s, "\n")+1:] + p.Col = 1 + } + p.Col += int32(utf8.RuneCountInString(s)) + return p +} + +func (p Position) String() string { + file := p.Filename() + if p.Line > 0 { + if p.Col > 0 { + return fmt.Sprintf("%s:%d:%d", file, p.Line, p.Col) + } + return fmt.Sprintf("%s:%d", file, p.Line) + } + return file +} + +func (p Position) isBefore(q Position) bool { + if p.Line != q.Line { + return p.Line < q.Line + } + return p.Col < q.Col +} + +// An scanner represents a single input file being parsed. 
+type scanner struct { + rest []byte // rest of input (in REPL, a line of input) + token []byte // token being scanned + pos Position // current input position + depth int // nesting of [ ] { } ( ) + indentstk []int // stack of indentation levels + dents int // number of saved INDENT (>0) or OUTDENT (<0) tokens to return + lineStart bool // after NEWLINE; convert spaces to indentation tokens + keepComments bool // accumulate comments in slice + lineComments []Comment // list of full line comments (if keepComments) + suffixComments []Comment // list of suffix comments (if keepComments) + + readline func() ([]byte, error) // read next line of input (REPL only) +} + +func newScanner(filename string, src interface{}, keepComments bool) (*scanner, error) { + var firstLine, firstCol int32 = 1, 1 + if portion, ok := src.(FilePortion); ok { + firstLine, firstCol = portion.FirstLine, portion.FirstCol + } + sc := &scanner{ + pos: MakePosition(&filename, firstLine, firstCol), + indentstk: make([]int, 1, 10), // []int{0} + spare capacity + lineStart: true, + keepComments: keepComments, + } + sc.readline, _ = src.(func() ([]byte, error)) // ParseCompoundStmt (REPL) only + if sc.readline == nil { + data, err := readSource(filename, src) + if err != nil { + return nil, err + } + sc.rest = data + } + return sc, nil +} + +func readSource(filename string, src interface{}) ([]byte, error) { + switch src := src.(type) { + case string: + return []byte(src), nil + case []byte: + return src, nil + case io.Reader: + data, err := ioutil.ReadAll(src) + if err != nil { + err = &os.PathError{Op: "read", Path: filename, Err: err} + return nil, err + } + return data, nil + case FilePortion: + return src.Content, nil + case nil: + return ioutil.ReadFile(filename) + default: + return nil, fmt.Errorf("invalid source: %T", src) + } +} + +// An Error describes the nature and position of a scanner or parser error. 
+type Error struct { + Pos Position + Msg string +} + +func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg } + +// errorf is called to report an error. +// errorf does not return: it panics. +func (sc *scanner) error(pos Position, s string) { + panic(Error{pos, s}) +} + +func (sc *scanner) errorf(pos Position, format string, args ...interface{}) { + sc.error(pos, fmt.Sprintf(format, args...)) +} + +func (sc *scanner) recover(err *error) { + // The scanner and parser panic both for routine errors like + // syntax errors and for programmer bugs like array index + // errors. Turn both into error returns. Catching bug panics + // is especially important when processing many files. + switch e := recover().(type) { + case nil: + // no panic + case Error: + *err = e + default: + *err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)} + if debug { + log.Fatal(*err) + } + } +} + +// eof reports whether the input has reached end of file. +func (sc *scanner) eof() bool { + return len(sc.rest) == 0 && !sc.readLine() +} + +// readLine attempts to read another line of input. +// Precondition: len(sc.rest)==0. +func (sc *scanner) readLine() bool { + if sc.readline != nil { + var err error + sc.rest, err = sc.readline() + if err != nil { + sc.errorf(sc.pos, "%v", err) // EOF or ErrInterrupt + } + return len(sc.rest) > 0 + } + return false +} + +// peekRune returns the next rune in the input without consuming it. +// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'. +func (sc *scanner) peekRune() rune { + // TODO(adonovan): opt: measure and perhaps inline eof. + if sc.eof() { + return 0 + } + + // fast path: ASCII + if b := sc.rest[0]; b < utf8.RuneSelf { + if b == '\r' { + return '\n' + } + return rune(b) + } + + r, _ := utf8.DecodeRune(sc.rest) + return r +} + +// readRune consumes and returns the next rune in the input. +// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'. 
+func (sc *scanner) readRune() rune { + // eof() has been inlined here, both to avoid a call + // and to establish len(rest)>0 to avoid a bounds check. + if len(sc.rest) == 0 { + if !sc.readLine() { + sc.error(sc.pos, "internal scanner error: readRune at EOF") + } + // Redundant, but eliminates the bounds-check below. + if len(sc.rest) == 0 { + return 0 + } + } + + // fast path: ASCII + if b := sc.rest[0]; b < utf8.RuneSelf { + r := rune(b) + sc.rest = sc.rest[1:] + if r == '\r' { + if len(sc.rest) > 0 && sc.rest[0] == '\n' { + sc.rest = sc.rest[1:] + } + r = '\n' + } + if r == '\n' { + sc.pos.Line++ + sc.pos.Col = 1 + } else { + sc.pos.Col++ + } + return r + } + + r, size := utf8.DecodeRune(sc.rest) + sc.rest = sc.rest[size:] + sc.pos.Col++ + return r +} + +// tokenValue records the position and value associated with each token. +type tokenValue struct { + raw string // raw text of token + int int64 // decoded int + bigInt *big.Int // decoded integers > int64 + float float64 // decoded float + string string // decoded string or bytes + pos Position // start position of token +} + +// startToken marks the beginning of the next input token. +// It must be followed by a call to endToken once the token has +// been consumed using readRune. +func (sc *scanner) startToken(val *tokenValue) { + sc.token = sc.rest + val.raw = "" + val.pos = sc.pos +} + +// endToken marks the end of an input token. +// It records the actual token string in val.raw if the caller +// has not done that already. +func (sc *scanner) endToken(val *tokenValue) { + if val.raw == "" { + val.raw = string(sc.token[:len(sc.token)-len(sc.rest)]) + } +} + +// nextToken is called by the parser to obtain the next input token. +// It returns the token value and sets val to the data associated with +// the token. +// +// For all our input tokens, the associated data is val.pos (the +// position where the token begins), val.raw (the input string +// corresponding to the token). 
For string and int tokens, the string +// and int fields additionally contain the token's interpreted value. +func (sc *scanner) nextToken(val *tokenValue) Token { + + // The following distribution of tokens guides case ordering: + // + // COMMA 27 % + // STRING 23 % + // IDENT 15 % + // EQL 11 % + // LBRACK 5.5 % + // RBRACK 5.5 % + // NEWLINE 3 % + // LPAREN 2.9 % + // RPAREN 2.9 % + // INT 2 % + // others < 1 % + // + // Although NEWLINE tokens are infrequent, and lineStart is + // usually (~97%) false on entry, skipped newlines account for + // about 50% of all iterations of the 'start' loop. + +start: + var c rune + + // Deal with leading spaces and indentation. + blank := false + savedLineStart := sc.lineStart + if sc.lineStart { + sc.lineStart = false + col := 0 + for { + c = sc.peekRune() + if c == ' ' { + col++ + sc.readRune() + } else if c == '\t' { + const tab = 8 + col += int(tab - (sc.pos.Col-1)%tab) + sc.readRune() + } else { + break + } + } + + // The third clause matches EOF. + if c == '#' || c == '\n' || c == 0 { + blank = true + } + + // Compute indentation level for non-blank lines not + // inside an expression. This is not the common case. + if !blank && sc.depth == 0 { + cur := sc.indentstk[len(sc.indentstk)-1] + if col > cur { + // indent + sc.dents++ + sc.indentstk = append(sc.indentstk, col) + } else if col < cur { + // outdent(s) + for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] { + sc.dents-- + sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop + } + if col != sc.indentstk[len(sc.indentstk)-1] { + sc.error(sc.pos, "unindent does not match any outer indentation level") + } + } + } + } + + // Return saved indentation tokens. + if sc.dents != 0 { + sc.startToken(val) + sc.endToken(val) + if sc.dents < 0 { + sc.dents++ + return OUTDENT + } else { + sc.dents-- + return INDENT + } + } + + // start of line proper + c = sc.peekRune() + + // Skip spaces. 
+ for c == ' ' || c == '\t' { + sc.readRune() + c = sc.peekRune() + } + + // comment + if c == '#' { + if sc.keepComments { + sc.startToken(val) + } + // Consume up to newline (included). + for c != 0 && c != '\n' { + sc.readRune() + c = sc.peekRune() + } + if sc.keepComments { + sc.endToken(val) + if blank { + sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw}) + } else { + sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw}) + } + } + } + + // newline + if c == '\n' { + sc.lineStart = true + + // Ignore newlines within expressions (common case). + if sc.depth > 0 { + sc.readRune() + goto start + } + + // Ignore blank lines, except in the REPL, + // where they emit OUTDENTs and NEWLINE. + if blank { + if sc.readline == nil { + sc.readRune() + goto start + } else if len(sc.indentstk) > 1 { + sc.dents = 1 - len(sc.indentstk) + sc.indentstk = sc.indentstk[:1] + goto start + } + } + + // At top-level (not in an expression). + sc.startToken(val) + sc.readRune() + val.raw = "\n" + return NEWLINE + } + + // end of file + if c == 0 { + // Emit OUTDENTs for unfinished indentation, + // preceded by a NEWLINE if we haven't just emitted one. 
+ if len(sc.indentstk) > 1 { + if savedLineStart { + sc.dents = 1 - len(sc.indentstk) + sc.indentstk = sc.indentstk[:1] + goto start + } else { + sc.lineStart = true + sc.startToken(val) + val.raw = "\n" + return NEWLINE + } + } + + sc.startToken(val) + sc.endToken(val) + return EOF + } + + // line continuation + if c == '\\' { + sc.readRune() + if sc.peekRune() != '\n' { + sc.errorf(sc.pos, "stray backslash in program") + } + sc.readRune() + goto start + } + + // start of the next token + sc.startToken(val) + + // comma (common case) + if c == ',' { + sc.readRune() + sc.endToken(val) + return COMMA + } + + // string literal + if c == '"' || c == '\'' { + return sc.scanString(val, c) + } + + // identifier or keyword + if isIdentStart(c) { + if (c == 'r' || c == 'b') && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') { + // r"..." + // b"..." + sc.readRune() + c = sc.peekRune() + return sc.scanString(val, c) + } else if c == 'r' && len(sc.rest) > 2 && sc.rest[1] == 'b' && (sc.rest[2] == '"' || sc.rest[2] == '\'') { + // rb"..." + sc.readRune() + sc.readRune() + c = sc.peekRune() + return sc.scanString(val, c) + } + + for isIdent(c) { + sc.readRune() + c = sc.peekRune() + } + sc.endToken(val) + if k, ok := keywordToken[val.raw]; ok { + return k + } + + return IDENT + } + + // brackets + switch c { + case '[', '(', '{': + sc.depth++ + sc.readRune() + sc.endToken(val) + switch c { + case '[': + return LBRACK + case '(': + return LPAREN + case '{': + return LBRACE + } + panic("unreachable") + + case ']', ')', '}': + if sc.depth == 0 { + sc.errorf(sc.pos, "unexpected %q", c) + } else { + sc.depth-- + } + sc.readRune() + sc.endToken(val) + switch c { + case ']': + return RBRACK + case ')': + return RPAREN + case '}': + return RBRACE + } + panic("unreachable") + } + + // int or float literal, or period + if isdigit(c) || c == '.' 
{ + return sc.scanNumber(val, c) + } + + // other punctuation + defer sc.endToken(val) + switch c { + case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^': // possibly followed by '=' + start := sc.pos + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + switch c { + case '<': + return LE + case '>': + return GE + case '=': + return EQL + case '!': + return NEQ + case '+': + return PLUS_EQ + case '-': + return MINUS_EQ + case '/': + return SLASH_EQ + case '%': + return PERCENT_EQ + case '&': + return AMP_EQ + case '|': + return PIPE_EQ + case '^': + return CIRCUMFLEX_EQ + } + } + switch c { + case '=': + return EQ + case '<': + if sc.peekRune() == '<' { + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + return LTLT_EQ + } else { + return LTLT + } + } + return LT + case '>': + if sc.peekRune() == '>' { + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + return GTGT_EQ + } else { + return GTGT + } + } + return GT + case '!': + sc.error(start, "unexpected input character '!'") + case '+': + return PLUS + case '-': + return MINUS + case '/': + if sc.peekRune() == '/' { + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + return SLASHSLASH_EQ + } else { + return SLASHSLASH + } + } + return SLASH + case '%': + return PERCENT + case '&': + return AMP + case '|': + return PIPE + case '^': + return CIRCUMFLEX + } + panic("unreachable") + + case ':', ';', '~': // single-char tokens (except comma) + sc.readRune() + switch c { + case ':': + return COLON + case ';': + return SEMI + case '~': + return TILDE + } + panic("unreachable") + + case '*': // possibly followed by '*' or '=' + sc.readRune() + switch sc.peekRune() { + case '*': + sc.readRune() + return STARSTAR + case '=': + sc.readRune() + return STAR_EQ + } + return STAR + } + + sc.errorf(sc.pos, "unexpected input character %#q", c) + panic("unreachable") +} + +func (sc *scanner) scanString(val *tokenValue, quote rune) Token { + start := sc.pos + triple := len(sc.rest) >= 3 && 
sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote) + sc.readRune() + + // String literals may contain escaped or unescaped newlines, + // causing them to span multiple lines (gulps) of REPL input; + // they are the only such token. Thus we cannot call endToken, + // as it assumes sc.rest is unchanged since startToken. + // Instead, buffer the token here. + // TODO(adonovan): opt: buffer only if we encounter a newline. + raw := new(strings.Builder) + + // Copy the prefix, e.g. r' or " (see startToken). + raw.Write(sc.token[:len(sc.token)-len(sc.rest)]) + + if !triple { + // single-quoted string literal + for { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c := sc.readRune() + raw.WriteRune(c) + if c == quote { + break + } + if c == '\n' { + sc.error(val.pos, "unexpected newline in string") + } + if c == '\\' { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c = sc.readRune() + raw.WriteRune(c) + } + } + } else { + // triple-quoted string literal + sc.readRune() + raw.WriteRune(quote) + sc.readRune() + raw.WriteRune(quote) + + quoteCount := 0 + for { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c := sc.readRune() + raw.WriteRune(c) + if c == quote { + quoteCount++ + if quoteCount == 3 { + break + } + } else { + quoteCount = 0 + } + if c == '\\' { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c = sc.readRune() + raw.WriteRune(c) + } + } + } + val.raw = raw.String() + + s, _, isByte, err := unquote(val.raw) + if err != nil { + sc.error(start, err.Error()) + } + val.string = s + if isByte { + return BYTES + } else { + return STRING + } +} + +func (sc *scanner) scanNumber(val *tokenValue, c rune) Token { + // https://github.com/google/starlark-go/blob/master/doc/spec.md#lexical-elements + // + // Python features not supported: + // - integer literals of >64 bits of precision + // - 123L or 123l long suffix + // - traditional octal: 0755 + // 
https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals + + start := sc.pos + fraction, exponent := false, false + + if c == '.' { + // dot or start of fraction + sc.readRune() + c = sc.peekRune() + if !isdigit(c) { + sc.endToken(val) + return DOT + } + fraction = true + } else if c == '0' { + // hex, octal, binary or float + sc.readRune() + c = sc.peekRune() + + if c == '.' { + fraction = true + } else if c == 'x' || c == 'X' { + // hex + sc.readRune() + c = sc.peekRune() + if !isxdigit(c) { + sc.error(start, "invalid hex literal") + } + for isxdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else if c == 'o' || c == 'O' { + // octal + sc.readRune() + c = sc.peekRune() + if !isodigit(c) { + sc.error(sc.pos, "invalid octal literal") + } + for isodigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else if c == 'b' || c == 'B' { + // binary + sc.readRune() + c = sc.peekRune() + if !isbdigit(c) { + sc.error(sc.pos, "invalid binary literal") + } + for isbdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else { + // float (or obsolete octal "0755") + allzeros, octal := true, true + for isdigit(c) { + if c != '0' { + allzeros = false + } + if c > '7' { + octal = false + } + sc.readRune() + c = sc.peekRune() + } + if c == '.' { + fraction = true + } else if c == 'e' || c == 'E' { + exponent = true + } else if octal && !allzeros { + sc.endToken(val) + sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:]) + } + } + } else { + // decimal + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + + if c == '.' { + fraction = true + } else if c == 'e' || c == 'E' { + exponent = true + } + } + + if fraction { + sc.readRune() // consume '.' 
+ c = sc.peekRune() + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + + if c == 'e' || c == 'E' { + exponent = true + } + } + + if exponent { + sc.readRune() // consume [eE] + c = sc.peekRune() + if c == '+' || c == '-' { + sc.readRune() + c = sc.peekRune() + if !isdigit(c) { + sc.error(sc.pos, "invalid float literal") + } + } + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } + + sc.endToken(val) + if fraction || exponent { + var err error + val.float, err = strconv.ParseFloat(val.raw, 64) + if err != nil { + sc.error(sc.pos, "invalid float literal") + } + return FLOAT + } else { + var err error + s := val.raw + val.bigInt = nil + if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') { + val.int, err = strconv.ParseInt(s[2:], 8, 64) + } else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') { + val.int, err = strconv.ParseInt(s[2:], 2, 64) + } else { + val.int, err = strconv.ParseInt(s, 0, 64) + if err != nil { + num := new(big.Int) + var ok bool + val.bigInt, ok = num.SetString(s, 0) + if ok { + err = nil + } + } + } + if err != nil { + sc.error(start, "invalid int literal") + } + return INT + } +} + +// isIdent reports whether c is an identifier rune. +func isIdent(c rune) bool { + return isdigit(c) || isIdentStart(c) +} + +func isIdentStart(c rune) bool { + return 'a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z' || + c == '_' || + unicode.IsLetter(c) +} + +func isdigit(c rune) bool { return '0' <= c && c <= '9' } +func isodigit(c rune) bool { return '0' <= c && c <= '7' } +func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' } +func isbdigit(c rune) bool { return '0' == c || c == '1' } + +// keywordToken records the special tokens for +// strings that should not be treated as ordinary identifiers. 
+var keywordToken = map[string]Token{ + "and": AND, + "break": BREAK, + "continue": CONTINUE, + "def": DEF, + "elif": ELIF, + "else": ELSE, + "for": FOR, + "if": IF, + "in": IN, + "lambda": LAMBDA, + "load": LOAD, + "not": NOT, + "or": OR, + "pass": PASS, + "return": RETURN, + "while": WHILE, + + // reserved words: + "as": ILLEGAL, + // "assert": ILLEGAL, // heavily used by our tests + "class": ILLEGAL, + "del": ILLEGAL, + "except": ILLEGAL, + "finally": ILLEGAL, + "from": ILLEGAL, + "global": ILLEGAL, + "import": ILLEGAL, + "is": ILLEGAL, + "nonlocal": ILLEGAL, + "raise": ILLEGAL, + "try": ILLEGAL, + "with": ILLEGAL, + "yield": ILLEGAL, +} diff --git a/syntax/scan_test.go b/syntax/scan_test.go new file mode 100644 index 0000000..9582bd7 --- /dev/null +++ b/syntax/scan_test.go @@ -0,0 +1,310 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "bytes" + "fmt" + "go/build" + "io/ioutil" + "path/filepath" + "strings" + "testing" +) + +func scan(src interface{}) (tokens string, err error) { + sc, err := newScanner("foo.star", src, false) + if err != nil { + return "", err + } + + defer sc.recover(&err) + + var buf bytes.Buffer + var val tokenValue + for { + tok := sc.nextToken(&val) + + if buf.Len() > 0 { + buf.WriteByte(' ') + } + switch tok { + case EOF: + buf.WriteString("EOF") + case IDENT: + buf.WriteString(val.raw) + case INT: + if val.bigInt != nil { + fmt.Fprintf(&buf, "%d", val.bigInt) + } else { + fmt.Fprintf(&buf, "%d", val.int) + } + case FLOAT: + fmt.Fprintf(&buf, "%e", val.float) + case STRING, BYTES: + buf.WriteString(Quote(val.string, tok == BYTES)) + default: + buf.WriteString(tok.String()) + } + if tok == EOF { + break + } + } + return buf.String(), nil +} + +func TestScanner(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {``, "EOF"}, + {`123`, "123 EOF"}, + {`x.y`, "x . 
y EOF"}, + {`chocolate.éclair`, `chocolate . éclair EOF`}, + {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, + {`print(x)`, "print ( x ) EOF"}, + {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, + {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token + {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, + {`# hello +print(x)`, "print ( x ) EOF"}, + {`# hello +print(1) +cc_binary(name="foo") +def f(x): + return x+1 +print(1) +`, + `print ( 1 ) newline ` + + `cc_binary ( name = "foo" ) newline ` + + `def f ( x ) : newline ` + + `indent return x + 1 newline ` + + `outdent print ( 1 ) newline ` + + `EOF`}, + // EOF should act line an implicit newline. + {`def f(): pass`, + "def f ( ) : pass EOF"}, + {`def f(): + pass`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass +# oops`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass \ +`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass +`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`pass + + +pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated + {`def f(): + pass + `, "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass + ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, + {"pass", "pass EOF"}, + {"pass\n", "pass newline EOF"}, + {"pass\n ", "pass newline EOF"}, + {"pass\n \n", "pass newline EOF"}, + {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, + {`x = 1 + \ +2`, `x = 1 + 2 EOF`}, + {`x = 'a\nb'`, `x = "a\nb" EOF`}, + {`x = r'a\nb'`, `x = "a\\nb" EOF`}, + {"x = 'a\\\nb'", `x = "ab" EOF`}, + {`x = '\''`, `x = "'" EOF`}, + {`x = "\""`, `x = "\"" EOF`}, + {`x = r'\''`, `x = "\\'" EOF`}, + {`x = '''\''''`, `x = "'" EOF`}, + {`x = r'''\''''`, `x = "\\'" EOF`}, + {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, + {"x = '''a\nb'''", `x = "a\nb" EOF`}, + {"x = '''a\rb'''", `x = "a\nb" EOF`}, + 
{"x = '''a\r\nb'''", `x = "a\nb" EOF`}, + {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, + {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, + {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, + {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, + {"a\rb", `a newline b EOF`}, + {"a\nb", `a newline b EOF`}, + {"a\r\nb", `a newline b EOF`}, + {"a\n\nb", `a newline b EOF`}, + // numbers + {"0", `0 EOF`}, + {"00", `0 EOF`}, + {"0.", `0.000000e+00 EOF`}, + {"0.e1", `0.000000e+00 EOF`}, + {".0", `0.000000e+00 EOF`}, + {"0.0", `0.000000e+00 EOF`}, + {".e1", `. e1 EOF`}, + {"1", `1 EOF`}, + {"1.", `1.000000e+00 EOF`}, + {".1", `1.000000e-01 EOF`}, + {".1e1", `1.000000e+00 EOF`}, + {".1e+1", `1.000000e+00 EOF`}, + {".1e-1", `1.000000e-02 EOF`}, + {"1e1", `1.000000e+01 EOF`}, + {"1e+1", `1.000000e+01 EOF`}, + {"1e-1", `1.000000e-01 EOF`}, + {"123", `123 EOF`}, + {"123e45", `1.230000e+47 EOF`}, + {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`}, + {"12345678901234567890", `12345678901234567890 EOF`}, + // hex + {"0xA", `10 EOF`}, + {"0xAAG", `170 G EOF`}, + {"0xG", `foo.star:1:1: invalid hex literal`}, + {"0XA", `10 EOF`}, + {"0XG", `foo.star:1:1: invalid hex literal`}, + {"0xA.", `10 . EOF`}, + {"0xA.e1", `10 . e1 EOF`}, + {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`}, + // binary + {"0b1010", `10 EOF`}, + {"0B111101", `61 EOF`}, + {"0b3", `foo.star:1:3: invalid binary literal`}, + {"0b1010201", `10 201 EOF`}, + {"0b1010.01", `10 1.000000e-02 EOF`}, + {"0b0000", `0 EOF`}, + // octal + {"0o123", `83 EOF`}, + {"0o12834", `10 834 EOF`}, + {"0o12934", `10 934 EOF`}, + {"0o12934.", `10 9.340000e+02 EOF`}, + {"0o12934.1", `10 9.341000e+02 EOF`}, + {"0o12934e1", `10 9.340000e+03 EOF`}, + {"0o123.", `83 . 
EOF`}, + {"0o123.1", `83 1.000000e-01 EOF`}, + {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`}, + {"012834", `foo.star:1:1: invalid int literal`}, + {"012934", `foo.star:1:1: invalid int literal`}, + {"i = 012934", `foo.star:1:5: invalid int literal`}, + // octal escapes in string literals + {`"\037"`, `"\x1f" EOF`}, + {`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`}, + {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' + {`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3 + // hex escapes + {`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable + {`"\x80"`, `foo.star:1:1: non-ASCII hex escape`}, + {`"\xff"`, `foo.star:1:1: non-ASCII hex escape`}, + {`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`}, + {`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`}, + {`"\x"`, `foo.star:1:1: truncated escape sequence \x`}, + {`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`}, + // Unicode escapes + // \uXXXX + {`"\u0400"`, `"Ѐ" EOF`}, + {`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`}, + {`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' + {`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`}, + {`"\u4E16"`, `"世" EOF`}, + {`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate + // \UXXXXXXXX + {`"\U00000400"`, `"Ѐ" EOF`}, + {`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`}, + {`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' + {`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`}, + {`"\U0010FFFF"`, `"\U0010ffff" EOF`}, + {`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`}, + {`"\U0001F63F"`, `"😿" EOF`}, + {`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate + + // backslash escapes + // As in Go, a backslash must escape something. + // (Python started issuing a deprecation warning in 3.6.) 
+ {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`}, + {`"\+"`, `foo.star:1:1: invalid escape sequence \+`}, + {`"\w"`, `foo.star:1:1: invalid escape sequence \w`}, + {`"\""`, `"\"" EOF`}, + {`"\'"`, `"'" EOF`}, + {`'\w'`, `foo.star:1:1: invalid escape sequence \w`}, + {`'\''`, `"'" EOF`}, + {`'\"'`, `"\"" EOF`}, + {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`}, + {`"""\""""`, `"\"" EOF`}, + {`"""\'"""`, `"'" EOF`}, + {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`}, + {`'''\''''`, `"'" EOF`}, + {`'''\"'''`, `"\"" EOF`}, + {`r"\w"`, `"\\w" EOF`}, + {`r"\""`, `"\\\"" EOF`}, + {`r"\'"`, `"\\'" EOF`}, + {`r'\w'`, `"\\w" EOF`}, + {`r'\''`, `"\\'" EOF`}, + {`r'\"'`, `"\\\"" EOF`}, + {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`}, + {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`}, + // bytes literals (where they differ from text strings) + {`b"AЀ世😿"`, `b"AЀ世😿`}, // 1-4 byte encodings, literal + {`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`}, // same, as escapes + {`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII + {`b"\400"`, `foo.star:1:2: invalid escape sequence \400`}, + {`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string) + // floats starting with octal digits + {"012934.", `1.293400e+04 EOF`}, + {"012934.1", `1.293410e+04 EOF`}, + {"012934e1", `1.293400e+05 EOF`}, + {"0123.", `1.230000e+02 EOF`}, + {"0123.1", `1.231000e+02 EOF`}, + // github.com/google/skylark/issues/16 + {"x ! 
0", "foo.star:1:3: unexpected input character '!'"}, + // github.com/google/starlark-go/issues/80 + {"([{<>}])", "( [ { < > } ] ) EOF"}, + {"f();", "f ( ) ; EOF"}, + // github.com/google/starlark-go/issues/104 + {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`}, + {`while cond: pass`, "while cond : pass EOF"}, + // github.com/google/starlark-go/issues/107 + {"~= ~= 5", "~ = ~ = 5 EOF"}, + {"0in", "0 in EOF"}, + {"0or", "foo.star:1:3: invalid octal literal"}, + {"6in", "6 in EOF"}, + {"6or", "6 or EOF"}, + } { + got, err := scan(test.input) + if err != nil { + got = err.(Error).Error() + } + // Prefix match allows us to truncate errors in expecations. + // Success cases all end in EOF. + if !strings.HasPrefix(got, test.want) { + t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) + } + } +} + +// dataFile is the same as starlarktest.DataFile. +// We make a copy to avoid a dependency cycle. +var dataFile = func(pkgdir, filename string) string { + return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename) +} + +func BenchmarkScan(b *testing.B) { + filename := dataFile("syntax", "testdata/scan.star") + b.StopTimer() + data, err := ioutil.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + sc, err := newScanner(filename, data, false) + if err != nil { + b.Fatal(err) + } + var val tokenValue + for sc.nextToken(&val) != EOF { + } + } +} diff --git a/syntax/syntax.go b/syntax/syntax.go new file mode 100644 index 0000000..20b28bb --- /dev/null +++ b/syntax/syntax.go @@ -0,0 +1,529 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package syntax provides a Starlark parser and abstract syntax tree. 
+package syntax // import "go.starlark.net/syntax" + +// A Node is a node in a Starlark syntax tree. +type Node interface { + // Span returns the start and end position of the expression. + Span() (start, end Position) + + // Comments returns the comments associated with this node. + // It returns nil if RetainComments was not specified during parsing, + // or if AllocComments was not called. + Comments() *Comments + + // AllocComments allocates a new Comments node if there was none. + // This makes it possible to add new comments using the Comments() method. + AllocComments() +} + +// A Comment represents a single # comment. +type Comment struct { + Start Position + Text string // without trailing newline +} + +// Comments collects the comments associated with an expression. +type Comments struct { + Before []Comment // whole-line comments before this expression + Suffix []Comment // end-of-line comments after this expression (up to 1) + + // For top-level expressions only, After lists whole-line + // comments following the expression. + After []Comment +} + +// A commentsRef is a possibly-nil reference to a set of comments. +// A commentsRef is embedded in each type of syntax node, +// and provides its Comments and AllocComments methods. +type commentsRef struct{ ref *Comments } + +// Comments returns the comments associated with a syntax node, +// or nil if AllocComments has not yet been called. +func (cr commentsRef) Comments() *Comments { return cr.ref } + +// AllocComments enables comments to be associated with a syntax node. +func (cr *commentsRef) AllocComments() { + if cr.ref == nil { + cr.ref = new(Comments) + } +} + +// Start returns the start position of the expression. +func Start(n Node) Position { + start, _ := n.Span() + return start +} + +// End returns the end position of the expression. +func End(n Node) Position { + _, end := n.Span() + return end +} + +// A File represents a Starlark file. 
+type File struct { + commentsRef + Path string + Stmts []Stmt + + Module interface{} // a *resolve.Module, set by resolver +} + +func (x *File) Span() (start, end Position) { + if len(x.Stmts) == 0 { + return + } + start, _ = x.Stmts[0].Span() + _, end = x.Stmts[len(x.Stmts)-1].Span() + return start, end +} + +// A Stmt is a Starlark statement. +type Stmt interface { + Node + stmt() +} + +func (*AssignStmt) stmt() {} +func (*BranchStmt) stmt() {} +func (*DefStmt) stmt() {} +func (*ExprStmt) stmt() {} +func (*ForStmt) stmt() {} +func (*WhileStmt) stmt() {} +func (*IfStmt) stmt() {} +func (*LoadStmt) stmt() {} +func (*ReturnStmt) stmt() {} + +// An AssignStmt represents an assignment: +// x = 0 +// x, y = y, x +// x += 1 +type AssignStmt struct { + commentsRef + OpPos Position + Op Token // = EQ | {PLUS,MINUS,STAR,PERCENT}_EQ + LHS Expr + RHS Expr +} + +func (x *AssignStmt) Span() (start, end Position) { + start, _ = x.LHS.Span() + _, end = x.RHS.Span() + return +} + +// A DefStmt represents a function definition. +type DefStmt struct { + commentsRef + Def Position + Name *Ident + Params []Expr // param = ident | ident=expr | * | *ident | **ident + Body []Stmt + + Function interface{} // a *resolve.Function, set by resolver +} + +func (x *DefStmt) Span() (start, end Position) { + _, end = x.Body[len(x.Body)-1].Span() + return x.Def, end +} + +// An ExprStmt is an expression evaluated for side effects. +type ExprStmt struct { + commentsRef + X Expr +} + +func (x *ExprStmt) Span() (start, end Position) { + return x.X.Span() +} + +// An IfStmt is a conditional: If Cond: True; else: False. +// 'elif' is desugared into a chain of IfStmts. 
+type IfStmt struct { + commentsRef + If Position // IF or ELIF + Cond Expr + True []Stmt + ElsePos Position // ELSE or ELIF + False []Stmt // optional +} + +func (x *IfStmt) Span() (start, end Position) { + body := x.False + if body == nil { + body = x.True + } + _, end = body[len(body)-1].Span() + return x.If, end +} + +// A LoadStmt loads another module and binds names from it: +// load(Module, "x", y="foo"). +// +// The AST is slightly unfaithful to the concrete syntax here because +// Starlark's load statement, so that it can be implemented in Python, +// binds some names (like y above) with an identifier and some (like x) +// without. For consistency we create fake identifiers for all the +// strings. +type LoadStmt struct { + commentsRef + Load Position + Module *Literal // a string + From []*Ident // name defined in loading module + To []*Ident // name in loaded module + Rparen Position +} + +func (x *LoadStmt) Span() (start, end Position) { + return x.Load, x.Rparen +} + +// ModuleName returns the name of the module loaded by this statement. +func (x *LoadStmt) ModuleName() string { return x.Module.Value.(string) } + +// A BranchStmt changes the flow of control: break, continue, pass. +type BranchStmt struct { + commentsRef + Token Token // = BREAK | CONTINUE | PASS + TokenPos Position +} + +func (x *BranchStmt) Span() (start, end Position) { + return x.TokenPos, x.TokenPos.add(x.Token.String()) +} + +// A ReturnStmt returns from a function. +type ReturnStmt struct { + commentsRef + Return Position + Result Expr // may be nil +} + +func (x *ReturnStmt) Span() (start, end Position) { + if x.Result == nil { + return x.Return, x.Return.add("return") + } + _, end = x.Result.Span() + return x.Return, end +} + +// An Expr is a Starlark expression. 
+type Expr interface { + Node + expr() +} + +func (*BinaryExpr) expr() {} +func (*CallExpr) expr() {} +func (*Comprehension) expr() {} +func (*CondExpr) expr() {} +func (*DictEntry) expr() {} +func (*DictExpr) expr() {} +func (*DotExpr) expr() {} +func (*Ident) expr() {} +func (*IndexExpr) expr() {} +func (*LambdaExpr) expr() {} +func (*ListExpr) expr() {} +func (*Literal) expr() {} +func (*ParenExpr) expr() {} +func (*SliceExpr) expr() {} +func (*TupleExpr) expr() {} +func (*UnaryExpr) expr() {} + +// An Ident represents an identifier. +type Ident struct { + commentsRef + NamePos Position + Name string + + Binding interface{} // a *resolver.Binding, set by resolver +} + +func (x *Ident) Span() (start, end Position) { + return x.NamePos, x.NamePos.add(x.Name) +} + +// A Literal represents a literal string or number. +type Literal struct { + commentsRef + Token Token // = STRING | BYTES | INT | FLOAT + TokenPos Position + Raw string // uninterpreted text + Value interface{} // = string | int64 | *big.Int | float64 +} + +func (x *Literal) Span() (start, end Position) { + return x.TokenPos, x.TokenPos.add(x.Raw) +} + +// A ParenExpr represents a parenthesized expression: (X). +type ParenExpr struct { + commentsRef + Lparen Position + X Expr + Rparen Position +} + +func (x *ParenExpr) Span() (start, end Position) { + return x.Lparen, x.Rparen.add(")") +} + +// A CallExpr represents a function call expression: Fn(Args). +type CallExpr struct { + commentsRef + Fn Expr + Lparen Position + Args []Expr // arg = expr | ident=expr | *expr | **expr + Rparen Position +} + +func (x *CallExpr) Span() (start, end Position) { + start, _ = x.Fn.Span() + return start, x.Rparen.add(")") +} + +// A DotExpr represents a field or method selector: X.Name. 
+type DotExpr struct { + commentsRef + X Expr + Dot Position + NamePos Position + Name *Ident +} + +func (x *DotExpr) Span() (start, end Position) { + start, _ = x.X.Span() + _, end = x.Name.Span() + return +} + +// A Comprehension represents a list or dict comprehension: +// [Body for ... if ...] or {Body for ... if ...} +type Comprehension struct { + commentsRef + Curly bool // {x:y for ...} or {x for ...}, not [x for ...] + Lbrack Position + Body Expr + Clauses []Node // = *ForClause | *IfClause + Rbrack Position +} + +func (x *Comprehension) Span() (start, end Position) { + return x.Lbrack, x.Rbrack.add("]") +} + +// A ForStmt represents a loop: for Vars in X: Body. +type ForStmt struct { + commentsRef + For Position + Vars Expr // name, or tuple of names + X Expr + Body []Stmt +} + +func (x *ForStmt) Span() (start, end Position) { + _, end = x.Body[len(x.Body)-1].Span() + return x.For, end +} + +// A WhileStmt represents a while loop: while X: Body. +type WhileStmt struct { + commentsRef + While Position + Cond Expr + Body []Stmt +} + +func (x *WhileStmt) Span() (start, end Position) { + _, end = x.Body[len(x.Body)-1].Span() + return x.While, end +} + +// A ForClause represents a for clause in a list comprehension: for Vars in X. +type ForClause struct { + commentsRef + For Position + Vars Expr // name, or tuple of names + In Position + X Expr +} + +func (x *ForClause) Span() (start, end Position) { + _, end = x.X.Span() + return x.For, end +} + +// An IfClause represents an if clause in a list comprehension: if Cond. +type IfClause struct { + commentsRef + If Position + Cond Expr +} + +func (x *IfClause) Span() (start, end Position) { + _, end = x.Cond.Span() + return x.If, end +} + +// A DictExpr represents a dictionary literal: { List }. 
+type DictExpr struct { + commentsRef + Lbrace Position + List []Expr // all *DictEntrys + Rbrace Position +} + +func (x *DictExpr) Span() (start, end Position) { + return x.Lbrace, x.Rbrace.add("}") +} + +// A DictEntry represents a dictionary entry: Key: Value. +// Used only within a DictExpr. +type DictEntry struct { + commentsRef + Key Expr + Colon Position + Value Expr +} + +func (x *DictEntry) Span() (start, end Position) { + start, _ = x.Key.Span() + _, end = x.Value.Span() + return start, end +} + +// A LambdaExpr represents an inline function abstraction. +// +// Although they may be added in future, lambda expressions are not +// currently part of the Starlark spec, so their use is controlled by the +// resolver.AllowLambda flag. +type LambdaExpr struct { + commentsRef + Lambda Position + Params []Expr // param = ident | ident=expr | * | *ident | **ident + Body Expr + + Function interface{} // a *resolve.Function, set by resolver +} + +func (x *LambdaExpr) Span() (start, end Position) { + _, end = x.Body.Span() + return x.Lambda, end +} + +// A ListExpr represents a list literal: [ List ]. +type ListExpr struct { + commentsRef + Lbrack Position + List []Expr + Rbrack Position +} + +func (x *ListExpr) Span() (start, end Position) { + return x.Lbrack, x.Rbrack.add("]") +} + +// CondExpr represents the conditional: X if COND else ELSE. +type CondExpr struct { + commentsRef + If Position + Cond Expr + True Expr + ElsePos Position + False Expr +} + +func (x *CondExpr) Span() (start, end Position) { + start, _ = x.True.Span() + _, end = x.False.Span() + return start, end +} + +// A TupleExpr represents a tuple literal: (List). +type TupleExpr struct { + commentsRef + Lparen Position // optional (e.g. 
in x, y = 0, 1), but required if List is empty + List []Expr + Rparen Position +} + +func (x *TupleExpr) Span() (start, end Position) { + if x.Lparen.IsValid() { + return x.Lparen, x.Rparen + } else { + return Start(x.List[0]), End(x.List[len(x.List)-1]) + } +} + +// A UnaryExpr represents a unary expression: Op X. +// +// As a special case, UnaryExpr{Op:STAR} may also represent +// the star parameter in def f(*args) or def f(*, x). +type UnaryExpr struct { + commentsRef + OpPos Position + Op Token + X Expr // may be nil if Op==STAR +} + +func (x *UnaryExpr) Span() (start, end Position) { + if x.X != nil { + _, end = x.X.Span() + } else { + end = x.OpPos.add("*") + } + return x.OpPos, end +} + +// A BinaryExpr represents a binary expression: X Op Y. +// +// As a special case, BinaryExpr{Op:EQ} may also +// represent a named argument in a call f(k=v) +// or a named parameter in a function declaration +// def f(param=default). +type BinaryExpr struct { + commentsRef + X Expr + OpPos Position + Op Token + Y Expr +} + +func (x *BinaryExpr) Span() (start, end Position) { + start, _ = x.X.Span() + _, end = x.Y.Span() + return start, end +} + +// A SliceExpr represents a slice or substring expression: X[Lo:Hi:Step]. +type SliceExpr struct { + commentsRef + X Expr + Lbrack Position + Lo, Hi, Step Expr // all optional + Rbrack Position +} + +func (x *SliceExpr) Span() (start, end Position) { + start, _ = x.X.Span() + return start, x.Rbrack +} + +// An IndexExpr represents an index expression: X[Y]. +type IndexExpr struct { + commentsRef + X Expr + Lbrack Position + Y Expr + Rbrack Position +} + +func (x *IndexExpr) Span() (start, end Position) { + start, _ = x.X.Span() + return start, x.Rbrack +} diff --git a/syntax/testdata/errors.star b/syntax/testdata/errors.star new file mode 100644 index 0000000..cee1fc9 --- /dev/null +++ b/syntax/testdata/errors.star @@ -0,0 +1,212 @@ +# Tests of parse errors. +# This is a "chunked" file; each "---" line demarcates a new parser input. 
+# +# TODO(adonovan): lots more tests. + +x = 1 + +2 ### "got newline, want primary expression" + +--- + +_ = *x ### `got '\*', want primary` + +--- +# trailing comma is ok + +def f(a, ): pass +def f(*args, ): pass +def f(**kwargs, ): pass + +--- + +# Parameters are validated later. +def f(**kwargs, *args, *, b=1, a, **kwargs, *args, *, b=1, a): + pass + +--- + +def f(a, *-b, c): # ### `got '-', want ','` + pass + +--- + +def f(**kwargs, *args, b=1, a, **kwargs, *args, b=1, a): + pass + +--- + +def pass(): ### "not an identifier" + pass + +--- + +def f : ### `got ':', want '\('` + +--- +# trailing comma is ok + +f(a, ) +f(*args, ) +f(**kwargs, ) + +--- + +f(a=1, *, b=2) ### `got ',', want primary` + +--- + +_ = {x:y for y in z} # ok +_ = {x for y in z} ### `got for, want ':'` + +--- + +def f(): + pass + pass ### `unindent does not match any outer indentation level` + +--- +def f(): pass +--- +# Blank line after pass => outdent. +def f(): + pass + +--- +# No blank line after pass; EOF acts like a newline. +def f(): + pass +--- +# This is a well known parsing ambiguity in Python. +# Python 2.7 accepts it but Python3 and Starlark reject it. +_ = [x for x in lambda: True, lambda: False if x()] ### "got lambda, want primary" + +_ = [x for x in (lambda: True, lambda: False) if x()] # ok in all dialects + +--- +# Starlark, following Python 3, allows an unparenthesized +# tuple after 'in' only in a for statement but not in a comprehension. +# (Python 2.7 allows both.) +for x in 1, 2, 3: + print(x) + +_ = [x for x in 1, 2, 3] ### `got ',', want ']', for, or if` +--- +# Unparenthesized tuple is not allowed as operand of 'if' in comprehension. +_ = [a for b in c if 1, 2] ### `got ',', want ']', for, or if` + +--- +# Lambda is ok though. 
+_ = [a for b in c if lambda: d] # ok + +# But the body of such a lambda may not be a conditional: +_ = [a for b in c if (lambda: d if e else f)] # ok +_ = [a for b in c if lambda: d if e else f] ### "got else, want ']'" + +--- +# A lambda is not allowed as the operand of a 'for' clause. +_ = [a for b in lambda: c] ### `got lambda, want primary` + +--- +# Comparison operations are not associative. + +_ = (0 == 1) == 2 # ok +_ = 0 == (1 == 2) # ok +_ = 0 == 1 == 2 ### "== does not associate with ==" + +--- + +_ = (0 <= i) < n # ok +_ = 0 <= (i < n) # ok +_ = 0 <= i < n ### "<= does not associate with <" + +--- + +_ = (a in b) not in c # ok +_ = a in (b not in c) # ok +_ = a in b not in c ### "in does not associate with not in" + +--- +# shift/reduce ambiguity is reduced +_ = [x for x in a if b else c] ### `got else, want ']', for, or if` +--- +[a for b in c else d] ### `got else, want ']', for, or if` +--- +_ = a + b not c ### "got identifier, want in" +--- +f(1+2 = 3) ### "keyword argument must have form name=expr" +--- +print(1, 2, 3 +### `got end of file, want '\)'` +--- +_ = a if b ### "conditional expression without else clause" +--- +load("") ### "load statement must import at least 1 symbol" +--- +load("", 1) ### `load operand must be "name" or localname="name" \(got int literal\)` +--- +load("a", "x") # ok +--- +load(1, 2) ### "first operand of load statement must be a string literal" +--- +load("a", x) ### `load operand must be "x" or x="originalname"` +--- +load("a", x2=x) ### `original name of loaded symbol must be quoted: x2="originalname"` +--- +# All of these parse. +load("a", "x") +load("a", "x", y2="y") +load("a", x2="x", "y") # => positional-before-named arg check happens later (!) 
+--- +# 'load' is not an identifier +load = 1 ### `got '=', want '\('` +--- +# 'load' is not an identifier +f(load()) ### `got load, want primary` +--- +# 'load' is not an identifier +def load(): ### `not an identifier` + pass +--- +# 'load' is not an identifier +def f(load): ### `not an identifier` + pass +--- +# A load statement allows a trailing comma. +load("module", "x",) +--- +x = 1 + +2 ### "got newline, want primary expression" +--- +def f(): + pass +# this used to cause a spurious indentation error +--- +print 1 2 ### `got int literal, want newline` + +--- +# newlines are not allowed in raw string literals +raw = r'a ### `unexpected newline in string` +b' + +--- +# The parser permits an unparenthesized tuple expression for the first index. +x[1, 2:] # ok +--- +# But not if it has a trailing comma. +x[1, 2,:] ### `got ':', want primary` +--- +# Trailing tuple commas are permitted only within parens; see b/28867036. +(a, b,) = 1, 2 # ok +c, d = 1, 2 # ok +--- +a, b, = 1, 2 ### `unparenthesized tuple with trailing comma` +--- +a, b = 1, 2, ### `unparenthesized tuple with trailing comma` + +--- +# See github.com/google/starlark-go/issues/48 +a = max(range(10))) ### `unexpected '\)'` + +--- +# github.com/google/starlark-go/issues/85 +s = "\x-0" ### `invalid escape sequence` diff --git a/syntax/testdata/scan.star b/syntax/testdata/scan.star new file mode 100644 index 0000000..4f62ba9 --- /dev/null +++ b/syntax/testdata/scan.star @@ -0,0 +1,1324 @@ +# Copyright 2014 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# (From https://github.com/bazelbuild/rules_go/blob/master/go/def.bzl@a6f9d0c) + +load("//go/private:repositories.bzl", "go_repositories") +load("//go/private:go_repository.bzl", "go_repository", "new_go_repository") +load("//go/private:go_prefix.bzl", "go_prefix") +load("//go/private:json.bzl", "json_marshal") + +"""These are bare-bones Go rules. + +In order of priority: + +- BUILD file must be written by hand. + +- No support for SWIG + +- No test sharding or test XML. + +""" + +_DEFAULT_LIB = "go_default_library" + +_VENDOR_PREFIX = "/vendor/" + +go_filetype = FileType([ + ".go", + ".s", + ".S", + ".h", # may be included by .s +]) + +# be consistent to cc_library. +hdr_exts = [ + ".h", + ".hh", + ".hpp", + ".hxx", + ".inc", +] + +cc_hdr_filetype = FileType(hdr_exts) + +# Extensions of files we can build with the Go compiler or with cc_library. +# This is a subset of the extensions recognized by go/build. +cgo_filetype = FileType([ + ".go", + ".c", + ".cc", + ".cxx", + ".cpp", + ".s", + ".S", + ".h", + ".hh", + ".hpp", + ".hxx", +]) + +################ + +def go_environment_vars(ctx): + """Return a map of environment variables for use with actions, based on + the arguments. Uses the ctx.fragments.cpp.cpu attribute, if present, + and picks a default of target_os="linux" and target_arch="amd64" + otherwise. + + Args: + The starlark Context. + + Returns: + A dict of environment variables for running Go tool commands that build for + the target OS and architecture. 
+ """ + default_toolchain = {"GOOS": "linux", "GOARCH": "amd64"} + bazel_to_go_toolchain = { + "k8": {"GOOS": "linux", "GOARCH": "amd64"}, + "piii": {"GOOS": "linux", "GOARCH": "386"}, + "darwin": {"GOOS": "darwin", "GOARCH": "amd64"}, + "darwin_x86_64": {"GOOS": "darwin", "GOARCH": "amd64"}, + "freebsd": {"GOOS": "freebsd", "GOARCH": "amd64"}, + "armeabi-v7a": {"GOOS": "linux", "GOARCH": "arm"}, + "arm": {"GOOS": "linux", "GOARCH": "arm"}, + } + env = {} + if hasattr(ctx.file, "go_tool"): + env["GOROOT"] = ctx.file.go_tool.dirname + "/.." + env.update(bazel_to_go_toolchain.get(ctx.fragments.cpp.cpu, default_toolchain)) + return env + +def _is_darwin_cpu(ctx): + cpu = ctx.fragments.cpp.cpu + return cpu == "darwin" or cpu == "darwin_x86_64" + +def _emit_generate_params_action(cmds, ctx, fn): + cmds_all = [ + # Use bash explicitly. /bin/sh is default, and it may be linked to a + # different shell, e.g., /bin/dash on Ubuntu. + "#!/bin/bash", + "set -e", + ] + cmds_all += cmds + cmds_all_str = "\n".join(cmds_all) + "\n" + f = ctx.new_file(ctx.configuration.bin_dir, fn) + ctx.file_action( + output = f, + content = cmds_all_str, + executable = True, + ) + return f + +def _emit_go_asm_action(ctx, source, hdrs, out_obj): + """Construct the command line for compiling Go Assembly code. + Constructs a symlink tree to accomodate for workspace name. + Args: + ctx: The starlark Context. + source: a source code artifact + hdrs: list of .h files that may be included + out_obj: the artifact (configured target?) 
that should be produced + """ + params = { + "go_tool": ctx.file.go_tool.path, + "includes": [f.dirname for f in hdrs] + [ctx.file.go_include.path], + "source": source.path, + "out": out_obj.path, + } + + inputs = hdrs + ctx.files.toolchain + [source] + ctx.action( + inputs = inputs, + outputs = [out_obj], + mnemonic = "GoAsmCompile", + executable = ctx.executable._asm, + arguments = [json_marshal(params)], + ) + +def _go_importpath(ctx): + """Returns the expected importpath of the go_library being built. + + Args: + ctx: The starlark Context + + Returns: + Go importpath of the library + """ + path = ctx.attr.importpath + if path != "": + return path + path = ctx.attr.go_prefix.go_prefix + if path.endswith("/"): + path = path[:-1] + if ctx.label.package: + path += "/" + ctx.label.package + if ctx.label.name != _DEFAULT_LIB: + path += "/" + ctx.label.name + if path.rfind(_VENDOR_PREFIX) != -1: + path = path[len(_VENDOR_PREFIX) + path.rfind(_VENDOR_PREFIX):] + if path[0] == "/": + path = path[1:] + return path + +def _emit_go_compile_action(ctx, sources, deps, libpaths, out_object, gc_goopts): + """Construct the command line for compiling Go code. + + Args: + ctx: The starlark Context. + sources: an iterable of source code artifacts (or CTs? or labels?) + deps: an iterable of dependencies. Each dependency d should have an + artifact in d.transitive_go_libraries representing all imported libraries. + libpaths: the set of paths to search for imported libraries. + out_object: the object file that should be produced + gc_goopts: additional flags to pass to the compiler. + """ + if ctx.coverage_instrumented(): + sources = _emit_go_cover_action(ctx, sources) + + # Compile filtered files. 
+ args = [ + "-cgo", + ctx.file.go_tool.path, + "tool", + "compile", + "-o", + out_object.path, + "-trimpath", + "-abs-.", + "-I", + "-abs-.", + ] + inputs = depset(sources + ctx.files.toolchain) + for dep in deps: + inputs += dep.transitive_go_libraries + for path in libpaths: + args += ["-I", path] + args += gc_goopts + [("" if i.basename.startswith("_cgo") else "-filter-") + i.path for i in sources] + ctx.action( + inputs = list(inputs), + outputs = [out_object], + mnemonic = "GoCompile", + executable = ctx.executable._filter_exec, + arguments = args, + env = go_environment_vars(ctx), + ) + + return sources + +def _emit_go_pack_action(ctx, out_lib, objects): + """Construct the command line for packing objects together. + + Args: + ctx: The starlark Context. + out_lib: the archive that should be produced + objects: an iterable of object files to be added to the output archive file. + """ + ctx.action( + inputs = objects + ctx.files.toolchain, + outputs = [out_lib], + mnemonic = "GoPack", + executable = ctx.file.go_tool, + arguments = ["tool", "pack", "c", out_lib.path] + [a.path for a in objects], + env = go_environment_vars(ctx), + ) + +def _emit_go_cover_action(ctx, sources): + """Construct the command line for test coverage instrument. + + Args: + ctx: The starlark Context. + sources: an iterable of Go source files. + + Returns: + A list of Go source code files which might be coverage instrumented. + """ + outputs = [] + + # TODO(linuxerwang): make the mode configurable. 
+ count = 0 + + for src in sources: + if not src.path.endswith(".go") or src.path.endswith("_test.go"): + outputs += [src] + continue + + cover_var = "GoCover_%d" % count + out = ctx.new_file(src, src.basename[:-3] + "_" + cover_var + ".cover.go") + outputs += [out] + ctx.action( + inputs = [src] + ctx.files.toolchain, + outputs = [out], + mnemonic = "GoCover", + executable = ctx.file.go_tool, + arguments = ["tool", "cover", "--mode=set", "-var=%s" % cover_var, "-o", out.path, src.path], + env = go_environment_vars(ctx), + ) + count += 1 + + return outputs + +def go_library_impl(ctx): + """Implements the go_library() rule.""" + + sources = depset(ctx.files.srcs) + go_srcs = depset([s for s in sources if s.basename.endswith(".go")]) + asm_srcs = [s for s in sources if s.basename.endswith(".s") or s.basename.endswith(".S")] + asm_hdrs = [s for s in sources if s.basename.endswith(".h")] + deps = ctx.attr.deps + dep_runfiles = [d.data_runfiles for d in deps] + + cgo_object = None + if hasattr(ctx.attr, "cgo_object"): + cgo_object = ctx.attr.cgo_object + + if ctx.attr.library: + go_srcs += ctx.attr.library.go_sources + asm_srcs += ctx.attr.library.asm_sources + asm_hdrs += ctx.attr.library.asm_headers + deps += ctx.attr.library.direct_deps + dep_runfiles += [ctx.attr.library.data_runfiles] + if ctx.attr.library.cgo_object: + if cgo_object: + fail("go_library %s cannot have cgo_object because the package " + + "already has cgo_object in %s" % ( + ctx.label.name, + ctx.attr.library.name, + )) + cgo_object = ctx.attr.library.cgo_object + if not go_srcs: + fail("may not be empty", "srcs") + + transitive_cgo_deps = depset([], order = "topological") + if cgo_object: + dep_runfiles += [cgo_object.data_runfiles] + transitive_cgo_deps += cgo_object.cgo_deps + + extra_objects = [cgo_object.cgo_obj] if cgo_object else [] + for src in asm_srcs: + obj = ctx.new_file(src, "%s.dir/%s.o" % (ctx.label.name, src.basename[:-2])) + _emit_go_asm_action(ctx, src, asm_hdrs, obj) + 
extra_objects += [obj] + + lib_name = _go_importpath(ctx) + ".a" + out_lib = ctx.new_file(lib_name) + out_object = ctx.new_file(ctx.label.name + ".o") + search_path = out_lib.path[:-len(lib_name)] + gc_goopts = _gc_goopts(ctx) + transitive_go_libraries = depset([out_lib]) + transitive_go_library_paths = depset([search_path]) + for dep in deps: + transitive_go_libraries += dep.transitive_go_libraries + transitive_cgo_deps += dep.transitive_cgo_deps + transitive_go_library_paths += dep.transitive_go_library_paths + + go_srcs = _emit_go_compile_action( + ctx, + sources = go_srcs, + deps = deps, + libpaths = transitive_go_library_paths, + out_object = out_object, + gc_goopts = gc_goopts, + ) + _emit_go_pack_action(ctx, out_lib, [out_object] + extra_objects) + + dylibs = [] + if cgo_object: + dylibs += [d for d in cgo_object.cgo_deps if d.path.endswith(".so")] + + runfiles = ctx.runfiles(files = dylibs, collect_data = True) + for d in dep_runfiles: + runfiles = runfiles.merge(d) + + return struct( + label = ctx.label, + files = depset([out_lib]), + runfiles = runfiles, + go_sources = go_srcs, + asm_sources = asm_srcs, + asm_headers = asm_hdrs, + cgo_object = cgo_object, + direct_deps = ctx.attr.deps, + transitive_cgo_deps = transitive_cgo_deps, + transitive_go_libraries = transitive_go_libraries, + transitive_go_library_paths = transitive_go_library_paths, + gc_goopts = gc_goopts, + ) + +def _c_linker_options(ctx, blocklist = []): + """Extracts flags to pass to $(CC) on link from the current context + + Args: + ctx: the current context + blocklist: Any flags starts with any of these prefixes are filtered out from + the return value. 
+ + Returns: + A list of command line flags + """ + cpp = ctx.fragments.cpp + features = ctx.features + options = cpp.compiler_options(features) + options += cpp.unfiltered_compiler_options(features) + options += cpp.link_options + options += cpp.mostly_static_link_options(ctx.features, False) + filtered = [] + for opt in options: + if any([opt.startswith(prefix) for prefix in blocklist]): + continue + filtered.append(opt) + return filtered + +def _gc_goopts(ctx): + gc_goopts = [ + ctx.expand_make_variables("gc_goopts", f, {}) + for f in ctx.attr.gc_goopts + ] + if ctx.attr.library: + gc_goopts += ctx.attr.library.gc_goopts + return gc_goopts + +def _gc_linkopts(ctx): + gc_linkopts = [ + ctx.expand_make_variables("gc_linkopts", f, {}) + for f in ctx.attr.gc_linkopts + ] + for k, v in ctx.attr.x_defs.items(): + gc_linkopts += ["-X", "%s='%s'" % (k, v)] + return gc_linkopts + +def _extract_extldflags(gc_linkopts, extldflags): + """Extracts -extldflags from gc_linkopts and combines them into a single list. + + Args: + gc_linkopts: a list of flags passed in through the gc_linkopts attributes. + ctx.expand_make_variables should have already been applied. + extldflags: a list of flags to be passed to the external linker. + + Return: + A tuple containing the filtered gc_linkopts with external flags removed, + and a combined list of external flags. 
+ """ + filtered_gc_linkopts = [] + is_extldflags = False + for opt in gc_linkopts: + if is_extldflags: + is_extldflags = False + extldflags += [opt] + elif opt == "-extldflags": + is_extldflags = True + else: + filtered_gc_linkopts += [opt] + return filtered_gc_linkopts, extldflags + +def _emit_go_link_action( + ctx, + transitive_go_library_paths, + transitive_go_libraries, + cgo_deps, + libs, + executable, + gc_linkopts): + """Sets up a symlink tree to libraries to link together.""" + config_strip = len(ctx.configuration.bin_dir.path) + 1 + pkg_depth = executable.dirname[config_strip:].count("/") + 1 + + ld = "%s" % ctx.fragments.cpp.compiler_executable + extldflags = _c_linker_options(ctx) + [ + "-Wl,-rpath,$ORIGIN/" + ("../" * pkg_depth), + ] + for d in cgo_deps: + if d.basename.endswith(".so"): + short_dir = d.dirname[len(d.root.path):] + extldflags += ["-Wl,-rpath,$ORIGIN/" + ("../" * pkg_depth) + short_dir] + gc_linkopts, extldflags = _extract_extldflags(gc_linkopts, extldflags) + + link_cmd = [ + ctx.file.go_tool.path, + "tool", + "link", + "-L", + ".", + ] + for path in transitive_go_library_paths: + link_cmd += ["-L", path] + link_cmd += [ + "-o", + executable.path, + ] + gc_linkopts + ['"${STAMP_XDEFS[@]}"'] + + # workaround for a bug in ld(1) on Mac OS X. + # http://lists.apple.com/archives/Darwin-dev/2006/Sep/msg00084.html + # TODO(yugui) Remove this workaround once rules_go stops supporting XCode 7.2 + # or earlier. + if not _is_darwin_cpu(ctx): + link_cmd += ["-s"] + + link_cmd += [ + "-extld", + ld, + "-extldflags", + "'%s'" % " ".join(extldflags), + ] + [lib.path for lib in libs] + + # Avoided -s on OSX but but it requires dsymutil to be on $PATH. + # TODO(yugui) Remove this workaround once rules_go stops supporting XCode 7.2 + # or earlier. 
+ cmds = ["export PATH=$PATH:/usr/bin"] + + cmds += [ + "STAMP_XDEFS=()", + ] + + stamp_inputs = [] + if ctx.attr.linkstamp: + # read workspace status files, converting "KEY value" lines + # to "-X $linkstamp.KEY=value" arguments to the go linker. + stamp_inputs = [ctx.info_file, ctx.version_file] + for f in stamp_inputs: + cmds += [ + "while read -r key value || [[ -n $key ]]; do", + " STAMP_XDEFS+=(-X \"%s.$key=$value\")" % ctx.attr.linkstamp, + "done < " + f.path, + ] + + cmds += [" ".join(link_cmd)] + + f = _emit_generate_params_action(cmds, ctx, lib.basename + ".GoLinkFile.params") + + ctx.action( + inputs = [f] + (list(transitive_go_libraries) + [lib] + list(cgo_deps) + + ctx.files.toolchain + ctx.files._crosstool) + stamp_inputs, + outputs = [executable], + command = f.path, + mnemonic = "GoLink", + env = go_environment_vars(ctx), + ) + +def go_binary_impl(ctx): + """go_binary_impl emits actions for compiling and linking a go executable.""" + lib_result = go_library_impl(ctx) + _emit_go_link_action( + ctx, + transitive_go_libraries = lib_result.transitive_go_libraries, + transitive_go_library_paths = lib_result.transitive_go_library_paths, + cgo_deps = lib_result.transitive_cgo_deps, + libs = lib_result.files, + executable = ctx.outputs.executable, + gc_linkopts = _gc_linkopts(ctx), + ) + + return struct( + files = depset([ctx.outputs.executable]), + runfiles = lib_result.runfiles, + cgo_object = lib_result.cgo_object, + ) + +def go_test_impl(ctx): + """go_test_impl implements go testing. 
+ + It emits an action to run the test generator, and then compiles the + test into a binary.""" + + lib_result = go_library_impl(ctx) + main_go = ctx.new_file(ctx.label.name + "_main_test.go") + main_object = ctx.new_file(ctx.label.name + "_main_test.o") + main_lib = ctx.new_file(ctx.label.name + "_main_test.a") + go_import = _go_importpath(ctx) + + cmds = [ + "UNFILTERED_TEST_FILES=(%s)" % + " ".join(["'%s'" % f.path for f in lib_result.go_sources]), + "FILTERED_TEST_FILES=()", + "while read -r line; do", + ' if [ -n "$line" ]; then', + ' FILTERED_TEST_FILES+=("$line")', + " fi", + 'done < <(\'%s\' -cgo "${UNFILTERED_TEST_FILES[@]}")' % + ctx.executable._filter_tags.path, + " ".join([ + "'%s'" % ctx.executable.test_generator.path, + "--package", + go_import, + "--output", + "'%s'" % main_go.path, + '"${FILTERED_TEST_FILES[@]}"', + ]), + ] + f = _emit_generate_params_action( + cmds, + ctx, + ctx.label.name + ".GoTestGenTest.params", + ) + inputs = (list(lib_result.go_sources) + list(ctx.files.toolchain) + + [f, ctx.executable._filter_tags, ctx.executable.test_generator]) + ctx.action( + inputs = inputs, + outputs = [main_go], + command = f.path, + mnemonic = "GoTestGenTest", + env = dict(go_environment_vars(ctx), RUNDIR = ctx.label.package), + ) + + _emit_go_compile_action( + ctx, + sources = depset([main_go]), + deps = ctx.attr.deps + [lib_result], + libpaths = lib_result.transitive_go_library_paths, + out_object = main_object, + gc_goopts = _gc_goopts(ctx), + ) + _emit_go_pack_action(ctx, main_lib, [main_object]) + _emit_go_link_action( + ctx, + transitive_go_library_paths = lib_result.transitive_go_library_paths, + transitive_go_libraries = lib_result.transitive_go_libraries, + cgo_deps = lib_result.transitive_cgo_deps, + libs = [main_lib], + executable = ctx.outputs.executable, + gc_linkopts = _gc_linkopts(ctx), + ) + + # TODO(bazel-team): the Go tests should do a chdir to the directory + # holding the data files, so open-source go tests continue to work + # 
without code changes. + runfiles = ctx.runfiles(files = [ctx.outputs.executable]) + runfiles = runfiles.merge(lib_result.runfiles) + return struct( + files = depset([ctx.outputs.executable]), + runfiles = runfiles, + ) + +go_env_attrs = { + "toolchain": attr.label( + default = Label("//go/toolchain:toolchain"), + allow_files = True, + cfg = "host", + ), + "go_tool": attr.label( + default = Label("//go/toolchain:go_tool"), + single_file = True, + allow_files = True, + cfg = "host", + ), + "go_prefix": attr.label( + providers = ["go_prefix"], + default = Label( + "//:go_prefix", + relative_to_caller_repository = True, + ), + allow_files = False, + cfg = "host", + ), + "go_src": attr.label( + default = Label("//go/toolchain:go_src"), + allow_files = True, + cfg = "host", + ), + "go_include": attr.label( + default = Label("//go/toolchain:go_include"), + single_file = True, + allow_files = True, + cfg = "host", + ), + "go_root": attr.label( + providers = ["go_root"], + default = Label( + "//go/toolchain:go_root", + ), + allow_files = False, + cfg = "host", + ), + "_filter_tags": attr.label( + default = Label("//go/tools/filter_tags"), + cfg = "host", + executable = True, + single_file = True, + ), + "_filter_exec": attr.label( + default = Label("//go/tools/filter_exec"), + cfg = "host", + executable = True, + single_file = True, + ), + "_asm": attr.label( + default = Label("//go/tools/builders:asm"), + cfg = "host", + executable = True, + single_file = True, + ), +} + +go_library_attrs = go_env_attrs + { + "data": attr.label_list( + allow_files = True, + cfg = "data", + ), + "srcs": attr.label_list(allow_files = go_filetype), + "deps": attr.label_list( + providers = [ + "transitive_go_library_paths", + "transitive_go_libraries", + "transitive_cgo_deps", + ], + ), + "importpath": attr.string(), + "library": attr.label( + providers = [ + "direct_deps", + "go_sources", + "asm_sources", + "cgo_object", + "gc_goopts", + ], + ), + "gc_goopts": attr.string_list(), +} + 
+_crosstool_attrs = { + "_crosstool": attr.label( + default = Label("//tools/defaults:crosstool"), + ), +} + +go_link_attrs = go_library_attrs + _crosstool_attrs + { + "gc_linkopts": attr.string_list(), + "linkstamp": attr.string(), + "x_defs": attr.string_dict(), +} + +go_library = rule( + go_library_impl, + attrs = go_library_attrs + { + "cgo_object": attr.label( + providers = [ + "cgo_obj", + "cgo_deps", + ], + ), + }, + fragments = ["cpp"], +) + +go_binary = rule( + go_binary_impl, + attrs = go_library_attrs + _crosstool_attrs + go_link_attrs, + executable = True, + fragments = ["cpp"], +) + +go_test = rule( + go_test_impl, + attrs = go_library_attrs + _crosstool_attrs + go_link_attrs + { + "test_generator": attr.label( + executable = True, + default = Label( + "//go/tools:generate_test_main", + ), + cfg = "host", + ), + }, + executable = True, + fragments = ["cpp"], + test = True, +) + +def _pkg_dir(workspace_root, package_name): + if workspace_root and package_name: + return workspace_root + "/" + package_name + if workspace_root: + return workspace_root + if package_name: + return package_name + return "." 
+ +def _exec_path(path): + if path.startswith("/"): + return path + return "${execroot}/" + path + +def _cgo_filter_srcs_impl(ctx): + srcs = ctx.files.srcs + dsts = [] + cmds = [] + for src in srcs: + stem, _, ext = src.path.rpartition(".") + dst_basename = "%s.filtered.%s" % (stem, ext) + dst = ctx.new_file(src, dst_basename) + cmds += [ + "if '%s' -cgo -quiet '%s'; then" % + (ctx.executable._filter_tags.path, src.path), + " cp '%s' '%s'" % (src.path, dst.path), + "else", + " echo -n >'%s'" % dst.path, + "fi", + ] + dsts.append(dst) + + if ctx.label.package == "": + script_name = ctx.label.name + ".CGoFilterSrcs.params" + else: + script_name = ctx.label.package + "/" + ctx.label.name + ".CGoFilterSrcs.params" + f = _emit_generate_params_action(cmds, ctx, script_name) + ctx.action( + inputs = [f, ctx.executable._filter_tags] + srcs, + outputs = dsts, + command = f.path, + mnemonic = "CgoFilterSrcs", + ) + return struct( + files = depset(dsts), + ) + +_cgo_filter_srcs = rule( + implementation = _cgo_filter_srcs_impl, + attrs = { + "srcs": attr.label_list( + allow_files = cgo_filetype, + ), + "_filter_tags": attr.label( + default = Label("//go/tools/filter_tags"), + cfg = "host", + executable = True, + single_file = True, + ), + }, + fragments = ["cpp"], +) + +def _cgo_codegen_impl(ctx): + go_srcs = ctx.files.srcs + srcs = go_srcs + ctx.files.c_hdrs + linkopts = ctx.attr.linkopts + copts = ctx.fragments.cpp.c_options + ctx.attr.copts + deps = depset([], order = "topological") + for d in ctx.attr.deps: + srcs += list(d.cc.transitive_headers) + deps += d.cc.libs + copts += ["-D" + define for define in d.cc.defines] + for inc in d.cc.include_directories: + copts += ["-I", _exec_path(inc)] + for hdr in ctx.files.c_hdrs: + copts += ["-iquote", hdr.dirname] + for inc in d.cc.quote_include_directories: + copts += ["-iquote", _exec_path(inc)] + for inc in d.cc.system_include_directories: + copts += ["-isystem", _exec_path(inc)] + for lib in d.cc.libs: + if 
lib.basename.startswith("lib") and lib.basename.endswith(".so"): + linkopts += ["-L", lib.dirname, "-l", lib.basename[3:-3]] + else: + linkopts += [lib.path] + linkopts += d.cc.link_flags + + p = _pkg_dir(ctx.label.workspace_root, ctx.label.package) + "/" + if p == "./": + p = "" # workaround when cgo_library in repository root + out_dir = (ctx.configuration.genfiles_dir.path + "/" + + p + ctx.attr.outdir) + cc = ctx.fragments.cpp.compiler_executable + cmds = [ + # We cannot use env for CC because $(CC) on OSX is relative + # and '../' does not work fine due to symlinks. + "export CC=$(cd $(dirname {cc}); pwd)/$(basename {cc})".format(cc = cc), + "export CXX=$CC", + 'objdir="%s/gen"' % out_dir, + "execroot=$(pwd)", + 'mkdir -p "$objdir"', + "unfiltered_go_files=(%s)" % " ".join(["'%s'" % f.path for f in go_srcs]), + "filtered_go_files=()", + 'for file in "${unfiltered_go_files[@]}"; do', + ' stem=$(basename "$file" .go)', + ' if %s -cgo -quiet "$file"; then' % ctx.executable._filter_tags.path, + ' filtered_go_files+=("$file")', + " else", + ' grep --max-count 1 "^package " "$file" >"$objdir/$stem.go"', + ' echo -n >"$objdir/$stem.c"', + " fi", + "done", + "if [ ${#filtered_go_files[@]} -eq 0 ]; then", + " echo no buildable Go source files in %s >&1" % str(ctx.label), + " exit 1", + "fi", + '"$GOROOT/bin/go" tool cgo -objdir "$objdir" -- %s "${filtered_go_files[@]}"' % + " ".join(['"%s"' % copt for copt in copts]), + # Rename the outputs using glob so we don't have to understand cgo's mangling + # TODO(#350): might be fixed by this?. 
+ 'for file in "${filtered_go_files[@]}"; do', + ' stem=$(basename "$file" .go)', + ' mv "$objdir/"*"$stem.cgo1.go" "$objdir/$stem.go"', + ' mv "$objdir/"*"$stem.cgo2.c" "$objdir/$stem.c"', + "done", + "rm -f $objdir/_cgo_.o $objdir/_cgo_flags", + ] + + f = _emit_generate_params_action(cmds, ctx, out_dir + ".CGoCodeGenFile.params") + + inputs = (srcs + ctx.files.toolchain + ctx.files._crosstool + + [f, ctx.executable._filter_tags]) + ctx.action( + inputs = inputs, + outputs = ctx.outputs.outs, + mnemonic = "CGoCodeGen", + progress_message = "CGoCodeGen %s" % ctx.label, + command = f.path, + env = go_environment_vars(ctx) + { + "CGO_LDFLAGS": " ".join(linkopts), + }, + ) + return struct( + label = ctx.label, + files = depset(ctx.outputs.outs), + cgo_deps = deps, + ) + +_cgo_codegen_rule = rule( + _cgo_codegen_impl, + attrs = go_env_attrs + _crosstool_attrs + { + "srcs": attr.label_list( + allow_files = go_filetype, + non_empty = True, + ), + "c_hdrs": attr.label_list( + allow_files = cc_hdr_filetype, + ), + "deps": attr.label_list( + allow_files = False, + providers = ["cc"], + ), + "copts": attr.string_list(), + "linkopts": attr.string_list(), + "outdir": attr.string(mandatory = True), + "outs": attr.output_list( + mandatory = True, + non_empty = True, + ), + }, + fragments = ["cpp"], + output_to_genfiles = True, +) + +def _cgo_codegen( + name, + srcs, + c_hdrs = [], + deps = [], + copts = [], + linkopts = [], + go_tool = None, + toolchain = None): + """Generates glue codes for interop between C and Go + + Args: + name: A unique name of the rule + srcs: list of Go source files. + Each of them must contain `import "C"`. + c_hdrs: C/C++ header files necessary to determine kinds of + C/C++ identifiers in srcs. + deps: A list of cc_library rules. + The generated codes are expected to be linked with these deps. + linkopts: A list of linker options, + These flags are passed to the linker when the generated codes + are linked into the target binary. 
+ """ + outdir = name + ".dir" + outgen = outdir + "/gen" + + go_thunks = [] + c_thunks = [] + for s in srcs: + if not s.endswith(".go"): + fail("not a .go file: %s" % s) + basename = s[:-3] + if basename.rfind("/") >= 0: + basename = basename[basename.rfind("/") + 1:] + go_thunks.append(outgen + "/" + basename + ".go") + c_thunks.append(outgen + "/" + basename + ".c") + + outs = struct( + name = name, + outdir = outgen, + go_thunks = go_thunks, + c_thunks = c_thunks, + c_exports = [ + outgen + "/_cgo_export.c", + outgen + "/_cgo_export.h", + ], + c_dummy = outgen + "/_cgo_main.c", + gotypes = outgen + "/_cgo_gotypes.go", + ) + + _cgo_codegen_rule( + name = name, + srcs = srcs, + c_hdrs = c_hdrs, + deps = deps, + copts = copts, + linkopts = linkopts, + go_tool = go_tool, + toolchain = toolchain, + outdir = outdir, + outs = outs.go_thunks + outs.c_thunks + outs.c_exports + [ + outs.c_dummy, + outs.gotypes, + ], + visibility = ["//visibility:private"], + ) + return outs + +def _cgo_import_impl(ctx): + cmds = [ + (ctx.file.go_tool.path + " tool cgo" + + " -dynout " + ctx.outputs.out.path + + " -dynimport " + ctx.file.cgo_o.path + + " -dynpackage $(%s %s)" % ( + ctx.executable._extract_package.path, + ctx.file.sample_go_src.path, + )), + ] + f = _emit_generate_params_action(cmds, ctx, ctx.outputs.out.path + ".CGoImportGenFile.params") + ctx.action( + inputs = (ctx.files.toolchain + + [ + f, + ctx.file.go_tool, + ctx.executable._extract_package, + ctx.file.cgo_o, + ctx.file.sample_go_src, + ]), + outputs = [ctx.outputs.out], + command = f.path, + mnemonic = "CGoImportGen", + env = go_environment_vars(ctx), + ) + return struct( + files = depset([ctx.outputs.out]), + ) + +_cgo_import = rule( + _cgo_import_impl, + attrs = go_env_attrs + { + "cgo_o": attr.label( + allow_files = True, + single_file = True, + ), + "sample_go_src": attr.label( + allow_files = True, + single_file = True, + ), + "out": attr.output( + mandatory = True, + ), + "_extract_package": attr.label( + 
default = Label("//go/tools/extract_package"), + executable = True, + cfg = "host", + ), + }, + fragments = ["cpp"], +) + +def _cgo_genrule_impl(ctx): + return struct( + label = ctx.label, + go_sources = ctx.files.srcs, + asm_sources = [], + asm_headers = [], + cgo_object = ctx.attr.cgo_object, + direct_deps = ctx.attr.deps, + gc_goopts = [], + ) + +_cgo_genrule = rule( + _cgo_genrule_impl, + attrs = { + "srcs": attr.label_list(allow_files = FileType([".go"])), + "cgo_object": attr.label( + providers = [ + "cgo_obj", + "cgo_deps", + ], + ), + "deps": attr.label_list( + providers = [ + "direct_deps", + "transitive_go_library_paths", + "transitive_go_libraries", + "transitive_cgo_deps", + ], + ), + }, + fragments = ["cpp"], +) + +"""Generates symbol-import directives for cgo + +Args: + cgo_o: The loadable object to extract dynamic symbols from. + sample_go_src: A go source which is compiled together with the generated file. + The generated file will have the same Go package name as this file. + out: Destination of the generated codes. 
+""" + +def _cgo_object_impl(ctx): + arguments = _c_linker_options(ctx, blocklist = [ + # never link any dependency libraries + "-l", + "-L", + # manage flags to ld(1) by ourselves + "-Wl,", + ]) + arguments += [ + "-o", + ctx.outputs.out.path, + "-nostdlib", + "-Wl,-r", + ] + if _is_darwin_cpu(ctx): + arguments += ["-shared", "-Wl,-all_load"] + else: + arguments += ["-Wl,-whole-archive"] + + lo = ctx.files.src[-1] + arguments += [lo.path] + + ctx.action( + inputs = [lo] + ctx.files._crosstool, + outputs = [ctx.outputs.out], + mnemonic = "CGoObject", + progress_message = "Linking %s" % ctx.outputs.out.short_path, + executable = ctx.fragments.cpp.compiler_executable, + arguments = arguments, + ) + runfiles = ctx.runfiles(collect_data = True) + runfiles = runfiles.merge(ctx.attr.src.data_runfiles) + return struct( + files = depset([ctx.outputs.out]), + cgo_obj = ctx.outputs.out, + cgo_deps = ctx.attr.cgogen.cgo_deps, + runfiles = runfiles, + ) + +_cgo_object = rule( + _cgo_object_impl, + attrs = _crosstool_attrs + { + "src": attr.label( + mandatory = True, + providers = ["cc"], + ), + "cgogen": attr.label( + mandatory = True, + providers = ["cgo_deps"], + ), + "out": attr.output( + mandatory = True, + ), + }, + fragments = ["cpp"], +) + +"""Generates _all.o to be archived together with Go objects. + +Args: + src: source static library which contains objects + cgogen: _cgo_codegen rule which knows the dependency cc_library() rules + to be linked together with src when we generate the final go binary. +""" + +def _setup_cgo_library(name, srcs, cdeps, copts, clinkopts, go_tool, toolchain): + go_srcs = [s for s in srcs if s.endswith(".go")] + c_hdrs = [s for s in srcs if any([s.endswith(ext) for ext in hdr_exts])] + c_srcs = [s for s in srcs if not s in (go_srcs + c_hdrs)] + + # Split cgo files into .go parts and .c parts (plus some other files). 
+ cgogen = _cgo_codegen( + name = name + ".cgo", + srcs = go_srcs, + c_hdrs = c_hdrs, + deps = cdeps, + copts = copts, + linkopts = clinkopts, + go_tool = go_tool, + toolchain = toolchain, + ) + + # Filter c_srcs with build constraints. + c_filtered_srcs = [] + if len(c_srcs) > 0: + c_filtered_srcs_name = name + "_filter_cgo_srcs" + _cgo_filter_srcs( + name = c_filtered_srcs_name, + srcs = c_srcs, + ) + c_filtered_srcs.append(":" + c_filtered_srcs_name) + + pkg_dir = _pkg_dir( + "external/" + REPOSITORY_NAME[1:] if len(REPOSITORY_NAME) > 1 else "", + PACKAGE_NAME, + ) + + # Platform-specific settings + native.config_setting( + name = name + "_windows_setting", + values = { + "cpu": "x64_windows_msvc", + }, + ) + platform_copts = select({ + ":" + name + "_windows_setting": ["-mthreads"], + "//conditions:default": ["-pthread"], + }) + platform_linkopts = select({ + ":" + name + "_windows_setting": ["-mthreads"], + "//conditions:default": ["-pthread"], + }) + + # Bundles objects into an archive so that _cgo_.o and _all.o can share them. + native.cc_library( + name = cgogen.outdir + "/_cgo_lib", + srcs = cgogen.c_thunks + cgogen.c_exports + c_filtered_srcs + c_hdrs, + deps = cdeps, + copts = copts + platform_copts + [ + "-I", + pkg_dir, + "-I", + "$(GENDIR)/" + pkg_dir + "/" + cgogen.outdir, + # The generated thunks often contain unused variables. + "-Wno-unused-variable", + ], + linkopts = clinkopts + platform_linkopts, + linkstatic = 1, + # _cgo_.o and _all.o keep all objects in this archive. 
+ # But it should not be very annoying in the final binary target + # because _cgo_object rule does not propagate alwayslink=1 + alwayslink = 1, + visibility = ["//visibility:private"], + ) + + # Loadable object which cgo reads when it generates _cgo_import.go + native.cc_binary( + name = cgogen.outdir + "/_cgo_.o", + srcs = [cgogen.c_dummy], + deps = cdeps + [cgogen.outdir + "/_cgo_lib"], + copts = copts, + linkopts = clinkopts, + visibility = ["//visibility:private"], + ) + _cgo_import( + name = "%s.cgo.importgen" % name, + cgo_o = cgogen.outdir + "/_cgo_.o", + out = cgogen.outdir + "/_cgo_import.go", + sample_go_src = go_srcs[0], + go_tool = go_tool, + toolchain = toolchain, + visibility = ["//visibility:private"], + ) + + _cgo_object( + name = cgogen.outdir + "/_cgo_object", + src = cgogen.outdir + "/_cgo_lib", + out = cgogen.outdir + "/_all.o", + cgogen = cgogen.name, + visibility = ["//visibility:private"], + ) + return cgogen + +def cgo_genrule( + name, + srcs, + copts = [], + clinkopts = [], + cdeps = [], + **kwargs): + cgogen = _setup_cgo_library( + name = name, + srcs = srcs, + cdeps = cdeps, + copts = copts, + clinkopts = clinkopts, + toolchain = None, + go_tool = None, + ) + _cgo_genrule( + name = name, + srcs = cgogen.go_thunks + [ + cgogen.gotypes, + cgogen.outdir + "/_cgo_import.go", + ], + cgo_object = cgogen.outdir + "/_cgo_object", + **kwargs + ) + +def cgo_library( + name, + srcs, + toolchain = None, + go_tool = None, + copts = [], + clinkopts = [], + cdeps = [], + **kwargs): + """Builds a cgo-enabled go library. + + Args: + name: A unique name for this rule. + srcs: List of Go, C and C++ files that are processed to build a Go library. + Those Go files must contain `import "C"`. + C and C++ files can be anything allowed in `srcs` attribute of + `cc_library`. + copts: Add these flags to the C++ compiler. + clinkopts: Add these flags to the C++ linker. + cdeps: List of C/C++ libraries to be linked into the binary target. 
+ They must be `cc_library` rules. + deps: List of other libraries to be linked to this library target. + data: List of files needed by this rule at runtime. + + NOTE: + `srcs` cannot contain pure-Go files, which do not have `import "C"`. + So you need to define another `go_library` when you build a go package with + both cgo-enabled and pure-Go sources. + + ``` + cgo_library( + name = "cgo_enabled", + srcs = ["cgo-enabled.go", "foo.cc", "bar.S", "baz.a"], + ) + + go_library( + name = "go_default_library", + srcs = ["pure-go.go"], + library = ":cgo_enabled", + ) + ``` + """ + cgogen = _setup_cgo_library( + name = name, + srcs = srcs, + cdeps = cdeps, + copts = copts, + clinkopts = clinkopts, + go_tool = go_tool, + toolchain = toolchain, + ) + + go_library( + name = name, + srcs = cgogen.go_thunks + [ + cgogen.gotypes, + cgogen.outdir + "/_cgo_import.go", + ], + cgo_object = cgogen.outdir + "/_cgo_object", + go_tool = go_tool, + toolchain = toolchain, + **kwargs + ) diff --git a/syntax/walk.go b/syntax/walk.go new file mode 100644 index 0000000..1491149 --- /dev/null +++ b/syntax/walk.go @@ -0,0 +1,163 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// Walk traverses a syntax tree in depth-first order. +// It starts by calling f(n); n must not be nil. +// If f returns true, Walk calls itself +// recursively for each non-nil child of n. +// Walk then calls f(nil). +func Walk(n Node, f func(Node) bool) { + if n == nil { + panic("nil") + } + if !f(n) { + return + } + + // TODO(adonovan): opt: order cases using profile data. 
+ switch n := n.(type) { + case *File: + walkStmts(n.Stmts, f) + + case *ExprStmt: + Walk(n.X, f) + + case *BranchStmt: + // no-op + + case *IfStmt: + Walk(n.Cond, f) + walkStmts(n.True, f) + walkStmts(n.False, f) + + case *AssignStmt: + Walk(n.LHS, f) + Walk(n.RHS, f) + + case *DefStmt: + Walk(n.Name, f) + for _, param := range n.Params { + Walk(param, f) + } + walkStmts(n.Body, f) + + case *ForStmt: + Walk(n.Vars, f) + Walk(n.X, f) + walkStmts(n.Body, f) + + case *ReturnStmt: + if n.Result != nil { + Walk(n.Result, f) + } + + case *LoadStmt: + Walk(n.Module, f) + for _, from := range n.From { + Walk(from, f) + } + for _, to := range n.To { + Walk(to, f) + } + + case *Ident, *Literal: + // no-op + + case *ListExpr: + for _, x := range n.List { + Walk(x, f) + } + + case *ParenExpr: + Walk(n.X, f) + + case *CondExpr: + Walk(n.Cond, f) + Walk(n.True, f) + Walk(n.False, f) + + case *IndexExpr: + Walk(n.X, f) + Walk(n.Y, f) + + case *DictEntry: + Walk(n.Key, f) + Walk(n.Value, f) + + case *SliceExpr: + Walk(n.X, f) + if n.Lo != nil { + Walk(n.Lo, f) + } + if n.Hi != nil { + Walk(n.Hi, f) + } + if n.Step != nil { + Walk(n.Step, f) + } + + case *Comprehension: + Walk(n.Body, f) + for _, clause := range n.Clauses { + Walk(clause, f) + } + + case *IfClause: + Walk(n.Cond, f) + + case *ForClause: + Walk(n.Vars, f) + Walk(n.X, f) + + case *TupleExpr: + for _, x := range n.List { + Walk(x, f) + } + + case *DictExpr: + for _, entry := range n.List { + entry := entry.(*DictEntry) + Walk(entry.Key, f) + Walk(entry.Value, f) + } + + case *UnaryExpr: + if n.X != nil { + Walk(n.X, f) + } + + case *BinaryExpr: + Walk(n.X, f) + Walk(n.Y, f) + + case *DotExpr: + Walk(n.X, f) + Walk(n.Name, f) + + case *CallExpr: + Walk(n.Fn, f) + for _, arg := range n.Args { + Walk(arg, f) + } + + case *LambdaExpr: + for _, param := range n.Params { + Walk(param, f) + } + Walk(n.Body, f) + + default: + panic(n) + } + + f(nil) +} + +func walkStmts(stmts []Stmt, f func(Node) bool) { + for _, stmt := 
range stmts { + Walk(stmt, f) + } +} diff --git a/syntax/walk_test.go b/syntax/walk_test.go new file mode 100644 index 0000000..00d9784 --- /dev/null +++ b/syntax/walk_test.go @@ -0,0 +1,103 @@ +package syntax_test + +import ( + "bytes" + "fmt" + "log" + "reflect" + "strings" + "testing" + + "go.starlark.net/syntax" +) + +func TestWalk(t *testing.T) { + const src = ` +for x in y: + if x: + pass + else: + f([2*x for x in "abc"]) +` + // TODO(adonovan): test that it finds all syntax.Nodes + // (compare against a reflect-based implementation). + // TODO(adonovan): test that the result of f is used to prune + // the descent. + f, err := syntax.Parse("hello.go", src, 0) + if err != nil { + t.Fatal(err) + } + + var buf bytes.Buffer + var depth int + syntax.Walk(f, func(n syntax.Node) bool { + if n == nil { + depth-- + return true + } + fmt.Fprintf(&buf, "%s%s\n", + strings.Repeat(" ", depth), + strings.TrimPrefix(reflect.TypeOf(n).String(), "*syntax.")) + depth++ + return true + }) + got := buf.String() + want := ` +File + ForStmt + Ident + Ident + IfStmt + Ident + BranchStmt + ExprStmt + CallExpr + Ident + Comprehension + BinaryExpr + Literal + Ident + ForClause + Ident + Literal` + got = strings.TrimSpace(got) + want = strings.TrimSpace(want) + if got != want { + t.Errorf("got %s, want %s", got, want) + } +} + +// ExampleWalk demonstrates the use of Walk to +// enumerate the identifiers in a Starlark source file +// containing a nonsense program with varied grammar. 
+func ExampleWalk() { + const src = ` +load("library", "a") + +def b(c, *, d=e): + f += {g: h} + i = -(j) + return k.l[m + n] + +for o in [p for q, r in s if t]: + u(lambda: v, w[x:y:z]) +` + f, err := syntax.Parse("hello.star", src, 0) + if err != nil { + log.Fatal(err) + } + + var idents []string + syntax.Walk(f, func(n syntax.Node) bool { + if id, ok := n.(*syntax.Ident); ok { + idents = append(idents, id.Name) + } + return true + }) + fmt.Println(strings.Join(idents, " ")) + + // The identifer 'a' appears in both LoadStmt.From[0] and LoadStmt.To[0]. + + // Output: + // a a b c d e f g h i j k l m n o p q r s t u v w x y z +} |