1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package modfetch
6
7import (
8	"context"
9	"fmt"
10	"io"
11	"io/fs"
12	"os"
13	"strconv"
14	"time"
15
16	"cmd/go/internal/cfg"
17	"cmd/go/internal/modfetch/codehost"
18	"cmd/go/internal/par"
19	"cmd/go/internal/vcs"
20	web "cmd/go/internal/web"
21
22	"golang.org/x/mod/module"
23)
24
// traceRepo is a debugging switch. When it is true, Lookup logs its own
// calls and wraps every Repo it resolves in a loggingRepo, so that all
// repo actions are traced.
const traceRepo = false
26
// A Repo represents a repository storing all versions of a single module.
// It must be safe for simultaneous use by multiple goroutines.
type Repo interface {
	// ModulePath returns the module path.
	ModulePath() string

	// CheckReuse checks whether the validation criteria in the origin old
	// are still satisfied on the server corresponding to this module.
	// If so, the caller can reuse any cached Versions or RevInfo containing
	// this origin rather than redownloading those from the server.
	CheckReuse(ctx context.Context, old *codehost.Origin) error

	// Versions lists all known versions with the given prefix.
	// Pseudo-versions are not included.
	//
	// Versions should be returned sorted in semver order
	// (implementations can use semver.Sort).
	//
	// Versions returns a non-nil error only if there was a problem
	// fetching the list of versions: it may return an empty list
	// along with a nil error if the list of matching versions
	// is known to be empty.
	//
	// If the underlying repository does not exist,
	// Versions returns an error matching errors.Is(_, fs.ErrNotExist).
	Versions(ctx context.Context, prefix string) (*Versions, error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(ctx context.Context, rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying source code repository.
	// It is only used when there are no tagged versions.
	Latest(ctx context.Context) (*RevInfo, error)

	// GoMod returns the contents of the go.mod file for the given version.
	GoMod(ctx context.Context, version string) (data []byte, err error)

	// Zip writes a zip file for the given version to dst.
	Zip(ctx context.Context, dst io.Writer, version string) error
}
70
// A Versions describes the available versions in a module repository.
// It is the result type of Repo.Versions.
type Versions struct {
	// Origin holds origin information for reuse (see Repo.CheckReuse).
	// The omitempty tag leaves it out of the JSON encoding when it is nil.
	Origin *codehost.Origin `json:",omitempty"`

	// List holds the semver versions.
	List []string
}
77
// A RevInfo describes a single revision in a module repository.
// It is the result type of Repo.Stat and Repo.Latest.
type RevInfo struct {
	Version string    // suggested version string for this revision
	Time    time.Time // commit time

	// These fields are used for Stat of arbitrary rev,
	// but they are not recorded when talking about module versions.
	// Their `json:"-"` tags exclude them from the JSON encoding.
	Name  string `json:"-"` // complete ID in underlying repository
	Short string `json:"-"` // shortened ID, for use in pseudo-version

	// Origin records provenance for reuse (see Repo.CheckReuse).
	// The omitempty tag leaves it out of the JSON encoding when it is nil.
	Origin *codehost.Origin `json:",omitempty"`
}
90
91// Re: module paths, import paths, repository roots, and lookups
92//
93// A module is a collection of Go packages stored in a file tree
94// with a go.mod file at the root of the tree.
95// The go.mod defines the module path, which is the import path
96// corresponding to the root of the file tree.
97// The import path of a directory within that file tree is the module path
98// joined with the name of the subdirectory relative to the root.
99//
100// For example, the module with path rsc.io/qr corresponds to the
101// file tree in the repository https://github.com/rsc/qr.
102// That file tree has a go.mod that says "module rsc.io/qr".
103// The package in the root directory has import path "rsc.io/qr".
104// The package in the gf256 subdirectory has import path "rsc.io/qr/gf256".
105// In this example, "rsc.io/qr" is both a module path and an import path.
106// But "rsc.io/qr/gf256" is only an import path, not a module path:
107// it names an importable package, but not a module.
108//
109// As a special case to incorporate code written before modules were
110// introduced, if a path p resolves using the pre-module "go get" lookup
111// to the root of a source code repository without a go.mod file,
112// that repository is treated as if it had a go.mod in its root directory
113// declaring module path p. (The go.mod is further considered to
114// contain requirements corresponding to any legacy version
115// tracking format such as Gopkg.lock, vendor/vendor.conf, and so on.)
116//
117// The presentation so far ignores the fact that a source code repository
118// has many different versions of a file tree, and those versions may
119// differ in whether a particular go.mod exists and what it contains.
120// In fact there is a well-defined mapping only from a module path, version
121// pair - often written path@version - to a particular file tree.
122// For example rsc.io/qr@v0.1.0 depends on the "implicit go.mod at root of
123// repository" rule, while rsc.io/qr@v0.2.0 has an explicit go.mod.
124// Because the "go get" import paths rsc.io/qr and github.com/rsc/qr
125// both redirect to the Git repository https://github.com/rsc/qr,
126// github.com/rsc/qr@v0.1.0 is the same file tree as rsc.io/qr@v0.1.0
127// but a different module (a different name). In contrast, since v0.2.0
128// of that repository has an explicit go.mod that declares path rsc.io/qr,
129// github.com/rsc/qr@v0.2.0 is an invalid module path, version pair.
130// Before modules, import comments would have had the same effect.
131//
132// The set of import paths associated with a given module path is
133// clearly not fixed: at the least, new directories with new import paths
134// can always be added. But another potential operation is to split a
135// subtree out of a module into its own module. If done carefully,
136// this operation can be done while preserving compatibility for clients.
137// For example, suppose that we want to split rsc.io/qr/gf256 into its
138// own module, so that there would be two modules rsc.io/qr and rsc.io/qr/gf256.
139// Then we can simultaneously issue rsc.io/qr v0.3.0 (dropping the gf256 subdirectory)
140// and rsc.io/qr/gf256 v0.1.0, including in their respective go.mod
141// cyclic requirements pointing at each other: rsc.io/qr v0.3.0 requires
142// rsc.io/qr/gf256 v0.1.0 and vice versa. Then a build can be
143// using an older rsc.io/qr module that includes the gf256 package, but if
144// it adds a requirement on either the newer rsc.io/qr or the newer
145// rsc.io/qr/gf256 module, it will automatically add the requirement
146// on the complementary half, ensuring both that rsc.io/qr/gf256 is
147// available for importing by the build and also that it is only defined
148// by a single module. The gf256 package could move back into the
149// original by another simultaneous release of rsc.io/qr v0.4.0 including
150// the gf256 subdirectory and an rsc.io/qr/gf256 v0.2.0 with no code
151// in its root directory, along with a new requirement cycle.
152// The ability to shift module boundaries in this way is expected to be
153// important in large-scale program refactorings, similar to the ones
154// described in https://talks.golang.org/2016/refactor.article.
155//
156// The possibility of shifting module boundaries reemphasizes
157// that you must know both the module path and its version
158// to determine the set of packages provided directly by that module.
159//
160// On top of all this, it is possible for a single code repository
161// to contain multiple modules, either in branches or subdirectories,
162// as a limited kind of monorepo. For example rsc.io/qr/v2,
163// the v2.x.x continuation of rsc.io/qr, is expected to be found
164// in v2-tagged commits in https://github.com/rsc/qr, either
165// in the root or in a v2 subdirectory, disambiguated by go.mod.
166// Again the precise file tree corresponding to a module
167// depends on which version we are considering.
168//
169// It is also possible for the underlying repository to change over time,
170// without changing the module path. If I copy the github repo over
171// to https://bitbucket.org/rsc/qr and update https://rsc.io/qr?go-get=1,
172// then clients of all versions should start fetching from bitbucket
173// instead of github. That is, in contrast to the exact file tree,
174// the location of the source code repository associated with a module path
175// does not depend on the module version. (This is by design, as the whole
176// point of these redirects is to allow package authors to establish a stable
177// name that can be updated as code moves from one service to another.)
178//
179// All of this is important background for the lookup APIs defined in this
180// file.
181//
182// The Lookup function takes a module path and returns a Repo representing
183// that module path. Lookup can do only a little with the path alone.
// It can check that the path is well-formed (see module.CheckPath)
185// and it can check that the path can be resolved to a target repository.
186// To avoid version control access except when absolutely necessary,
187// Lookup does not attempt to connect to the repository itself.
188
189var lookupCache par.Cache[lookupCacheKey, Repo]
190
191type lookupCacheKey struct {
192	proxy, path string
193}
194
195// Lookup returns the module with the given module path,
196// fetched through the given proxy.
197//
198// The distinguished proxy "direct" indicates that the path should be fetched
199// from its origin, and "noproxy" indicates that the patch should be fetched
200// directly only if GONOPROXY matches the given path.
201//
202// For the distinguished proxy "off", Lookup always returns a Repo that returns
203// a non-nil error for every method call.
204//
205// A successful return does not guarantee that the module
206// has any defined versions.
207func Lookup(ctx context.Context, proxy, path string) Repo {
208	if traceRepo {
209		defer logCall("Lookup(%q, %q)", proxy, path)()
210	}
211
212	return lookupCache.Do(lookupCacheKey{proxy, path}, func() Repo {
213		return newCachingRepo(ctx, path, func(ctx context.Context) (Repo, error) {
214			r, err := lookup(ctx, proxy, path)
215			if err == nil && traceRepo {
216				r = newLoggingRepo(r)
217			}
218			return r, err
219		})
220	})
221}
222
223// lookup returns the module with the given module path.
224func lookup(ctx context.Context, proxy, path string) (r Repo, err error) {
225	if cfg.BuildMod == "vendor" {
226		return nil, errLookupDisabled
227	}
228
229	switch path {
230	case "go", "toolchain":
231		return &toolchainRepo{path, Lookup(ctx, proxy, "golang.org/toolchain")}, nil
232	}
233
234	if module.MatchPrefixPatterns(cfg.GONOPROXY, path) {
235		switch proxy {
236		case "noproxy", "direct":
237			return lookupDirect(ctx, path)
238		default:
239			return nil, errNoproxy
240		}
241	}
242
243	switch proxy {
244	case "off":
245		return errRepo{path, errProxyOff}, nil
246	case "direct":
247		return lookupDirect(ctx, path)
248	case "noproxy":
249		return nil, errUseProxy
250	default:
251		return newProxyRepo(proxy, path)
252	}
253}
254
255type lookupDisabledError struct{}
256
257func (lookupDisabledError) Error() string {
258	if cfg.BuildModReason == "" {
259		return fmt.Sprintf("module lookup disabled by -mod=%s", cfg.BuildMod)
260	}
261	return fmt.Sprintf("module lookup disabled by -mod=%s\n\t(%s)", cfg.BuildMod, cfg.BuildModReason)
262}
263
264var errLookupDisabled error = lookupDisabledError{}
265
266var (
267	errProxyOff       = notExistErrorf("module lookup disabled by GOPROXY=off")
268	errNoproxy  error = notExistErrorf("disabled by GOPRIVATE/GONOPROXY")
269	errUseProxy error = notExistErrorf("path does not match GOPRIVATE/GONOPROXY")
270)
271
272func lookupDirect(ctx context.Context, path string) (Repo, error) {
273	security := web.SecureOnly
274
275	if module.MatchPrefixPatterns(cfg.GOINSECURE, path) {
276		security = web.Insecure
277	}
278	rr, err := vcs.RepoRootForImportPath(path, vcs.PreferMod, security)
279	if err != nil {
280		// We don't know where to find code for a module with this path.
281		return nil, notExistError{err: err}
282	}
283
284	if rr.VCS.Name == "mod" {
285		// Fetch module from proxy with base URL rr.Repo.
286		return newProxyRepo(rr.Repo, path)
287	}
288
289	code, err := lookupCodeRepo(ctx, rr)
290	if err != nil {
291		return nil, err
292	}
293	return newCodeRepo(code, rr.Root, path)
294}
295
296func lookupCodeRepo(ctx context.Context, rr *vcs.RepoRoot) (codehost.Repo, error) {
297	code, err := codehost.NewRepo(ctx, rr.VCS.Cmd, rr.Repo)
298	if err != nil {
299		if _, ok := err.(*codehost.VCSError); ok {
300			return nil, err
301		}
302		return nil, fmt.Errorf("lookup %s: %v", rr.Root, err)
303	}
304	return code, nil
305}
306
307// A loggingRepo is a wrapper around an underlying Repo
308// that prints a log message at the start and end of each call.
309// It can be inserted when debugging.
310type loggingRepo struct {
311	r Repo
312}
313
314func newLoggingRepo(r Repo) *loggingRepo {
315	return &loggingRepo{r}
316}
317
// logCall writes "+++ <message>" to standard error, where the message is
// format applied to args, and returns a function that writes the message
// again, prefixed by the number of seconds elapsed since logCall was called.
// The message is formatted anew each time it is written.
//
// Typical usage is:
//
//	defer logCall("hello %s", arg)()
//
// Note the final (): without it the returned function is never run
// and no elapsed-time line is printed.
func logCall(format string, args ...any) func() {
	message := func() string { return fmt.Sprintf(format, args...) }

	began := time.Now()
	fmt.Fprintln(os.Stderr, "+++", message())

	return func() {
		elapsed := time.Since(began).Seconds()
		fmt.Fprintf(os.Stderr, "%.3fs %s\n", elapsed, message())
	}
}
333
// ModulePath returns the module path of the underlying Repo.
// Unlike the other loggingRepo methods, it does not log anything.
func (l *loggingRepo) ModulePath() string {
	return l.r.ModulePath()
}
337
338func (l *loggingRepo) CheckReuse(ctx context.Context, old *codehost.Origin) (err error) {
339	defer func() {
340		logCall("CheckReuse[%s]: %v", l.r.ModulePath(), err)
341	}()
342	return l.r.CheckReuse(ctx, old)
343}
344
345func (l *loggingRepo) Versions(ctx context.Context, prefix string) (*Versions, error) {
346	defer logCall("Repo[%s]: Versions(%q)", l.r.ModulePath(), prefix)()
347	return l.r.Versions(ctx, prefix)
348}
349
350func (l *loggingRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
351	defer logCall("Repo[%s]: Stat(%q)", l.r.ModulePath(), rev)()
352	return l.r.Stat(ctx, rev)
353}
354
355func (l *loggingRepo) Latest(ctx context.Context) (*RevInfo, error) {
356	defer logCall("Repo[%s]: Latest()", l.r.ModulePath())()
357	return l.r.Latest(ctx)
358}
359
360func (l *loggingRepo) GoMod(ctx context.Context, version string) ([]byte, error) {
361	defer logCall("Repo[%s]: GoMod(%q)", l.r.ModulePath(), version)()
362	return l.r.GoMod(ctx, version)
363}
364
365func (l *loggingRepo) Zip(ctx context.Context, dst io.Writer, version string) error {
366	dstName := "_"
367	if dst, ok := dst.(interface{ Name() string }); ok {
368		dstName = strconv.Quote(dst.Name())
369	}
370	defer logCall("Repo[%s]: Zip(%s, %q)", l.r.ModulePath(), dstName, version)()
371	return l.r.Zip(ctx, dst, version)
372}
373
374// errRepo is a Repo that returns the same error for all operations.
375//
376// It is useful in conjunction with caching, since cache hits will not attempt
377// the prohibited operations.
378type errRepo struct {
379	modulePath string
380	err        error
381}
382
383func (r errRepo) ModulePath() string { return r.modulePath }
384
385func (r errRepo) CheckReuse(ctx context.Context, old *codehost.Origin) error     { return r.err }
386func (r errRepo) Versions(ctx context.Context, prefix string) (*Versions, error) { return nil, r.err }
387func (r errRepo) Stat(ctx context.Context, rev string) (*RevInfo, error)         { return nil, r.err }
388func (r errRepo) Latest(ctx context.Context) (*RevInfo, error)                   { return nil, r.err }
389func (r errRepo) GoMod(ctx context.Context, version string) ([]byte, error)      { return nil, r.err }
390func (r errRepo) Zip(ctx context.Context, dst io.Writer, version string) error   { return r.err }
391
// A notExistError is like fs.ErrNotExist, but with a custom message:
// it matches fs.ErrNotExist under errors.Is while reporting the message
// of the error it wraps.
type notExistError struct {
	err error // wrapped error; supplies the message and the Unwrap result
}

// notExistErrorf formats its arguments as fmt.Errorf does and returns
// the resulting error wrapped in a notExistError.
func notExistErrorf(format string, args ...any) error {
	cause := fmt.Errorf(format, args...)
	return notExistError{err: cause}
}

// Error returns the message of the wrapped error.
func (ne notExistError) Error() string {
	msg := ne.err.Error()
	return msg
}

// Is reports whether target is fs.ErrNotExist.
// It is consulted by errors.Is.
func (notExistError) Is(target error) bool {
	switch target {
	case fs.ErrNotExist:
		return true
	default:
		return false
	}
}

// Unwrap returns the wrapped error.
func (ne notExistError) Unwrap() error {
	return ne.err
}
412