1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package codehost
6
7import (
8	"context"
9	"errors"
10	"fmt"
11	"internal/lazyregexp"
12	"io"
13	"io/fs"
14	"os"
15	"path/filepath"
16	"sort"
17	"strconv"
18	"strings"
19	"sync"
20	"time"
21
22	"cmd/go/internal/base"
23	"cmd/go/internal/lockedfile"
24	"cmd/go/internal/par"
25	"cmd/go/internal/str"
26)
27
// VCSError reports a failure encountered while using a version control
// system. It signals that the location of the code is known for certain
// but the code could not be retrieved because of Err, so callers should
// surface the error rather than go on probing other candidate module paths.
//
// TODO(golang.org/issue/31730): See if we can invert this. (Return a
// distinguished error for “repo not found” and treat everything else
// as terminal.)
type VCSError struct {
	Err error
}

// Error returns the message of the wrapped error unchanged.
func (e *VCSError) Error() string {
	return e.Err.Error()
}

// Unwrap returns the wrapped error.
func (e *VCSError) Unwrap() error {
	return e.Err
}

// vcsErrorf builds an error with fmt.Errorf (so %w is honored) and
// returns it wrapped in a *VCSError.
func vcsErrorf(format string, a ...any) error {
	formatted := fmt.Errorf(format, a...)
	return &VCSError{Err: formatted}
}
48
// vcsCacheKey is the key type of vcsRepoCache. NewRepo builds one from
// its vcs and remote arguments.
type vcsCacheKey struct {
	vcs    string // version control system name, as passed to NewRepo
	remote string // remote repository location, as passed to NewRepo
}
53
54func NewRepo(ctx context.Context, vcs, remote string) (Repo, error) {
55	return vcsRepoCache.Do(vcsCacheKey{vcs, remote}, func() (Repo, error) {
56		repo, err := newVCSRepo(ctx, vcs, remote)
57		if err != nil {
58			return nil, &VCSError{err}
59		}
60		return repo, nil
61	})
62}
63
// vcsRepoCache holds the results produced by NewRepo, keyed by the
// (vcs, remote) pair.
var vcsRepoCache par.ErrCache[vcsCacheKey, Repo]
65
// vcsRepo is the Repo implementation used for every version control
// system listed in vcsCmds. It drives the system's command-line tool
// according to the vcsCmd table entry stored in cmd.
type vcsRepo struct {
	mu lockedfile.Mutex // protects all commands, so we don't have to decide which are safe on a per-VCS basis

	remote string  // remote location, as given to newVCSRepo
	cmd    *vcsCmd // command table for this repository's VCS
	dir    string  // working directory returned by WorkDir; commands run here

	// tags is filled in by loadTags, which Tags runs at most once
	// through tagsOnce. It stays nil if the tag-listing command fails.
	tagsOnce sync.Once
	tags     map[string]bool

	// branches is filled in by loadBranches, which Stat runs at most once
	// through branchesOnce. It stays nil if the VCS has no branches
	// command or the command fails.
	branchesOnce sync.Once
	branches     map[string]bool

	// fetchErr records the outcome of fetch, which Stat runs at most once
	// through fetchOnce.
	fetchOnce sync.Once
	fetchErr  error
}
82
83func newVCSRepo(ctx context.Context, vcs, remote string) (Repo, error) {
84	if vcs == "git" {
85		return newGitRepo(ctx, remote, false)
86	}
87	cmd := vcsCmds[vcs]
88	if cmd == nil {
89		return nil, fmt.Errorf("unknown vcs: %s %s", vcs, remote)
90	}
91	if !strings.Contains(remote, "://") {
92		return nil, fmt.Errorf("invalid vcs remote: %s %s", vcs, remote)
93	}
94
95	r := &vcsRepo{remote: remote, cmd: cmd}
96	var err error
97	r.dir, r.mu.Path, err = WorkDir(ctx, vcsWorkDirType+vcs, r.remote)
98	if err != nil {
99		return nil, err
100	}
101
102	if cmd.init == nil {
103		return r, nil
104	}
105
106	unlock, err := r.mu.Lock()
107	if err != nil {
108		return nil, err
109	}
110	defer unlock()
111
112	if _, err := os.Stat(filepath.Join(r.dir, "."+vcs)); err != nil {
113		release, err := base.AcquireNet()
114		if err != nil {
115			return nil, err
116		}
117		_, err = Run(ctx, r.dir, cmd.init(r.remote))
118		release()
119
120		if err != nil {
121			os.RemoveAll(r.dir)
122			return nil, err
123		}
124	}
125	return r, nil
126}
127
// vcsWorkDirType is the prefix that newVCSRepo joins with the VCS name
// to form the work-directory type it passes to WorkDir.
const vcsWorkDirType = "vcs1."
129
// vcsCmd describes how to drive one version control system from the
// command line. Fields of type func(...) []string build the argument
// list for a command; vcsRepo runs the result with Run. A nil init,
// branches, readZip or doReadZip means the operation is not available
// for that system (see the nil checks in newVCSRepo, loadBranches and
// ReadZip).
type vcsCmd struct {
	vcs           string                                                                              // vcs name "hg"
	init          func(remote string) []string                                                        // cmd to init repo to track remote
	tags          func(remote string) []string                                                        // cmd to list local tags
	tagRE         *lazyregexp.Regexp                                                                  // regexp to extract tag names from output of tags cmd
	branches      func(remote string) []string                                                        // cmd to list local branches
	branchRE      *lazyregexp.Regexp                                                                  // regexp to extract branch names from output of branches cmd
	badLocalRevRE *lazyregexp.Regexp                                                                  // regexp of names that must not be served out of local cache without doing fetch first
	statLocal     func(rev, remote string) []string                                                   // cmd to stat local rev
	parseStat     func(rev, out string) (*RevInfo, error)                                             // func to parse output of statLocal cmd
	fetch         []string                                                                            // cmd to fetch everything from remote
	latest        string                                                                              // name of latest commit on remote (tip, HEAD, etc)
	readFile      func(rev, file, remote string) []string                                             // cmd to read rev's file
	readZip       func(rev, subdir, remote, target string) []string                                   // cmd to read rev's subdir as zip file
	doReadZip     func(ctx context.Context, dst io.Writer, workDir, rev, subdir, remote string) error // arbitrary function to read rev's subdir as zip file
}
146
// re is a short alias for lazyregexp.New, used to build the patterns
// in vcsCmds.
var re = lazyregexp.New
148
// vcsCmds maps a version control system name to the command table that
// vcsRepo uses to drive it. git has no entry here: newVCSRepo hands it
// to newGitRepo before consulting this map.
var vcsCmds = map[string]*vcsCmd{
	// Mercurial. init clones into the working directory.
	"hg": {
		vcs: "hg",
		init: func(remote string) []string {
			return []string{"hg", "clone", "-U", "--", remote, "."}
		},
		tags: func(remote string) []string {
			return []string{"hg", "tags", "-q"}
		},
		// Each non-empty output line is taken as one tag or branch name.
		tagRE: re(`(?m)^[^\n]+$`),
		branches: func(remote string) []string {
			return []string{"hg", "branches", "-c", "-q"}
		},
		branchRE:      re(`(?m)^[^\n]+$`),
		badLocalRevRE: re(`(?m)^(tip)$`),
		statLocal: func(rev, remote string) []string {
			// The template yields the fields hgParseStat splits on whitespace.
			return []string{"hg", "log", "-l1", "-r", rev, "--template", "{node} {date|hgdate} {tags}"}
		},
		parseStat: hgParseStat,
		fetch:     []string{"hg", "pull", "-f"},
		latest:    "tip",
		readFile: func(rev, file, remote string) []string {
			return []string{"hg", "cat", "-r", rev, file}
		},
		readZip: func(rev, subdir, remote, target string) []string {
			// Restrict the archive to subdir when one is requested.
			pattern := []string{}
			if subdir != "" {
				pattern = []string{"-I", subdir + "/**"}
			}
			return str.StringList("hg", "archive", "-t", "zip", "--no-decode", "-r", rev, "--prefix=prefix/", pattern, "--", target)
		},
	},

	// Subversion. Every command addresses the remote URL directly, and
	// there is no fetch command.
	"svn": {
		vcs:  "svn",
		init: nil, // no local checkout
		tags: func(remote string) []string {
			// Tags are listed from "<remote minus a trailing /trunk>/tags".
			return []string{"svn", "list", "--", strings.TrimSuffix(remote, "/trunk") + "/tags"}
		},
		tagRE: re(`(?m)^(.*?)/?$`),
		statLocal: func(rev, remote string) []string {
			// "latest" is requested by omitting the @rev suffix.
			suffix := "@" + rev
			if rev == "latest" {
				suffix = ""
			}
			return []string{"svn", "log", "-l1", "--xml", "--", remote + suffix}
		},
		parseStat: svnParseStat,
		latest:    "latest",
		readFile: func(rev, file, remote string) []string {
			return []string{"svn", "cat", "--", remote + "/" + file + "@" + rev}
		},
		doReadZip: svnReadZip,
	},

	// Bazaar. init branches the remote into the working directory.
	"bzr": {
		vcs: "bzr",
		init: func(remote string) []string {
			return []string{"bzr", "branch", "--use-existing-dir", "--", remote, "."}
		},
		fetch: []string{
			"bzr", "pull", "--overwrite-tags",
		},
		tags: func(remote string) []string {
			return []string{"bzr", "tags"}
		},
		// The tag name is the leading run of non-space characters on each line.
		tagRE:         re(`(?m)^\S+`),
		badLocalRevRE: re(`^revno:-`),
		statLocal: func(rev, remote string) []string {
			return []string{"bzr", "log", "-l1", "--long", "--show-ids", "-r", rev}
		},
		parseStat: bzrParseStat,
		latest:    "revno:-1",
		readFile: func(rev, file, remote string) []string {
			return []string{"bzr", "cat", "-r", rev, file}
		},
		readZip: func(rev, subdir, remote, target string) []string {
			// Restrict the export to subdir when one is requested.
			extra := []string{}
			if subdir != "" {
				extra = []string{"./" + subdir}
			}
			return str.StringList("bzr", "export", "--format=zip", "-r", rev, "--root=prefix/", "--", target, extra)
		},
	},

	// Fossil. init clones into a file named ".fossil" in the working
	// directory; the other commands point at it with -R.
	"fossil": {
		vcs: "fossil",
		init: func(remote string) []string {
			return []string{"fossil", "clone", "--", remote, ".fossil"}
		},
		fetch: []string{"fossil", "pull", "-R", ".fossil"},
		tags: func(remote string) []string {
			return []string{"fossil", "tag", "-R", ".fossil", "list"}
		},
		// NOTE(review): placeholder pattern. As written it only matches the
		// literal text XXXTODO, so presumably no real fossil tags are
		// extracted by loadTags — confirm.
		tagRE: re(`XXXTODO`),
		statLocal: func(rev, remote string) []string {
			return []string{"fossil", "info", "-R", ".fossil", rev}
		},
		parseStat: fossilParseStat,
		latest:    "trunk",
		readFile: func(rev, file, remote string) []string {
			return []string{"fossil", "cat", "-R", ".fossil", "-r", rev, file}
		},
		readZip: func(rev, subdir, remote, target string) []string {
			// --include is only added for a subdir free of the characters
			// "*?[],"; any other subdir produces a command without it.
			extra := []string{}
			if subdir != "" && !strings.ContainsAny(subdir, "*?[],") {
				extra = []string{"--include", subdir}
			}
			// Note that vcsRepo.ReadZip below rewrites this command
			// to run in a different directory, to work around a fossil bug.
			return str.StringList("fossil", "zip", "-R", ".fossil", "--name", "prefix", extra, "--", rev, target)
		},
	},
}
263
264func (r *vcsRepo) loadTags(ctx context.Context) {
265	out, err := Run(ctx, r.dir, r.cmd.tags(r.remote))
266	if err != nil {
267		return
268	}
269
270	// Run tag-listing command and extract tags.
271	r.tags = make(map[string]bool)
272	for _, tag := range r.cmd.tagRE.FindAllString(string(out), -1) {
273		if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(tag) {
274			continue
275		}
276		r.tags[tag] = true
277	}
278}
279
280func (r *vcsRepo) loadBranches(ctx context.Context) {
281	if r.cmd.branches == nil {
282		return
283	}
284
285	out, err := Run(ctx, r.dir, r.cmd.branches(r.remote))
286	if err != nil {
287		return
288	}
289
290	r.branches = make(map[string]bool)
291	for _, branch := range r.cmd.branchRE.FindAllString(string(out), -1) {
292		if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(branch) {
293			continue
294		}
295		r.branches[branch] = true
296	}
297}
298
299func (r *vcsRepo) CheckReuse(ctx context.Context, old *Origin, subdir string) error {
300	return fmt.Errorf("vcs %s: CheckReuse: %w", r.cmd.vcs, errors.ErrUnsupported)
301}
302
303func (r *vcsRepo) Tags(ctx context.Context, prefix string) (*Tags, error) {
304	unlock, err := r.mu.Lock()
305	if err != nil {
306		return nil, err
307	}
308	defer unlock()
309
310	r.tagsOnce.Do(func() { r.loadTags(ctx) })
311	tags := &Tags{
312		// None of the other VCS provide a reasonable way to compute TagSum
313		// without downloading the whole repo, so we only include VCS and URL
314		// in the Origin.
315		Origin: &Origin{
316			VCS: r.cmd.vcs,
317			URL: r.remote,
318		},
319		List: []Tag{},
320	}
321	for tag := range r.tags {
322		if strings.HasPrefix(tag, prefix) {
323			tags.List = append(tags.List, Tag{tag, ""})
324		}
325	}
326	sort.Slice(tags.List, func(i, j int) bool {
327		return tags.List[i].Name < tags.List[j].Name
328	})
329	return tags, nil
330}
331
332func (r *vcsRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
333	unlock, err := r.mu.Lock()
334	if err != nil {
335		return nil, err
336	}
337	defer unlock()
338
339	if rev == "latest" {
340		rev = r.cmd.latest
341	}
342	r.branchesOnce.Do(func() { r.loadBranches(ctx) })
343	revOK := (r.cmd.badLocalRevRE == nil || !r.cmd.badLocalRevRE.MatchString(rev)) && !r.branches[rev]
344	if revOK {
345		if info, err := r.statLocal(ctx, rev); err == nil {
346			return info, nil
347		}
348	}
349
350	r.fetchOnce.Do(func() { r.fetch(ctx) })
351	if r.fetchErr != nil {
352		return nil, r.fetchErr
353	}
354	info, err := r.statLocal(ctx, rev)
355	if err != nil {
356		return nil, err
357	}
358	if !revOK {
359		info.Version = info.Name
360	}
361	return info, nil
362}
363
364func (r *vcsRepo) fetch(ctx context.Context) {
365	if len(r.cmd.fetch) > 0 {
366		release, err := base.AcquireNet()
367		if err != nil {
368			r.fetchErr = err
369			return
370		}
371		_, r.fetchErr = Run(ctx, r.dir, r.cmd.fetch)
372		release()
373	}
374}
375
376func (r *vcsRepo) statLocal(ctx context.Context, rev string) (*RevInfo, error) {
377	out, err := Run(ctx, r.dir, r.cmd.statLocal(rev, r.remote))
378	if err != nil {
379		return nil, &UnknownRevisionError{Rev: rev}
380	}
381	info, err := r.cmd.parseStat(rev, string(out))
382	if err != nil {
383		return nil, err
384	}
385	if info.Origin == nil {
386		info.Origin = new(Origin)
387	}
388	info.Origin.VCS = r.cmd.vcs
389	info.Origin.URL = r.remote
390	return info, nil
391}
392
393func (r *vcsRepo) Latest(ctx context.Context) (*RevInfo, error) {
394	return r.Stat(ctx, "latest")
395}
396
397func (r *vcsRepo) ReadFile(ctx context.Context, rev, file string, maxSize int64) ([]byte, error) {
398	if rev == "latest" {
399		rev = r.cmd.latest
400	}
401	_, err := r.Stat(ctx, rev) // download rev into local repo
402	if err != nil {
403		return nil, err
404	}
405
406	// r.Stat acquires r.mu, so lock after that.
407	unlock, err := r.mu.Lock()
408	if err != nil {
409		return nil, err
410	}
411	defer unlock()
412
413	out, err := Run(ctx, r.dir, r.cmd.readFile(rev, file, r.remote))
414	if err != nil {
415		return nil, fs.ErrNotExist
416	}
417	return out, nil
418}
419
420func (r *vcsRepo) RecentTag(ctx context.Context, rev, prefix string, allowed func(string) bool) (tag string, err error) {
421	// We don't technically need to lock here since we're returning an error
422	// unconditionally, but doing so anyway will help to avoid baking in
423	// lock-inversion bugs.
424	unlock, err := r.mu.Lock()
425	if err != nil {
426		return "", err
427	}
428	defer unlock()
429
430	return "", vcsErrorf("vcs %s: RecentTag: %w", r.cmd.vcs, errors.ErrUnsupported)
431}
432
433func (r *vcsRepo) DescendsFrom(ctx context.Context, rev, tag string) (bool, error) {
434	unlock, err := r.mu.Lock()
435	if err != nil {
436		return false, err
437	}
438	defer unlock()
439
440	return false, vcsErrorf("vcs %s: DescendsFrom: %w", r.cmd.vcs, errors.ErrUnsupported)
441}
442
443func (r *vcsRepo) ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
444	if r.cmd.readZip == nil && r.cmd.doReadZip == nil {
445		return nil, vcsErrorf("vcs %s: ReadZip: %w", r.cmd.vcs, errors.ErrUnsupported)
446	}
447
448	unlock, err := r.mu.Lock()
449	if err != nil {
450		return nil, err
451	}
452	defer unlock()
453
454	if rev == "latest" {
455		rev = r.cmd.latest
456	}
457	f, err := os.CreateTemp("", "go-readzip-*.zip")
458	if err != nil {
459		return nil, err
460	}
461	if r.cmd.doReadZip != nil {
462		lw := &limitedWriter{
463			W:               f,
464			N:               maxSize,
465			ErrLimitReached: errors.New("ReadZip: encoded file exceeds allowed size"),
466		}
467		err = r.cmd.doReadZip(ctx, lw, r.dir, rev, subdir, r.remote)
468		if err == nil {
469			_, err = f.Seek(0, io.SeekStart)
470		}
471	} else if r.cmd.vcs == "fossil" {
472		// If you run
473		//	fossil zip -R .fossil --name prefix trunk /tmp/x.zip
474		// fossil fails with "unable to create directory /tmp" [sic].
475		// Change the command to run in /tmp instead,
476		// replacing the -R argument with an absolute path.
477		args := r.cmd.readZip(rev, subdir, r.remote, filepath.Base(f.Name()))
478		for i := range args {
479			if args[i] == ".fossil" {
480				args[i] = filepath.Join(r.dir, ".fossil")
481			}
482		}
483		_, err = Run(ctx, filepath.Dir(f.Name()), args)
484	} else {
485		_, err = Run(ctx, r.dir, r.cmd.readZip(rev, subdir, r.remote, f.Name()))
486	}
487	if err != nil {
488		f.Close()
489		os.Remove(f.Name())
490		return nil, err
491	}
492	return &deleteCloser{f}, nil
493}
494
// deleteCloser wraps an *os.File so that closing it also removes the
// file from disk.
type deleteCloser struct {
	*os.File
}

// Close closes the underlying file and then removes it by name. The
// result is the error from closing; the outcome of the removal is not
// reported.
func (d *deleteCloser) Close() error {
	path := d.File.Name()
	closeErr := d.File.Close()
	os.Remove(path)
	return closeErr
}
504
505func hgParseStat(rev, out string) (*RevInfo, error) {
506	f := strings.Fields(out)
507	if len(f) < 3 {
508		return nil, vcsErrorf("unexpected response from hg log: %q", out)
509	}
510	hash := f[0]
511	version := rev
512	if strings.HasPrefix(hash, version) {
513		version = hash // extend to full hash
514	}
515	t, err := strconv.ParseInt(f[1], 10, 64)
516	if err != nil {
517		return nil, vcsErrorf("invalid time from hg log: %q", out)
518	}
519
520	var tags []string
521	for _, tag := range f[3:] {
522		if tag != "tip" {
523			tags = append(tags, tag)
524		}
525	}
526	sort.Strings(tags)
527
528	info := &RevInfo{
529		Origin: &Origin{
530			Hash: hash,
531		},
532		Name:    hash,
533		Short:   ShortenSHA1(hash),
534		Time:    time.Unix(t, 0).UTC(),
535		Version: version,
536		Tags:    tags,
537	}
538	return info, nil
539}
540
541func bzrParseStat(rev, out string) (*RevInfo, error) {
542	var revno int64
543	var tm time.Time
544	for _, line := range strings.Split(out, "\n") {
545		if line == "" || line[0] == ' ' || line[0] == '\t' {
546			// End of header, start of commit message.
547			break
548		}
549		if line[0] == '-' {
550			continue
551		}
552		before, after, found := strings.Cut(line, ":")
553		if !found {
554			// End of header, start of commit message.
555			break
556		}
557		key, val := before, strings.TrimSpace(after)
558		switch key {
559		case "revno":
560			if j := strings.Index(val, " "); j >= 0 {
561				val = val[:j]
562			}
563			i, err := strconv.ParseInt(val, 10, 64)
564			if err != nil {
565				return nil, vcsErrorf("unexpected revno from bzr log: %q", line)
566			}
567			revno = i
568		case "timestamp":
569			j := strings.Index(val, " ")
570			if j < 0 {
571				return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line)
572			}
573			t, err := time.Parse("2006-01-02 15:04:05 -0700", val[j+1:])
574			if err != nil {
575				return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line)
576			}
577			tm = t.UTC()
578		}
579	}
580	if revno == 0 || tm.IsZero() {
581		return nil, vcsErrorf("unexpected response from bzr log: %q", out)
582	}
583
584	info := &RevInfo{
585		Name:    strconv.FormatInt(revno, 10),
586		Short:   fmt.Sprintf("%012d", revno),
587		Time:    tm,
588		Version: rev,
589	}
590	return info, nil
591}
592
593func fossilParseStat(rev, out string) (*RevInfo, error) {
594	for _, line := range strings.Split(out, "\n") {
595		if strings.HasPrefix(line, "uuid:") || strings.HasPrefix(line, "hash:") {
596			f := strings.Fields(line)
597			if len(f) != 5 || len(f[1]) != 40 || f[4] != "UTC" {
598				return nil, vcsErrorf("unexpected response from fossil info: %q", line)
599			}
600			t, err := time.Parse(time.DateTime, f[2]+" "+f[3])
601			if err != nil {
602				return nil, vcsErrorf("unexpected response from fossil info: %q", line)
603			}
604			hash := f[1]
605			version := rev
606			if strings.HasPrefix(hash, version) {
607				version = hash // extend to full hash
608			}
609			info := &RevInfo{
610				Origin: &Origin{
611					Hash: hash,
612				},
613				Name:    hash,
614				Short:   ShortenSHA1(hash),
615				Time:    t,
616				Version: version,
617			}
618			return info, nil
619		}
620	}
621	return nil, vcsErrorf("unexpected response from fossil info: %q", out)
622}
623
// limitedWriter writes to W but lets at most N more bytes through.
// Each Write decreases N by the number of bytes W accepted. Once a
// Write carries data that does not fit, ErrLimitReached is returned.
type limitedWriter struct {
	W               io.Writer // destination for the permitted bytes
	N               int64     // number of bytes that may still be written
	ErrLimitReached error     // returned when a Write does not fit within N
}

// Write passes as much of p to W as the remaining budget allows.
//
// It returns the number of bytes W accepted. The error is W's own error
// if it reported one, ErrLimitReached if part of p was withheld, and nil
// otherwise. A zero-length Write never fails on account of the limit,
// because it cannot exceed it, even when the budget is exactly used up.
func (l *limitedWriter) Write(p []byte) (n int, err error) {
	if l.N > 0 {
		allowed := len(p)
		if l.N < int64(allowed) {
			allowed = int(l.N)
		}
		n, err = l.W.Write(p[:allowed])
		l.N -= int64(n)
		if err != nil || n >= len(p) {
			return n, err
		}
	}

	if len(p) == 0 {
		// The budget is spent but nothing was withheld.
		return 0, nil
	}
	return n, l.ErrLimitReached
}
645