1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package dirhash defines hashes over directory trees.
6// These hashes are recorded in go.sum files and in the Go checksum database,
7// to allow verifying that a newly-downloaded module has the expected content.
8package dirhash
9
10import (
11	"archive/zip"
12	"crypto/sha256"
13	"encoding/base64"
14	"errors"
15	"fmt"
16	"io"
17	"os"
18	"path/filepath"
19	"sort"
20	"strings"
21)
22
23// DefaultHash is the default hash function used in new go.sum entries.
24var DefaultHash Hash = Hash1
25
// A Hash is a directory hash function.
//
// It is given the names of the files to cover together with an open callback
// that yields the content of a file from its name. The function is expected to
// open, read, hash, and close every listed file, and to return the resulting
// directory hash as a string.
type Hash func(files []string, open func(name string) (io.ReadCloser, error)) (string, error)
30
// Hash1 is the "h1:" directory hash function, using SHA-256.
//
// The result is "h1:" followed by the base64-encoded SHA-256 hash of a
// summary equivalent to the output of the Unix command:
//
//	sha256sum $(find . -type f | sort) | sha256sum
//
// The summary holds one line per listed file, in the order produced by
// sort.Strings on the file names. Each line is the hexadecimal SHA-256 hash
// of the file content, two spaces (U+0020), the file name, and a newline
// (U+000A).
//
// A file name containing a newline (U+000A) is rejected with an error.
// Errors from open or from reading a file are returned unchanged.
// The files slice passed by the caller is not modified.
func Hash1(files []string, open func(string) (io.ReadCloser, error)) (string, error) {
	// Sort a private copy so the caller's ordering is left untouched.
	names := make([]string, len(files))
	copy(names, files)
	sort.Strings(names)

	summary := sha256.New()
	for _, name := range names {
		// A newline inside a name would make the line-oriented summary ambiguous.
		if strings.IndexByte(name, '\n') >= 0 {
			return "", errors.New("dirhash: filenames with newlines are not supported")
		}

		rc, err := open(name)
		if err != nil {
			return "", err
		}
		content := sha256.New()
		_, copyErr := io.Copy(content, rc)
		// Close before inspecting the copy error so the file is released on every path.
		rc.Close()
		if copyErr != nil {
			return "", copyErr
		}

		fmt.Fprintf(summary, "%x  %s\n", content.Sum(nil), name)
	}

	digest := summary.Sum(nil)
	return "h1:" + base64.StdEncoding.EncodeToString(digest), nil
}
66
67// HashDir returns the hash of the local file system directory dir,
68// replacing the directory name itself with prefix in the file names
69// used in the hash function.
70func HashDir(dir, prefix string, hash Hash) (string, error) {
71	files, err := DirFiles(dir, prefix)
72	if err != nil {
73		return "", err
74	}
75	osOpen := func(name string) (io.ReadCloser, error) {
76		return os.Open(filepath.Join(dir, strings.TrimPrefix(name, prefix)))
77	}
78	return hash(files, osOpen)
79}
80
// DirFiles returns the list of files in the tree rooted at dir,
// replacing the directory name dir with prefix in each name.
// The resulting names always use forward slashes.
//
// Directories themselves are not listed; every other entry reported by
// filepath.Walk is. Names appear in the order Walk visits them. An error is
// returned if the walk fails or if dir itself is not a directory.
func DirFiles(dir, prefix string) ([]string, error) {
	var files []string
	dir = filepath.Clean(dir)
	err := filepath.Walk(dir, func(file string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		if file == dir {
			// The root of the walk was reported as a non-directory.
			return fmt.Errorf("%s is not a directory", dir)
		}

		// Use filepath.Rel instead of slicing off len(dir)+1 bytes: a cleaned
		// root directory already ends in a separator, so the fixed offset
		// would drop the first byte of every relative path.
		rel, err := filepath.Rel(dir, file)
		if err != nil {
			return err
		}
		files = append(files, filepath.ToSlash(filepath.Join(prefix, rel)))
		return nil
	})
	if err != nil {
		return nil, err
	}
	return files, nil
}
110
111// HashZip returns the hash of the file content in the named zip file.
112// Only the file names and their contents are included in the hash:
113// the exact zip file format encoding, compression method,
114// per-file modification times, and other metadata are ignored.
115func HashZip(zipfile string, hash Hash) (string, error) {
116	z, err := zip.OpenReader(zipfile)
117	if err != nil {
118		return "", err
119	}
120	defer z.Close()
121	var files []string
122	zfiles := make(map[string]*zip.File)
123	for _, file := range z.File {
124		files = append(files, file.Name)
125		zfiles[file.Name] = file
126	}
127	zipOpen := func(name string) (io.ReadCloser, error) {
128		f := zfiles[name]
129		if f == nil {
130			return nil, fmt.Errorf("file %q not found in zip", name) // should never happen
131		}
132		return f.Open()
133	}
134	return hash(files, zipOpen)
135}
136