// speculator allows you to preview pull requests to the matrix.org specification. // It serves the following HTTP endpoints: // - / lists open pull requests // - /spec/123 which renders the spec as html at pull request 123. // - /diff/rst/123 which gives a diff of the spec's rst at pull request 123. // - /diff/html/123 which gives a diff of the spec's HTML at pull request 123. // It is currently woefully inefficient, and there is a lot of low hanging fruit for improvement. package main import ( "bytes" "encoding/json" "flag" "fmt" "io" "io/ioutil" "log" "math/rand" "net/http" "net/url" "os" "os/exec" "path" "path/filepath" "regexp" "strconv" "strings" "sync" "syscall" "text/template" "time" "github.com/hashicorp/golang-lru" ) type PullRequest struct { Number int Base Commit Head Commit Title string User User HTMLURL string `json:"html_url"` } type Commit struct { SHA string Repo RequestRepo } type RequestRepo struct { CloneURL string `json:"clone_url"` } type User struct { Login string HTMLURL string `json:"html_url"` } var ( port = flag.Int("port", 9000, "Port on which to listen for HTTP") includesDir = flag.String("includes_dir", "", "Directory containing include files for styling like matrix.org") accessToken = flag.String("access_token", "", "github.com access token") allowedMembers map[string]bool specCache *lru.Cache // string -> map[string][]byte filename -> contents styledSpecCache *lru.Cache // string -> map[string][]byte filename -> contents ) func (u *User) IsTrusted() bool { return allowedMembers[u.Login] } const ( pullsPrefix = "https://api.github.com/repos/matrix-org/matrix-doc/pulls" matrixDocCloneURL = "https://github.com/matrix-org/matrix-doc.git" permissionsOwnerFull = 0700 ) var numericRegex = regexp.MustCompile(`^\d+$`) func accessTokenQuerystring() string { if *accessToken == "" { return "" } return fmt.Sprintf("?access_token=%s", *accessToken) } func gitClone(url string, directory string, shared bool) error { args := []string{"clone", url, directory} if shared { args = append(args, "--shared") } if err := runGitCommand(directory, args); err != nil { return err } return nil } func gitCheckout(path, sha string) error { return runGitCommand(path, []string{"checkout", sha}) } func runGitCommand(path string, args []string) error { cmd := exec.Command("git", args...) cmd.Dir = path var b bytes.Buffer cmd.Stderr = &b if err := cmd.Run(); err != nil { return fmt.Errorf("error running %q: %v (stderr: %s)", strings.Join(cmd.Args, " "), err, b.String()) } return nil } func lookupPullRequest(prNumber string) (*PullRequest, error) { resp, err := http.Get(fmt.Sprintf("%s/%s%s", pullsPrefix, prNumber, accessTokenQuerystring())) defer resp.Body.Close() if err != nil { return nil, fmt.Errorf("error getting pulls: %v", err) } if resp.StatusCode != 200 { body, _ := ioutil.ReadAll(resp.Body) return nil, fmt.Errorf("error getting pull request %s: %v", prNumber, string(body)) } dec := json.NewDecoder(resp.Body) var pr PullRequest if err := dec.Decode(&pr); err != nil { return nil, fmt.Errorf("error decoding pulls: %v", err) } return &pr, nil } func (s *server) lookupBranch(branch string) (string, error) { err := s.updateBase() if err != nil { log.Printf("Error fetching: %v, will use cached branches") } if strings.ToLower(branch) == "head" { branch = "master" } branch = "origin/" + branch sha, err := s.getSHAOf(branch) if err != nil { return "", fmt.Errorf("error getting branch %s: %v", branch, err) } if sha == "" { return "", fmt.Errorf("Unable to get sha for %s", branch) } return sha, nil } func generate(dir string) error { cmd := exec.Command("python", "gendoc.py", "--nodelete") cmd.Dir = path.Join(dir, "scripts") var b bytes.Buffer cmd.Stderr = &b if err := cmd.Run(); err != nil { return fmt.Errorf("error generating spec: %v\nOutput from gendoc:\n%v", err, b.String()) } // cheekily dump the swagger docs into the gen directory so they can be // served by serveSpec cmd = exec.Command("python", "dump-swagger.py", "-o", "gen/api-docs.json") cmd.Dir = path.Join(dir, "scripts") cmd.Stderr = &b if err := cmd.Run(); err != nil { return fmt.Errorf("error generating api docs: %v\nOutput from dump-swagger:\n%v", err, b.String()) } return nil } func writeError(w http.ResponseWriter, code int, err error) { w.Header().Set("Content-Type", "text/plain") w.WriteHeader(code) io.WriteString(w, fmt.Sprintf("%v\n", err)) } type server struct { mu sync.Mutex // Must be locked around any git command on matrixDocCloneURL matrixDocCloneURL string } func (s *server) updateBase() error { s.mu.Lock() defer s.mu.Unlock() return runGitCommand(s.matrixDocCloneURL, []string{"fetch"}) } // canCheckout returns whether a given sha can currently be checked out from s.matrixDocCloneURL. func (s *server) canCheckout(sha string) bool { s.mu.Lock() defer s.mu.Unlock() return runGitCommand(s.matrixDocCloneURL, []string{"cat-file", "-e", sha + "^{commit}"}) == nil } // generateAt generates spec from repo at sha. // Returns the path where the generation was done. func (s *server) generateAt(sha string) (dst string, err error) { if !s.canCheckout(sha) { err = s.updateBase() if err != nil { return } } dst, err = makeTempDir() if err != nil { return } log.Printf("Generating %s in %s\n", sha, dst) s.mu.Lock() err = gitClone(s.matrixDocCloneURL, dst, true) s.mu.Unlock() if err != nil { return } if err = gitCheckout(dst, sha); err != nil { return } err = generate(dst) return } func (s *server) getSHAOf(ref string) (string, error) { cmd := exec.Command("git", "rev-list", ref, "-n1") cmd.Dir = path.Join(s.matrixDocCloneURL) var b bytes.Buffer cmd.Stdout = &b s.mu.Lock() err := cmd.Run() s.mu.Unlock() if err != nil { return "", fmt.Errorf("error generating spec: %v\nOutput from git:\n%v", err, b.String()) } return strings.TrimSpace(b.String()), nil } // extractPRNumber checks that the path begins with the given base, and returns // the following component. func extractPRNumber(path, base string) (string, error) { if !strings.HasPrefix(path, base+"/") { return "", fmt.Errorf("invalid path passed: %q expect %s/123", path, base) } return strings.Split(path[len(base)+1:], "/")[0], nil } // extractPath extracts the file path within the gen directory which should be served for the request. // Returns one of (file to serve, path to redirect to). // path is the actual path being requested, e.g. "/spec/head/client_server.html". // base is the base path of the handler, including a trailing slash, before the PR number, e.g. "/spec/". func extractPath(path, base string) (string, string) { // Assumes exactly one flat directory // Count slashes in /spec/head/client_server.html // base is /spec/ // +1 for the PR number - /spec/head // +1 for the path-part after the slash after the PR number max := strings.Count(base, "/") + 2 parts := strings.SplitN(path, "/", max) if len(parts) < max { // Path is base/pr - redirect to base/pr/index.html return "", path + "/index.html" } if parts[max-1] == "" { // Path is base/pr/ - serve index.html return "index.html", "" } // Path is base/pr/file.html - serve file return parts[max-1], "" } func (s *server) serveSpec(w http.ResponseWriter, req *http.Request) { var sha string var styleLikeMatrixDotOrg = req.URL.Query().Get("matrixdotorgstyle") != "" if styleLikeMatrixDotOrg && *includesDir == "" { writeError(w, 500, fmt.Errorf("Cannot style like matrix.org - no include dir specified")) return } // we use URL.EscapedPath() to get hold of the %-encoded version of the // path, so that we can handle branch names with slashes in. urlPath := req.URL.EscapedPath() if urlPath == "/spec" { // special treatment for /spec - redirect to /spec/HEAD/ s.redirectTo(w, req, "/spec/HEAD/") return } if !strings.HasPrefix(urlPath, "/spec/") { writeError(w, 500, fmt.Errorf("invalid path passed: %q expect /spec/...", urlPath)) } splits := strings.SplitN(urlPath[6:], "/", 2) if len(splits) == 1 { // "/spec/foo" - redirect to "/spec/foo/" (so that relative links from the index work) if splits[0] == "" { s.redirectTo(w, req, "/spec/HEAD/") } else { s.redirectTo(w, req, urlPath+"/") } return } // now we have: // splits[0] is a PR#, or a branch name // splits[1] is the file to serve branchName, _ := url.QueryUnescape(splits[0]) requestedPath, _ := url.QueryUnescape(splits[1]) if requestedPath == "" { requestedPath = "index.html" } if numericRegex.MatchString(branchName) { // PR number pr, err := lookupPullRequest(branchName) if err != nil { writeError(w, 400, err) return } // We're going to run whatever Python is specified in the pull request, which // may do bad things, so only trust people we trust. if err := checkAuth(pr); err != nil { writeError(w, 403, err) return } sha = pr.Head.SHA log.Printf("Serving pr %s (%s)\n", branchName, sha) } else if strings.ToLower(branchName) == "head" || branchName == "master" || strings.HasPrefix(branchName, "drafts/") { branchSHA, err := s.lookupBranch(branchName) if err != nil { writeError(w, 400, err) return } sha = branchSHA log.Printf("Serving branch %s (%s)\n", branchName, sha) } else { writeError(w, 404, fmt.Errorf("invalid branch name")) return } var cache = specCache if styleLikeMatrixDotOrg { cache = styledSpecCache } var pathToContent map[string][]byte if cached, ok := cache.Get(sha); ok { pathToContent = cached.(map[string][]byte) } else { dst, err := s.generateAt(sha) defer os.RemoveAll(dst) if err != nil { writeError(w, 500, err) return } pathToContent = make(map[string][]byte) scriptsdir := path.Join(dst, "scripts") base := path.Join(scriptsdir, "gen") walker := func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() { return nil } rel, err := filepath.Rel(base, path) if err != nil { return fmt.Errorf("Failed to get relative path of %s: %v", path, err) } if styleLikeMatrixDotOrg { cmd := exec.Command("./add-matrix-org-stylings.pl", *includesDir, path) cmd.Dir = scriptsdir var b bytes.Buffer cmd.Stderr = &b if err := cmd.Run(); err != nil { return fmt.Errorf("error styling spec: %v\nOutput:\n%v", err, b.String()) } } bytes, err := ioutil.ReadFile(path) if err != nil { return fmt.Errorf("Error reading spec: %v", err) } pathToContent[rel] = bytes return nil } err = filepath.Walk(base, walker) if err != nil { writeError(w, 500, err) return } cache.Add(sha, pathToContent) } if requestedPath == "api-docs.json" { // allow other swagger UIs access to our swagger w.Header().Set("Access-Control-Allow-Origin", "*") } if b, ok := pathToContent[requestedPath]; ok { w.Write(b) return } if requestedPath == "index.html" { // Fall back to single-page spec for old PRs if b, ok := pathToContent["specification.html"]; ok { w.Write(b) return } } w.WriteHeader(404) w.Write([]byte("Not found")) } func (s *server) redirectTo(w http.ResponseWriter, req *http.Request, path string) { u := *req.URL u.Scheme = "http" u.Host = req.Host u.Path = path w.Header().Set("Location", u.String()) w.WriteHeader(302) } func checkAuth(pr *PullRequest) error { if !pr.User.IsTrusted() { return fmt.Errorf("%q is not a trusted pull requester", pr.User.Login) } return nil } func (s *server) serveRSTDiff(w http.ResponseWriter, req *http.Request) { prNumber, err := extractPRNumber(req.URL.Path, "/diff/rst") if err != nil { writeError(w, 400, err) return } pr, err := lookupPullRequest(prNumber) if err != nil { writeError(w, 400, err) return } // We're going to run whatever Python is specified in the pull request, which // may do bad things, so only trust people we trust. if err := checkAuth(pr); err != nil { writeError(w, 403, err) return } base, err := s.generateAt(pr.Base.SHA) defer os.RemoveAll(base) if err != nil { writeError(w, 500, err) return } head, err := s.generateAt(pr.Head.SHA) defer os.RemoveAll(head) if err != nil { writeError(w, 500, err) return } diffCmd := exec.Command("diff", "-r", "-u", path.Join(base, "scripts", "tmp"), path.Join(head, "scripts", "tmp")) var diff bytes.Buffer diffCmd.Stdout = &diff if err := ignoreExitCodeOne(diffCmd.Run()); err != nil { writeError(w, 500, fmt.Errorf("error running diff: %v", err)) return } w.Write(diff.Bytes()) } func (s *server) serveHTMLDiff(w http.ResponseWriter, req *http.Request) { prNumber, err := extractPRNumber(req.URL.Path, "/diff/html") if err != nil { writeError(w, 400, err) return } pr, err := lookupPullRequest(prNumber) if err != nil { writeError(w, 400, err) return } // We're going to run whatever Python is specified in the pull request, which // may do bad things, so only trust people we trust. if err := checkAuth(pr); err != nil { writeError(w, 403, err) return } base, err := s.generateAt(pr.Base.SHA) defer os.RemoveAll(base) if err != nil { writeError(w, 500, err) return } head, err := s.generateAt(pr.Head.SHA) defer os.RemoveAll(head) if err != nil { writeError(w, 500, err) return } htmlDiffer, err := findHTMLDiffer() if err != nil { writeError(w, 500, fmt.Errorf("could not find HTML differ")) return } requestedPath, redirect := extractPath(req.URL.Path, "/diff/spec/") if redirect != "" { s.redirectTo(w, req, redirect) return } cmd := exec.Command(htmlDiffer, path.Join(base, "scripts", "gen", requestedPath), path.Join(head, "scripts", "gen", requestedPath)) var stdout bytes.Buffer var stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { writeError(w, 500, fmt.Errorf("error running HTML differ: %v\nOutput:\n%v", err, stderr.String())) return } w.Write(stdout.Bytes()) } func findHTMLDiffer() (string, error) { wd, err := os.Getwd() if err != nil { return "", err } differ := path.Join(wd, "htmldiff.pl") if _, err := os.Stat(differ); err == nil { return differ, nil } return "", fmt.Errorf("unable to find htmldiff.pl") } func getPulls() ([]PullRequest, error) { resp, err := http.Get(fmt.Sprintf("%s%s", pullsPrefix, accessTokenQuerystring())) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != 200 { body, _ := ioutil.ReadAll(resp.Body) return nil, fmt.Errorf("error getting pull requests: %v", string(body)) } dec := json.NewDecoder(resp.Body) var pulls []PullRequest err = dec.Decode(&pulls) return pulls, err } // getBranches returns a list of the upstream branch names. // It attempts to `git fetch` before doing so. func (s *server) getBranches() ([]string, error) { err := s.updateBase() if err != nil { log.Printf("Error fetching: %v, will use cached branches") } cmd := exec.Command("git", "branch", "-r") cmd.Dir = path.Join(s.matrixDocCloneURL) var b bytes.Buffer cmd.Stdout = &b s.mu.Lock() err = cmd.Run() s.mu.Unlock() if err != nil { return nil, fmt.Errorf("Error reading branch names: %v. Output from git:\n%v", err, b.String()) } branches := []string{} for _, b := range strings.Split(b.String(), "\n") { b = strings.TrimSpace(b) if strings.HasPrefix(b, "origin/") { branches = append(branches, b[7:]) } } return branches, nil } func (srv *server) makeIndex(w http.ResponseWriter, req *http.Request) { pulls, err := getPulls() if err != nil { writeError(w, 500, err) return } branches, err := srv.getBranches() if err != nil { writeError(w, 500, err) return } // write our stuff into a buffer so that we can change our minds // and write a 500 if it all goes wrong. var b bytes.Buffer b.Write([]byte(`