// speculator allows you to preview pull requests to the matrix.org specification. // It serves the following HTTP endpoints: // - / lists open pull requests // - /spec/123 which renders the spec as html at pull request 123. // - /diff/rst/123 which gives a diff of the spec's rst at pull request 123. // - /diff/html/123 which gives a diff of the spec's HTML at pull request 123. // It is currently woefully inefficient, and there is a lot of low hanging fruit for improvement. package main import ( "bytes" "encoding/json" "flag" "fmt" "io" "io/ioutil" "log" "math/rand" "net/http" "net/url" "os" "os/exec" "path" "strconv" "strings" "sync" "syscall" "time" "github.com/hashicorp/golang-lru" ) type PullRequest struct { Number int Base Commit Head Commit Title string User User HTMLURL string `json:"html_url"` } type Commit struct { SHA string Repo RequestRepo } type RequestRepo struct { CloneURL string `json:"clone_url"` } type User struct { Login string HTMLURL string `json:"html_url"` } var ( port = flag.Int("port", 9000, "Port on which to listen for HTTP") includesDir = flag.String("includes_dir", "", "Directory containing include files for styling like matrix.org") allowedMembers map[string]bool specCache *lru.Cache // string -> map[string][]byte filename -> contents styledSpecCache *lru.Cache // string -> map[string][]byte filename -> contents ) func (u *User) IsTrusted() bool { return allowedMembers[u.Login] } const ( pullsPrefix = "https://api.github.com/repos/matrix-org/matrix-doc/pulls" matrixDocCloneURL = "https://github.com/matrix-org/matrix-doc.git" permissionsOwnerFull = 0700 ) func gitClone(url string, shared bool) (string, error) { directory := path.Join("/tmp/matrix-doc", strconv.FormatInt(rand.Int63(), 10)) if err := os.MkdirAll(directory, permissionsOwnerFull); err != nil { return "", fmt.Errorf("error making directory %s: %v", directory, err) } args := []string{"clone", url, directory} if shared { args = append(args, "--shared") } if err := runGitCommand(directory, args); err != nil { return "", err } return directory, nil } func gitCheckout(path, sha string) error { return runGitCommand(path, []string{"checkout", sha}) } func runGitCommand(path string, args []string) error { cmd := exec.Command("git", args...) cmd.Dir = path var b bytes.Buffer cmd.Stderr = &b if err := cmd.Run(); err != nil { return fmt.Errorf("error running %q: %v (stderr: %s)", strings.Join(cmd.Args, " "), err, b.String()) } return nil } func lookupPullRequest(url url.URL, pathPrefix string) (*PullRequest, error) { if !strings.HasPrefix(url.Path, pathPrefix+"/") { return nil, fmt.Errorf("invalid path passed: %s expect %s/123", url.Path, pathPrefix) } prNumber := strings.Split(url.Path[len(pathPrefix)+1:], "/")[0] resp, err := http.Get(fmt.Sprintf("%s/%s", pullsPrefix, prNumber)) defer resp.Body.Close() if err != nil { return nil, fmt.Errorf("error getting pulls: %v", err) } dec := json.NewDecoder(resp.Body) var pr PullRequest if err := dec.Decode(&pr); err != nil { return nil, fmt.Errorf("error decoding pulls: %v", err) } return &pr, nil } func generate(dir string) error { cmd := exec.Command("python", "gendoc.py", "--nodelete") cmd.Dir = path.Join(dir, "scripts") var b bytes.Buffer cmd.Stderr = &b if err := cmd.Run(); err != nil { return fmt.Errorf("error generating spec: %v\nOutput from gendoc:\n%v", err, b.String()) } return nil } func writeError(w http.ResponseWriter, code int, err error) { w.Header().Set("Content-Type", "text/plain") w.WriteHeader(code) io.WriteString(w, fmt.Sprintf("%v\n", err)) } type server struct { mu sync.Mutex // Must be locked around any git command on matrixDocCloneURL matrixDocCloneURL string } func (s *server) updateBase() error { s.mu.Lock() defer s.mu.Unlock() return runGitCommand(s.matrixDocCloneURL, []string{"fetch"}) } // canCheckout returns whether a given sha can currently be checked out from s.matrixDocCloneURL. func (s *server) canCheckout(sha string) bool { s.mu.Lock() defer s.mu.Unlock() return runGitCommand(s.matrixDocCloneURL, []string{"cat-file", "-e", sha + "^{commit}"}) == nil } // generateAt generates spec from repo at sha. // Returns the path where the generation was done. func (s *server) generateAt(sha string) (dst string, err error) { if !s.canCheckout(sha) { err = s.updateBase() if err != nil { return } } s.mu.Lock() dst, err = gitClone(s.matrixDocCloneURL, true) s.mu.Unlock() if err != nil { return } if err = gitCheckout(dst, sha); err != nil { return } err = generate(dst) return } func (s *server) getSHAOf(ref string) (string, error) { cmd := exec.Command("git", "rev-list", ref, "-n1") cmd.Dir = path.Join(s.matrixDocCloneURL) var b bytes.Buffer cmd.Stdout = &b s.mu.Lock() err := cmd.Run() s.mu.Unlock() if err != nil { return "", fmt.Errorf("error generating spec: %v\nOutput from gendoc:\n%v", err, b.String()) } return strings.TrimSpace(b.String()), nil } // extractPath extracts the file path within the gen directory which should be served for the request. // Returns one of (file to serve, path to redirect to). // path is the actual path being requested, e.g. "/spec/head/client_server.html". // base is the base path of the handler, including a trailing slash, before the PR number, e.g. "/spec/". func extractPath(path, base string) (string, string) { // Assumes exactly one flat directory // Count slashes in /spec/head/client_server.html // base is /spec/ // +1 for the PR number - /spec/head // +1 for the path-part after the slash after the PR number max := strings.Count(base, "/") + 2 parts := strings.SplitN(path, "/", max) if len(parts) < max { // Path is base/pr - redirect to base/pr/index.html return "", path + "/index.html" } if parts[max-1] == "" { // Path is base/pr/ - serve index.html return "index.html", "" } // Path is base/pr/file.html - serve file return parts[max-1], "" } func (s *server) serveSpec(w http.ResponseWriter, req *http.Request) { var sha string var styleLikeMatrixDotOrg = req.URL.Query().Get("matrixdotorgstyle") != "" if styleLikeMatrixDotOrg && *includesDir == "" { writeError(w, 500, fmt.Errorf("Cannot style like matrix.org - no include dir specified")) return } if strings.HasPrefix(strings.ToLower(req.URL.Path), "/spec/head") { // err may be non-nil here but if headSha is non-empty we will serve a possibly-stale result in favour of erroring. // This is to deal with cases like where github is down but we still want to serve the spec. if headSha, err := s.lookupHeadSHA(); headSha == "" { writeError(w, 500, err) return } else { sha = headSha } } else { pr, err := lookupPullRequest(*req.URL, "/spec") if err != nil { writeError(w, 400, err) return } // We're going to run whatever Python is specified in the pull request, which // may do bad things, so only trust people we trust. if err := checkAuth(pr); err != nil { writeError(w, 403, err) return } sha = pr.Head.SHA } var cache = specCache if styleLikeMatrixDotOrg { cache = styledSpecCache } var pathToContent map[string][]byte if cached, ok := cache.Get(sha); ok { pathToContent = cached.(map[string][]byte) } else { dst, err := s.generateAt(sha) defer os.RemoveAll(dst) if err != nil { writeError(w, 500, err) return } if styleLikeMatrixDotOrg { cmd := exec.Command("./add-matrix-org-stylings.sh", *includesDir) cmd.Dir = path.Join(dst, "scripts") var b bytes.Buffer cmd.Stderr = &b if err := cmd.Run(); err != nil { writeError(w, 500, fmt.Errorf("error styling spec: %v\nOutput:\n%v", err, b.String())) return } } fis, err := ioutil.ReadDir(path.Join(dst, "scripts", "gen")) if err != nil { writeError(w, 500, fmt.Errorf("Error reading directory: %v", err)) } pathToContent = make(map[string][]byte) for _, fi := range fis { b, err := ioutil.ReadFile(path.Join(dst, "scripts", "gen", fi.Name())) if err != nil { writeError(w, 500, fmt.Errorf("Error reading spec: %v", err)) return } pathToContent[fi.Name()] = b } cache.Add(sha, pathToContent) } requestedPath, redirect := extractPath(req.URL.Path, "/spec/") if redirect != "" { s.redirectTo(w, req, redirect) return } if b, ok := pathToContent[requestedPath]; ok { w.Write(b) return } if requestedPath == "index.html" { // Fall back to single-page spec for old PRs if b, ok := pathToContent["specification.html"]; ok { w.Write(b) return } } w.WriteHeader(404) w.Write([]byte("Not found")) } func (s *server) redirectTo(w http.ResponseWriter, req *http.Request, path string) { req.URL.Path = path w.Header().Set("Location", req.URL.String()) w.WriteHeader(302) } // lookupHeadSHA looks up what origin/master's HEAD SHA is. // It attempts to `git fetch` before doing so. // If this fails, it may still return a stale sha, but will also return an error. func (s *server) lookupHeadSHA() (sha string, retErr error) { retErr = s.updateBase() if retErr != nil { log.Printf("Error fetching: %v, attempting to fall back to current known value", retErr) } originHead, err := s.getSHAOf("origin/master") if err != nil { retErr = err } sha = originHead if retErr != nil && originHead != "" { log.Printf("Successfully fell back to possibly stale sha: %s", sha) } return } func checkAuth(pr *PullRequest) error { if !pr.User.IsTrusted() { return fmt.Errorf("%q is not a trusted pull requester", pr.User.Login) } return nil } func (s *server) serveRSTDiff(w http.ResponseWriter, req *http.Request) { pr, err := lookupPullRequest(*req.URL, "/diff/rst") if err != nil { writeError(w, 400, err) return } // We're going to run whatever Python is specified in the pull request, which // may do bad things, so only trust people we trust. if err := checkAuth(pr); err != nil { writeError(w, 403, err) return } base, err := s.generateAt(pr.Base.SHA) defer os.RemoveAll(base) if err != nil { writeError(w, 500, err) return } head, err := s.generateAt(pr.Head.SHA) defer os.RemoveAll(head) if err != nil { writeError(w, 500, err) return } diffCmd := exec.Command("diff", "-r", "-u", path.Join(base, "scripts", "tmp"), path.Join(head, "scripts", "tmp")) var diff bytes.Buffer diffCmd.Stdout = &diff if err := ignoreExitCodeOne(diffCmd.Run()); err != nil { writeError(w, 500, fmt.Errorf("error running diff: %v", err)) return } w.Write(diff.Bytes()) } func (s *server) serveHTMLDiff(w http.ResponseWriter, req *http.Request) { pr, err := lookupPullRequest(*req.URL, "/diff/html") if err != nil { writeError(w, 400, err) return } // We're going to run whatever Python is specified in the pull request, which // may do bad things, so only trust people we trust. if err := checkAuth(pr); err != nil { writeError(w, 403, err) return } base, err := s.generateAt(pr.Base.SHA) defer os.RemoveAll(base) if err != nil { writeError(w, 500, err) return } head, err := s.generateAt(pr.Head.SHA) defer os.RemoveAll(head) if err != nil { writeError(w, 500, err) return } htmlDiffer, err := findHTMLDiffer() if err != nil { writeError(w, 500, fmt.Errorf("could not find HTML differ")) return } requestedPath, redirect := extractPath(req.URL.Path, "/diff/spec/") if redirect != "" { s.redirectTo(w, req, redirect) return } cmd := exec.Command(htmlDiffer, path.Join(base, "scripts", "gen", requestedPath), path.Join(head, "scripts", "gen", requestedPath)) var b bytes.Buffer cmd.Stdout = &b if err := cmd.Run(); err != nil { writeError(w, 500, fmt.Errorf("error running HTML differ: %v", err)) return } w.Write(b.Bytes()) } func findHTMLDiffer() (string, error) { wd, err := os.Getwd() if err != nil { return "", err } differ := path.Join(wd, "htmldiff.pl") if _, err := os.Stat(differ); err == nil { return differ, nil } return "", fmt.Errorf("unable to find htmldiff.pl") } func listPulls(w http.ResponseWriter, req *http.Request) { resp, err := http.Get(pullsPrefix) if err != nil { writeError(w, 500, err) return } defer resp.Body.Close() dec := json.NewDecoder(resp.Body) var pulls []PullRequest if err := dec.Decode(&pulls); err != nil { writeError(w, 500, err) return } if len(pulls) == 0 { io.WriteString(w, "No pull requests found") return } s := "
` if *includesDir != "" { s += `