from __future__ import annotations

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import enum
import itertools
import json
import logging
import re
from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path

from devscripts.utils import read_file, run_process, write_file

BASE_URL = 'https://github.com'
LOCATION_PATH = Path(__file__).parent
HASH_LENGTH = 7

logger = logging.getLogger(__name__)


class _ClassProperty:
    """Read-only descriptor whose value is computed from the owning class.

    Used instead of stacking ``@classmethod`` on ``@property``: that chaining
    is only supported on Python 3.9-3.12 and was removed in Python 3.13.
    """

    def __init__(self, func):
        self._func = func

    def __get__(self, instance, owner=None):
        return self._func(type(instance) if owner is None else owner)


class CommitGroup(enum.Enum):
    """Changelog sections; each member's value is used as the section heading."""

    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    MISC = 'Misc.'

    # A descriptor is not turned into an enum member, so this stays a plain
    # class-level attribute: `CommitGroup.ignorable_prefixes`.
    @_ClassProperty
    def ignorable_prefixes(cls):
        """Prefixes that are never promoted to the `details` of a commit."""
        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')

    @classmethod
    @lru_cache
    def commit_lookup(cls):
        """Return the (cached) mapping of lowercase commit prefix -> group."""
        return {
            name: group
            for group, names in {
                cls.PRIORITY: {'priority'},
                cls.CORE: {
                    'aes',
                    'cache',
                    'compat_utils',
                    'compat',
                    'cookies',
                    'core',
                    'dependencies',
                    'jsinterp',
                    'outtmpl',
                    'plugins',
                    'update',
                    'upstream',
                    'utils',
                },
                cls.MISC: {
                    'build',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'misc',
                    'test',
                },
                cls.EXTRACTOR: {'extractor'},
                cls.DOWNLOADER: {'downloader'},
                cls.POSTPROCESSOR: {'postprocessor'},
            }.items()
            for name in names
        }

    @classmethod
    def get(cls, value):
        """Return the group registered for prefix `value`, or None if unknown."""
        result = cls.commit_lookup().get(value)
        if result:
            logger.debug(f'Mapped {value!r} => {result.name}')
        return result


@dataclass
class Commit:
    """A single commit: full hash (None for hash-less overrides), subject, authors."""

    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        result = f'{self.short!r}'

        if self.hash:
            result += f' ({self.hash[:HASH_LENGTH]})'

        if self.authors:
            authors = ', '.join(self.authors)
            result += f' by {authors}'

        return result


@dataclass
class CommitInfo:
    """A parsed commit subject together with the commit and the commits fixing it."""

    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        """Sort key: case-insensitive details, then sub-details, then message."""
        return ((self.details or '').lower(), self.sub_details, self.message)


def unique(items):
    """Return the sorted truthy `items`, de-duplicated ignoring case and outer whitespace.

    When several items collapse to the same key, the last one seen is kept.
    """
    return sorted({item.strip().lower(): item for item in items if item}.values())


class Changelog:
    """Renders grouped `CommitInfo` entries as a markdown changelog."""

    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        """
        @param groups       Mapping of CommitGroup -> list of CommitInfo
        @param repo         GitHub repository as `owner/name`, used to build links
        @param collapsible  Wrap every group not in ALWAYS_SHOWN in a <details> element
        """
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        # Tabs mark nesting depth while formatting. Markdown needs at least two
        # spaces of indentation to nest a list item, so expand each tab to four.
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield one markdown chunk per non-empty group, in CommitGroup order."""
        first = True
        for item in CommitGroup:
            # Open the collapsible section right before the first group that
            # is not always shown.
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            group = groups[item]
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        """Return the heading for `name` (if any) followed by the formatted entries."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        """Yield markdown list lines for `group`, nested by details and sub-details."""
        # itertools.groupby only merges adjacent items, hence the sort first
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    # A lone entry is inlined after its details label
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        """Merge cleanup entries whose message matches MISC_RE into one entry per author set.

        The merged entry's message is the comma separated list of commit links.
        Entries that do not match are returned unchanged, ahead of the merged ones.
        """
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash).strip()
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info):
        """Return the markdown for one entry: linked message, issues, authors, fixes.

        Each annotation is inserted before the first newline, i.e. at the end
        of the first line of the message.
        """
        message = self._format_message_link(info.message, info.commit.hash)
        if info.issues:
            message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)

        if info.commit.authors:
            message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)

        if info.fixes:
            # `_format_message_link` always ends its result with a newline;
            # strip it so the fix links stay on the annotated line.
            fix_message = ', '.join(
                self._format_message_link(None, fix.hash).strip() for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)

        # Drop the trailing newline added by `_format_message_link`
        return message[:-1]

    def _format_message_link(self, message, hash):
        """Return `message` followed by a newline, its first line linked to commit `hash`.

        Without a message the abbreviated hash is used as the link text.
        Without a hash the message is returned unlinked.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        if not hash:
            return f'{message}\n'
        return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)

    def _format_issues(self, issues):
        """Return comma separated markdown links to the given issue numbers."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma separated markdown links to the authors' profiles."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        return f'{BASE_URL}/{self._repo}'


class CommitRange:
    """The commits between two commitishes, as read from `git log`."""

    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    # Groups are consumed positionally in `groups()`:
    #   prefix, sub_details, message, issues
    # NOTE: with re.VERBOSE the unescaped space after `:` in the second line
    # is ignored by the regex engine; only the `\ ` escapes match a space.
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[^:`]+`?): )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        """
        @param start           Commitish to start after; falsy to log everything reachable from `end`
        @param end             Commitish to end at
        @param default_author  Author used for commits without an author indicator line
        """
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        """Check a Commit or hash against the commits read from git.

        Commits added through overrides are not considered.
        """
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        """Run `git log` and return `(commits, fixes)`.

        `commits` maps hash -> Commit in log order. `fixes` maps the 40 digit
        hash referenced by a fix/revert subject -> the commits referencing it.
        Fix commits whose target is part of `commits` are removed from it.
        """
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits = {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        # Each record is: hash line, subject line, body lines, separator line
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            # Consume body lines up to (and including) the separator
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            # With an explicit start, or for the very first commit, release
            # commits are only skipped; otherwise they end the range.
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    del commits[fix_commit.hash]
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply `add`/`remove`/`change` overrides, then reverse the commit order.

        Each override is a mapping with an `action` key and, depending on the
        action, `when`, `hash`, `short` and `authors`. An override with a
        `when` that is neither in the range nor equal to its start is ignored.
        """
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = {key: value for key, value in reversed(self._commits.items())}

    def groups(self):
        """Parse every commit subject and return a mapping CommitGroup -> list of CommitInfo.

        Subjects that do not match MESSAGE_RE are logged as errors and skipped.
        """
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            # Issues are captured as `#1, #2`; keep only the numbers
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                # The first prefix that maps to a known group decides the group
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                else:
                    group = CommitGroup.POSTPROCESSOR
                logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}')

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        """Split one `group/details:sub:...` prefix into `(group, details, sub_details)`.

        `group` is None when the part before the first `/` is not a known prefix.
        """
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, _, details = prefix.partition('/')
        prefix = prefix.strip()
        details = details.strip()

        group = CommitGroup.get(prefix.lower())
        if group is CommitGroup.PRIORITY:
            # For priority commits the remainder is re-split into prefix/details
            prefix, _, details = details.partition('/')

        if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
            logger.debug(f'Replaced details with {prefix!r}')
            details = prefix or None

        if details == 'common':
            details = None

        if details:
            details, *sub_details = details.split(':')
        else:
            sub_details = []

        return group, details, sub_details


def get_new_contributors(contributors_path, commits):
    """Return the authors in `commits` not listed in the contributors file.

    Names are compared case-insensitively. A line of the file may list several
    names separated by `/`; anything from the first ` (` on is ignored. The
    result is sorted case-insensitively and keeps the first spelling seen.
    """
    contributors = set()
    if contributors_path.exists():
        for line in read_file(contributors_path).splitlines():
            author, _, _ = line.strip().partition(' (')
            authors = author.split('/')
            contributors.update(map(str.casefold, authors))

    new_contributors = set()
    for commit in commits:
        for author in commit.authors:
            author_folded = author.casefold()
            if author_folded not in contributors:
                contributors.add(author_folded)
                new_contributors.add(author)

    return sorted(new_contributors, key=str.casefold)


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')
    args = parser.parse_args()

    # Each `-v` lowers the threshold by one level: WARNING -> INFO -> DEBUG
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)

    commits = CommitRange(None, args.commitish, args.default_author)

    if not args.no_override:
        if args.override_path.exists():
            overrides = json.loads(read_file(args.override_path))
            commits.apply_overrides(overrides)
        else:
            logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    print(Changelog(commits.groups(), args.repo, args.collapsible))