#!/usr/bin/env python3
# xref: /aosp_15_r20/external/mbedtls/scripts/assemble_changelog.py (revision 62c56f9862f102b96d72393aff6076c951fb8148)

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
23*62c56f98SSadaf Ebrahimi
24*62c56f98SSadaf Ebrahimiimport argparse
25*62c56f98SSadaf Ebrahimifrom collections import OrderedDict, namedtuple
26*62c56f98SSadaf Ebrahimiimport datetime
27*62c56f98SSadaf Ebrahimiimport functools
28*62c56f98SSadaf Ebrahimiimport glob
29*62c56f98SSadaf Ebrahimiimport os
30*62c56f98SSadaf Ebrahimiimport re
31*62c56f98SSadaf Ebrahimiimport subprocess
32*62c56f98SSadaf Ebrahimiimport sys
33*62c56f98SSadaf Ebrahimi
class InputFormatError(Exception):
    """Error in the content of an input file, tagged with its location.

    The exception text is ``<filename>:<line_number>: <detail>`` where
    ``<detail>`` is ``message`` formatted with the remaining positional
    and keyword arguments (``str.format`` syntax).
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        detail = message.format(*args, **kwargs)
        located = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(located)
39*62c56f98SSadaf Ebrahimi
class CategoryParseError(Exception):
    """Failure to split a text into categories.

    Carries the line offset (relative to the parsed text) and the bare
    error message as attributes so that a caller can re-report the
    problem with a file name and an absolute line number.
    """

    def __init__(self, line_offset, error_message):
        super().__init__('{}: {}'.format(line_offset, error_message))
        self.line_offset = line_offset
        self.error_message = error_message
45*62c56f98SSadaf Ebrahimi
class LostContent(Exception):
    """A line from an input file is missing from the generated output."""

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
50*62c56f98SSadaf Ebrahimi
# The category names we use in the changelog.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    'API changes', 'Default behavior changes', 'Requirement changes',
    'New deprecations', 'Removals', 'Features',
    'Security', 'Bugfix', 'Changes',
)

# The maximum line length for an entry
MAX_LINE_LENGTH = 80

# One category subsection of a version section or entry file:
#   name       -- title text of the category
#   title_line -- line number of the title
#   body       -- body text of the category
#   body_line  -- starting line number of the body
CategoryContent = namedtuple(
    'CategoryContent',
    ('name', 'title_line', 'body', 'body_line'),
)
72*62c56f98SSadaf Ebrahimi
class ChangelogFormat:
    """Interface that every concrete changelog syntax must implement.

    All methods are class methods; this base class only documents the
    contract and raises ``NotImplementedError`` everywhere.
    """

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Isolate the topmost version section of a changelog.

        When the topmost section belongs to an already released version,
        a fresh section for an unreleased version is created instead.

        The result is the 4-tuple
        ``(header, top_version_title, top_version_body, trailer)``.
        The "top version" is the existing topmost section if it is for
        unreleased changes, otherwise the newly created one. After
        editing ``top_version_body``, concatenating the four pieces
        yields the complete changelog again.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Break the body of a version section into its categories.

        The result is a list of `CategoryContent`; the ``name`` of each
        element is the category title stripped of any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Build the text of a category section from its title and body."""
        raise NotImplementedError
109*62c56f98SSadaf Ebrahimi
class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format.

    A version section starts with a line beginning with '='. A category
    title is a line that starts with a word character in column 0.
    """

    # Title used when a new section for unreleased changes must be created.
    _unreleased_version_text = '= Mbed TLS x.x.x branch released xxxx-xx-xx'

    @classmethod
    def is_released_version(cls, title):
        """Return whether a version section title is for a released version.

        A title is considered unreleased when it contains a date-like
        field ending with the placeholder character 'x'
        (e.g. ``xxxx-xx-xx``).
        """
        # Look for an incomplete release date
        return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    # Group 1: the title line (starting with '=') and the blank lines after
    # it. Group 2: the body, up to the next line starting with '=' or the
    # end of the content.
    _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``.
        If the first version section is for a released version, the
        returned title is a new "unreleased" title, the body is empty and
        the trailer starts at the existing first version section.
        """
        m = re.search(cls._top_version_re, changelog_file_content)
        # NOTE(review): m is None when the content has no line starting
        # with '='; the attribute accesses below then raise AttributeError.
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # Insert a brand new section in front of the released one:
            # nothing is consumed from the existing content.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + '\n\n'
            top_version_body = ''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of a version title, without any newline.

        Everything from the first newline onwards (the newline itself and
        the blank lines that follow the title) is removed.
        """
        # The arguments of re.sub are (pattern, repl, string, count, flags):
        # the replacement is the empty string and the flag must be passed by
        # keyword, otherwise it would be taken for the subject string.
        return re.sub(r'\n.*', '', version_title, flags=re.DOTALL)

    # A title line (word character in column 0) and the blank lines after it.
    _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """Split a version section body into a list of `CategoryContent`.

        A category title is a line with the title in column 0.

        Return an empty list for an empty body. Raise `CategoryParseError`
        (with line offset 0) if the body does not begin with a category
        title. The line numbers stored in the result are 0-based offsets
        from the start of ``version_body``. Each returned body ends with
        exactly one newline.
        """
        if not version_body:
            return []
        title_matches = list(re.finditer(cls._category_title_re, version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body runs until the next title, the last one until the end.
        body_ends = title_starts[1:] + [len(version_body)]
        bodies = [version_body[body_start:body_end].rstrip('\n') + '\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        # Number of newlines before a position == 0-based line index.
        title_lines = [version_body[:pos].count('\n') for pos in title_starts]
        body_lines = [version_body[:pos].count('\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return the title line followed by the body and a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith('\n\n'):
            body += '\n'
        return title + '\n' + body
170*62c56f98SSadaf Ebrahimi
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]')
    # A line that contains nothing but a URL (exempt from the length limit).
    _only_url_re = re.compile(r'^\s*\w+://\S+\s*$')
    # A line that contains a URL somewhere.
    _has_url_re = re.compile(r'.*://.*')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file and add it to this changelog.

        ``filename`` is only used in error messages. ``line_offset`` is the
        line number, in ``filename``, of the first line of ``text``.
        If ``allow_unknown_category`` is false, a category title that is
        not already a key of ``self.categories`` is an error; otherwise
        such a category is added after the existing ones.

        Raise `InputFormatError` if the text cannot be split into
        categories, if a category is unknown and not allowed, or if a line
        other than a URL-only line is longer than ``MAX_LINE_LENGTH``.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message)
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name)

            body_split = category.body.splitlines()

            for index, line in enumerate(body_split):
                if not self._only_url_re.match(line) and \
                   len(line) > MAX_LINE_LENGTH:
                    long_url_msg = '. URL exceeding length limit must be alone in its line.' \
                        if self._has_url_re.match(line) else ""
                    # category.body_line is relative to the start of `text`,
                    # so it must be shifted by line_offset just like
                    # category.title_line above.
                    raise InputFormatError(filename,
                                           line_offset + category.body_line
                                           + index,
                                           'Line is longer than allowed: '
                                           'Length {} (Max {}){}',
                                           len(line), MAX_LINE_LENGTH,
                                           long_url_msg)

            # Use get() so that a permitted unknown category creates a new
            # entry instead of raising KeyError.
            self.categories[category.name] = \
                self.categories.get(category.name, '') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. ``changelog_format`` provides the parsing and
        formatting methods documented in `ChangelogFormat`.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = ''
        # Line number (1-based) of the first line of the top version body.
        offset = (self.header + self.top_version_title).count('\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Unknown categories are rejected with `InputFormatError`.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        Categories with an empty body are omitted.
        """
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
263*62c56f98SSadaf Ebrahimi
264*62c56f98SSadaf Ebrahimi
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # '.' does not match a newline, so this picks the last line.
        m = re.search('(.+)$', hashes.decode('ascii'))
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.
        Return a list of commit ids, empty if git prints nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        '..'.join([some_hash, target])])
        # splitlines() returns [] for empty output, whereas split('\n')
        # would return [''] and make the empty string look like a commit id.
        return text.decode('ascii').splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = 'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit as a naive UTC datetime."""
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive datetime in local time. It is only compared
        with the timestamps of other LOCAL files since the category is
        compared first.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()
386*62c56f98SSadaf Ebrahimi
387*62c56f98SSadaf Ebrahimi
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    Raise `LostContent` for the first input line that is missing from
    the generated output. Return None otherwise.
    """
    with open(generated_output_file, 'r', encoding='utf-8') as fd:
        generated_output = set(fd)
    # Open every input through a context manager so that the file is
    # closed even when LostContent is raised in the middle of the loop.
    with open(main_input_file, 'r', encoding='utf-8') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'r', encoding='utf-8') as merged_input:
            for line in merged_input:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
407*62c56f98SSadaf Ebrahimi
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    When the output is a regular file (or does not exist yet), the content
    is first written to ``output_file + '.tmp'``, checked, and only then
    moved over ``output_file``. If the check fails, the exception
    propagates and ``output_file`` is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform;
        # os.rename fails on Windows when the destination exists, which is
        # the normal case here since the default output is the input file.
        os.replace(output_temp, output_file)
425*62c56f98SSadaf Ebrahimi
def remove_merged_entries(files_to_remove):
    """Delete every file named in ``files_to_remove``."""
    for entry_path in files_to_remove:
        os.unlink(entry_path)
429*62c56f98SSadaf Ebrahimi
def list_files_to_merge(options):
    """Return the ``*.txt`` entry files found in ``options.dir``, oldest first.

    The age of a file is defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
438*62c56f98SSadaf Ebrahimi
def merge_entries(options):
    """Merge the pending changelog entries into the changelog file.

    The existing changelog is read from options.input and the entries
    from the directory options.dir. The result is written to
    options.output. Unless options.keep_entries is true, the entry files
    are deleted once they have been merged.

    If there is no entry file, a message is printed on standard error and
    nothing is written.
    """
    with open(options.input, 'r', encoding='utf-8') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    pending = list_files_to_merge(options)
    if pending:
        for entry_path in pending:
            with open(entry_path, 'r', encoding='utf-8') as entry_stream:
                changelog.add_file(entry_stream)
        finish_output(changelog, options.output, options.input, pending)
        if not options.keep_entries:
            remove_merged_entries(pending)
    else:
        sys.stderr.write('There are no pending changelog entries.\n')
459*62c56f98SSadaf Ebrahimi
def show_file_timestamps(options):
    """Print the category, timestamp and name of each file to merge.

    This is only intended for debugging purposes.
    """
    for entry_path in list_files_to_merge(options):
        key = EntryFileSortKey(entry_path)
        print(key.category, key.datetime, entry_path)
469*62c56f98SSadaf Ebrahimi
def set_defaults(options):
    """Fill in the options that were not given on the command line.

    * ``output`` defaults to ``input``.
    * ``keep_entries`` defaults to true if an output file was given
      explicitly, and to false otherwise.
    """
    output_given = getattr(options, 'output', None) is not None
    if not output_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_given
477*62c56f98SSadaf Ebrahimi
def main():
    """Command line entry point.

    Parse the command line, fill in the defaults, then either list the
    entry files (--list-files-only) or merge them into the changelog.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    add = arg_parser.add_argument
    add('--dir', '-d', metavar='DIR',
        default='ChangeLog.d',
        help='Directory to read entries from'
             ' (default: ChangeLog.d)')
    add('--input', '-i', metavar='FILE',
        default='ChangeLog',
        help='Existing changelog file to read from and augment'
             ' (default: ChangeLog)')
    # --keep-entries and --no-keep-entries share one destination; None
    # means that neither was given, which set_defaults() resolves.
    add('--keep-entries',
        action='store_true', dest='keep_entries', default=None,
        help='Keep the files containing entries'
             ' (default: remove them if --output/-o is not specified)')
    add('--no-keep-entries',
        action='store_false', dest='keep_entries',
        help='Remove the files containing entries after they are merged'
             ' (default: remove them if --output/-o is not specified)')
    add('--output', '-o', metavar='FILE',
        help='Output changelog file'
             ' (default: overwrite the input)')
    add('--list-files-only',
        action='store_true',
        help=('Only list the files that would be processed '
              '(with some debugging information)'))
    options = arg_parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)
510*62c56f98SSadaf Ebrahimi
# Run the command line entry point only when executed as a script.
if __name__ == '__main__':
    main()
513