1#!/usr/bin/env python
2
3from __future__ import print_function
4
# Description shown by argparse at the top of the --help output.
desc = """Generate the difference of two YAML files into a new YAML file (works on
a pair of directories too).  A new attribute 'Added' is set to True or False
depending on whether the entry is added or removed from the first input to the
next.

The tool requires PyYAML."""
11
12import yaml
13
14# Try to use the C parser.
15try:
16    from yaml import CLoader as Loader
17except ImportError:
18    from yaml import Loader
19
20import optrecord
21import argparse
22from collections import defaultdict
23
def iter_chunks(items, max_size):
    """Yield ``(index, chunk)`` pairs that split *items* into consecutive slices.

    Each chunk holds at most *max_size* elements and *index* is the integer
    position of the chunk (0, 1, 2, ...).  Floor division keeps the index an
    ``int`` on Python 3 too, where ``/`` would yield ``0.0``, ``1.0``, ... and
    leak a ".0" into the generated file names.

    Raises:
        ValueError: if *max_size* is not a positive integer.
    """
    if max_size <= 0:
        raise ValueError("max_size must be a positive integer")
    for start in range(0, len(items), max_size):
        yield start // max_size, items[start : start + max_size]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "yaml_dir_or_file_1",
        help="An optimization record file or a directory searched for optimization "
        "record files that are used as the old version for the comparison",
    )
    parser.add_argument(
        "yaml_dir_or_file_2",
        help="An optimization record file or a directory searched for optimization "
        "record files that are used as the new version for the comparison",
    )
    parser.add_argument(
        "--jobs",
        "-j",
        default=None,
        type=int,
        help="Max job count (defaults to %(default)s, the current CPU count)",
    )
    parser.add_argument(
        "--max-size",
        "-m",
        default=100000,
        type=int,
        help="Maximum number of remarks stored in an output file",
    )
    parser.add_argument(
        "--no-progress-indicator",
        "-n",
        action="store_true",
        default=False,
        help="Do not display any indicator of how many YAML files were read.",
    )
    parser.add_argument(
        "--output",
        "-o",
        default="diff{}.opt.yaml",
        help="Output file name pattern; '{}' is replaced by the index of the "
        "output file (default: %(default)s)",
    )
    args = parser.parse_args()

    # A zero step would crash range() and a negative one would silently write
    # nothing, so reject both with a regular usage error.
    if args.max_size <= 0:
        parser.error("--max-size must be a positive integer")

    files1 = optrecord.find_opt_files(args.yaml_dir_or_file_1)
    files2 = optrecord.find_opt_files(args.yaml_dir_or_file_2)

    print_progress = not args.no_progress_indicator
    # Only the first element of each result is used; its values are the remarks.
    all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress)
    all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress)

    # Build each set once instead of once per difference.
    old_remarks = set(all_remarks1.values())
    new_remarks = set(all_remarks2.values())

    added = new_remarks - old_remarks
    removed = old_remarks - new_remarks

    # Tag every remark with the direction of the change.
    for r in added:
        r.Added = True
    for r in removed:
        r.Added = False

    result = list(added | removed)
    for r in result:
        # NOTE(review): presumably restores the attribute layout expected by the
        # YAML dumper -- confirm against optrecord.
        r.recover_yaml_structure()

    # Write at most --max-size remarks per file: diff0.opt.yaml, diff1.opt.yaml, ...
    for index, chunk in iter_chunks(result, args.max_size):
        with open(args.output.format(index), "w") as stream:
            yaml.dump_all(chunk, stream)
82