xref: /aosp_15_r20/external/zucchini/zucchini_gen.cc (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/zucchini/zucchini_gen.h"
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <algorithm>
11 #include <map>
12 #include <memory>
13 #include <string>
14 #include <utility>
15 
16 #include "base/logging.h"
17 #include "base/numerics/safe_conversions.h"
18 #include "components/zucchini/disassembler.h"
19 #include "components/zucchini/element_detection.h"
20 #include "components/zucchini/encoded_view.h"
21 #include "components/zucchini/ensemble_matcher.h"
22 #include "components/zucchini/equivalence_map.h"
23 #include "components/zucchini/heuristic_ensemble_matcher.h"
24 #include "components/zucchini/image_index.h"
25 #include "components/zucchini/imposed_ensemble_matcher.h"
26 #include "components/zucchini/patch_writer.h"
27 #include "components/zucchini/reference_bytes_mixer.h"
28 #include "components/zucchini/suffix_array.h"
29 #include "components/zucchini/targets_affinity.h"
30 
31 namespace zucchini {
32 
namespace {

// Tuning parameters for patch generation.

// Minimum similarity score passed to EquivalenceMap::Build(); candidate
// equivalences scoring below this are discarded.
constexpr double kMinEquivalenceSimilarity = 12.0;

// Affinity threshold passed to TargetsAffinity::AssignLabels(); only target
// pairs associated at least this strongly receive a common label.
constexpr double kMinLabelAffinity = 64.0;

}  // namespace
40 
FindExtraTargets(const TargetPool & projected_old_targets,const TargetPool & new_targets)41 std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
42                                        const TargetPool& new_targets) {
43   std::vector<offset_t> extra_targets;
44   std::set_difference(
45       new_targets.begin(), new_targets.end(), projected_old_targets.begin(),
46       projected_old_targets.end(), std::back_inserter(extra_targets));
47   return extra_targets;
48 }
49 
50 // Label matching (between "old" and "new") can guide EquivalenceMap
51 // construction; but EquivalenceMap induces Label matching. This apparent "chick
52 // and egg" problem is solved by alternating 2 steps |num_iterations| times:
53 // - Associate targets based on previous EquivalenceMap. Note on the first
54 //   iteration, EquivalenceMap is empty, resulting in a no-op.
55 // - Construct refined EquivalenceMap based on new targets associations.
CreateEquivalenceMap(const ImageIndex & old_image_index,const ImageIndex & new_image_index,int num_iterations)56 EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index,
57                                     const ImageIndex& new_image_index,
58                                     int num_iterations) {
59   size_t pool_count = old_image_index.PoolCount();
60   // |target_affinities| is outside the loop to reduce allocation.
61   std::vector<TargetsAffinity> target_affinities(pool_count);
62 
63   EquivalenceMap equivalence_map;
64   for (int i = 0; i < num_iterations; ++i) {
65     EncodedView old_view(old_image_index);
66     EncodedView new_view(new_image_index);
67 
68     // Associate targets from "old" to "new" image based on |equivalence_map|
69     // for each reference pool.
70     for (const auto& old_pool_tag_and_targets :
71          old_image_index.target_pools()) {
72       PoolTag pool_tag = old_pool_tag_and_targets.first;
73       target_affinities[pool_tag.value()].InferFromSimilarities(
74           equivalence_map, old_pool_tag_and_targets.second.targets(),
75           new_image_index.pool(pool_tag).targets());
76 
77       // Creates labels for strongly associated targets.
78       std::vector<uint32_t> old_labels;
79       std::vector<uint32_t> new_labels;
80       size_t label_bound = target_affinities[pool_tag.value()].AssignLabels(
81           kMinLabelAffinity, &old_labels, &new_labels);
82       old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
83       new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
84     }
85     // Build equivalence map, where references in "old" and "new" that share
86     // common semantics (i.e., their respective targets were associated earlier
87     // on) are considered equivalent.
88     equivalence_map.Build(
89         MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()),
90         old_view, new_view, target_affinities, kMinEquivalenceSimilarity);
91   }
92 
93   return equivalence_map;
94 }
95 
GenerateEquivalencesAndExtraData(ConstBufferView new_image,const EquivalenceMap & equivalence_map,PatchElementWriter * patch_writer)96 bool GenerateEquivalencesAndExtraData(ConstBufferView new_image,
97                                       const EquivalenceMap& equivalence_map,
98                                       PatchElementWriter* patch_writer) {
99   // Make 2 passes through |equivalence_map| to reduce write churn.
100   // Pass 1: Write all equivalences.
101   EquivalenceSink equivalences_sink;
102   for (const EquivalenceCandidate& candidate : equivalence_map)
103     equivalences_sink.PutNext(candidate.eq);
104   patch_writer->SetEquivalenceSink(std::move(equivalences_sink));
105 
106   // Pass 2: Write data in gaps in |new_image| before / between  after
107   // |equivalence_map| as "extra data".
108   ExtraDataSink extra_data_sink;
109   offset_t dst_offset = 0;
110   for (const EquivalenceCandidate& candidate : equivalence_map) {
111     extra_data_sink.PutNext(
112         new_image[{dst_offset, candidate.eq.dst_offset - dst_offset}]);
113     dst_offset = candidate.eq.dst_end();
114     DCHECK_LE(dst_offset, new_image.size());
115   }
116   extra_data_sink.PutNext(
117       new_image[{dst_offset, new_image.size() - dst_offset}]);
118   patch_writer->SetExtraDataSink(std::move(extra_data_sink));
119   return true;
120 }
121 
GenerateRawDelta(ConstBufferView old_image,ConstBufferView new_image,const EquivalenceMap & equivalence_map,const ImageIndex & new_image_index,ReferenceBytesMixer * reference_bytes_mixer,PatchElementWriter * patch_writer)122 bool GenerateRawDelta(ConstBufferView old_image,
123                       ConstBufferView new_image,
124                       const EquivalenceMap& equivalence_map,
125                       const ImageIndex& new_image_index,
126                       ReferenceBytesMixer* reference_bytes_mixer,
127                       PatchElementWriter* patch_writer) {
128   RawDeltaSink raw_delta_sink;
129 
130   // Visit |equivalence_map| blocks in |new_image| order. Find and emit all
131   // bytewise differences.
132   offset_t base_copy_offset = 0;
133   for (const EquivalenceCandidate& candidate : equivalence_map) {
134     Equivalence equivalence = candidate.eq;
135     // For each bytewise delta from |old_image| to |new_image|, compute "copy
136     // offset" and pass it along with delta to the sink.
137     for (offset_t i = 0; i < equivalence.length;) {
138       if (new_image_index.IsReference(equivalence.dst_offset + i)) {
139         DCHECK(new_image_index.IsToken(equivalence.dst_offset + i));
140         TypeTag type_tag =
141             new_image_index.LookupType(equivalence.dst_offset + i);
142 
143         // Reference delta has its own flow. On some architectures (e.g., x86)
144         // this does not involve raw delta, so we skip. On other architectures
145         // (e.g., ARM) references are mixed with other bits that may change, so
146         // we need to "mix" data and store some changed bits into raw delta.
147         int num_bytes = reference_bytes_mixer->NumBytes(type_tag.value());
148         if (num_bytes) {
149           ConstBufferView mixed_ref_bytes = reference_bytes_mixer->Mix(
150               type_tag.value(), old_image, equivalence.src_offset + i,
151               new_image, equivalence.dst_offset + i);
152           for (int j = 0; j < num_bytes; ++j) {
153             int8_t diff =
154                 mixed_ref_bytes[j] - old_image[equivalence.src_offset + i + j];
155             if (diff)
156               raw_delta_sink.PutNext({base_copy_offset + i + j, diff});
157           }
158         }
159         i += new_image_index.refs(type_tag).width();
160         DCHECK_LE(i, equivalence.length);
161       } else {
162         int8_t diff = new_image[equivalence.dst_offset + i] -
163                       old_image[equivalence.src_offset + i];
164         if (diff)
165           raw_delta_sink.PutNext({base_copy_offset + i, diff});
166         ++i;
167       }
168     }
169     base_copy_offset += equivalence.length;
170   }
171   patch_writer->SetRawDeltaSink(std::move(raw_delta_sink));
172   return true;
173 }
174 
GenerateReferencesDelta(const ReferenceSet & src_refs,const ReferenceSet & dst_refs,const TargetPool & projected_target_pool,const OffsetMapper & offset_mapper,const EquivalenceMap & equivalence_map,ReferenceDeltaSink * reference_delta_sink)175 bool GenerateReferencesDelta(const ReferenceSet& src_refs,
176                              const ReferenceSet& dst_refs,
177                              const TargetPool& projected_target_pool,
178                              const OffsetMapper& offset_mapper,
179                              const EquivalenceMap& equivalence_map,
180                              ReferenceDeltaSink* reference_delta_sink) {
181   size_t ref_width = src_refs.width();
182   auto dst_ref = dst_refs.begin();
183 
184   // For each equivalence, for each covered |dst_ref| and the matching
185   // |src_ref|, emit the delta between the respective target labels. Note: By
186   // construction, each reference location (with |ref_width|) lies either
187   // completely inside an equivalence or completely outside. We perform
188   // "straddle checks" throughout to verify this assertion.
189   for (const auto& candidate : equivalence_map) {
190     const Equivalence equiv = candidate.eq;
191     // Increment |dst_ref| until it catches up to |equiv|.
192     while (dst_ref != dst_refs.end() && dst_ref->location < equiv.dst_offset)
193       ++dst_ref;
194     if (dst_ref == dst_refs.end())
195       break;
196     if (dst_ref->location >= equiv.dst_end())
197       continue;
198     // Straddle check.
199     DCHECK_LE(dst_ref->location + ref_width, equiv.dst_end());
200 
201     offset_t src_loc =
202         equiv.src_offset + (dst_ref->location - equiv.dst_offset);
203     auto src_ref = std::lower_bound(
204         src_refs.begin(), src_refs.end(), src_loc,
205         [](const Reference& a, offset_t b) { return a.location < b; });
206     for (; dst_ref != dst_refs.end() &&
207            dst_ref->location + ref_width <= equiv.dst_end();
208          ++dst_ref, ++src_ref) {
209       // Local offset of |src_ref| should match that of |dst_ref|.
210       DCHECK_EQ(src_ref->location - equiv.src_offset,
211                 dst_ref->location - equiv.dst_offset);
212       offset_t old_offset = src_ref->target;
213       offset_t new_estimated_offset =
214           offset_mapper.ExtendedForwardProject(old_offset);
215       offset_t new_estimated_key =
216           projected_target_pool.KeyForNearestOffset(new_estimated_offset);
217       offset_t new_offset = dst_ref->target;
218       offset_t new_key = projected_target_pool.KeyForOffset(new_offset);
219 
220       reference_delta_sink->PutNext(
221           static_cast<int32_t>(new_key - new_estimated_key));
222     }
223     if (dst_ref == dst_refs.end())
224       break;  // Done.
225     // Straddle check.
226     DCHECK_GE(dst_ref->location, equiv.dst_end());
227   }
228   return true;
229 }
230 
GenerateExtraTargets(const std::vector<offset_t> & extra_targets,PoolTag pool_tag,PatchElementWriter * patch_writer)231 bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets,
232                           PoolTag pool_tag,
233                           PatchElementWriter* patch_writer) {
234   TargetSink target_sink;
235   for (offset_t target : extra_targets)
236     target_sink.PutNext(target);
237   patch_writer->SetTargetSink(pool_tag, std::move(target_sink));
238   return true;
239 }
240 
GenerateRawElement(const std::vector<offset_t> & old_sa,ConstBufferView old_image,ConstBufferView new_image,PatchElementWriter * patch_writer)241 bool GenerateRawElement(const std::vector<offset_t>& old_sa,
242                         ConstBufferView old_image,
243                         ConstBufferView new_image,
244                         PatchElementWriter* patch_writer) {
245   ImageIndex old_image_index(old_image);
246   ImageIndex new_image_index(new_image);
247 
248   EquivalenceMap equivalences;
249   equivalences.Build(old_sa, EncodedView(old_image_index),
250                      EncodedView(new_image_index), {},
251                      kMinEquivalenceSimilarity);
252 
253   patch_writer->SetReferenceDeltaSink({});
254 
255   ReferenceBytesMixer no_op_bytes_mixer;
256   return GenerateEquivalencesAndExtraData(new_image, equivalences,
257                                           patch_writer) &&
258          GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
259                           &no_op_bytes_mixer, patch_writer);
260 }
261 
GenerateExecutableElement(ExecutableType exe_type,ConstBufferView old_image,ConstBufferView new_image,PatchElementWriter * patch_writer)262 bool GenerateExecutableElement(ExecutableType exe_type,
263                                ConstBufferView old_image,
264                                ConstBufferView new_image,
265                                PatchElementWriter* patch_writer) {
266   // Initialize Disassemblers.
267   std::unique_ptr<Disassembler> old_disasm =
268       MakeDisassemblerOfType(old_image, exe_type);
269   std::unique_ptr<Disassembler> new_disasm =
270       MakeDisassemblerOfType(new_image, exe_type);
271   if (!old_disasm || !new_disasm) {
272     LOG(ERROR) << "Failed to create Disassembler.";
273     return false;
274   }
275   DCHECK_EQ(old_disasm->GetExeType(), new_disasm->GetExeType());
276 
277   // Initialize ImageIndexes.
278   ImageIndex old_image_index(old_image);
279   ImageIndex new_image_index(new_image);
280   if (!old_image_index.Initialize(old_disasm.get()) ||
281       !new_image_index.Initialize(new_disasm.get())) {
282     LOG(ERROR) << "Failed to create ImageIndex: Overlapping references found?";
283     return false;
284   }
285   DCHECK_EQ(old_image_index.PoolCount(), new_image_index.PoolCount());
286 
287   EquivalenceMap equivalences =
288       CreateEquivalenceMap(old_image_index, new_image_index,
289                            new_disasm->num_equivalence_iterations());
290   OffsetMapper offset_mapper(equivalences,
291                              base::checked_cast<offset_t>(old_image.size()),
292                              base::checked_cast<offset_t>(new_image.size()));
293 
294   ReferenceDeltaSink reference_delta_sink;
295   for (const auto& old_targets : old_image_index.target_pools()) {
296     PoolTag pool_tag = old_targets.first;
297     TargetPool projected_old_targets = old_targets.second;
298     projected_old_targets.FilterAndProject(offset_mapper);
299     std::vector<offset_t> extra_target =
300         FindExtraTargets(projected_old_targets, new_image_index.pool(pool_tag));
301     projected_old_targets.InsertTargets(extra_target);
302 
303     if (!GenerateExtraTargets(extra_target, pool_tag, patch_writer))
304       return false;
305     for (TypeTag type_tag : old_targets.second.types()) {
306       if (!GenerateReferencesDelta(old_image_index.refs(type_tag),
307                                    new_image_index.refs(type_tag),
308                                    projected_old_targets, offset_mapper,
309                                    equivalences, &reference_delta_sink)) {
310         return false;
311       }
312     }
313   }
314   patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink));
315   std::unique_ptr<ReferenceBytesMixer> reference_bytes_mixer =
316       ReferenceBytesMixer::Create(*old_disasm, *new_disasm);
317   return GenerateEquivalencesAndExtraData(new_image, equivalences,
318                                           patch_writer) &&
319          GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
320                           reference_bytes_mixer.get(), patch_writer);
321 }
322 
GenerateBufferCommon(ConstBufferView old_image,ConstBufferView new_image,std::unique_ptr<EnsembleMatcher> matcher,EnsemblePatchWriter * patch_writer)323 status::Code GenerateBufferCommon(ConstBufferView old_image,
324                                   ConstBufferView new_image,
325                                   std::unique_ptr<EnsembleMatcher> matcher,
326                                   EnsemblePatchWriter* patch_writer) {
327   if (!matcher->RunMatch(old_image, new_image)) {
328     LOG(INFO) << "RunMatch() failed, generating raw patch.";
329     return GenerateBufferRaw(old_image, new_image, patch_writer);
330   }
331 
332   const std::vector<ElementMatch>& matches = matcher->matches();
333   LOG(INFO) << "Matching: Found " << matches.size()
334             << " nontrivial matches and " << matcher->num_identical()
335             << " identical matches.";
336   size_t num_elements = matches.size();
337   if (num_elements == 0) {
338     LOG(INFO) << "No nontrival matches, generating raw patch.";
339     return GenerateBufferRaw(old_image, new_image, patch_writer);
340   }
341 
342   // "Gaps" are |new_image| bytes not covered by new_elements in |matches|.
343   // These are treated as raw data, and patched against the entire |old_image|.
344 
345   // |patch_element_map| (keyed by "new" offsets) stores PatchElementWriter
346   // results so elements and "gap" results can be computed separately (to reduce
347   // peak memory usage), and later, properly serialized to |patch_writer|
348   // ordered by "new" offset.
349   std::map<offset_t, PatchElementWriter> patch_element_map;
350 
351   // Variables to track element patching successes.
352   std::vector<BufferRegion> covered_new_regions;
353   size_t covered_new_bytes = 0;
354 
355   // Process elements first, since non-fatal failures may turn some into gaps.
356   for (const ElementMatch& match : matches) {
357     BufferRegion new_region = match.new_element.region();
358     LOG(INFO) << "--- Match [" << new_region.lo() << "," << new_region.hi()
359               << ")";
360 
361     auto it_and_success = patch_element_map.emplace(
362         base::checked_cast<offset_t>(new_region.lo()), match);
363     DCHECK(it_and_success.second);
364     PatchElementWriter& patch_element = it_and_success.first->second;
365 
366     ConstBufferView old_sub_image = old_image[match.old_element.region()];
367     ConstBufferView new_sub_image = new_image[new_region];
368     if (GenerateExecutableElement(match.exe_type(), old_sub_image,
369                                   new_sub_image, &patch_element)) {
370       covered_new_regions.push_back(new_region);
371       covered_new_bytes += new_region.size;
372     } else {
373       LOG(INFO) << "Fall back to raw patching.";
374       patch_element_map.erase(it_and_success.first);
375     }
376   }
377 
378   if (covered_new_bytes < new_image.size()) {
379     // Process all "gaps", which are patched against the entire "old" image. To
380     // compute equivalence maps, "gaps" share a common suffix array
381     // |old_sa_raw|, whose lifetime is kept separated from elements' suffix
382     // arrays to reduce peak memory.
383     Element entire_old_element(old_image.local_region(), kExeTypeNoOp);
384     ImageIndex old_image_index(old_image);
385     EncodedView old_view_raw(old_image_index);
386     std::vector<offset_t> old_sa_raw =
387         MakeSuffixArray<InducedSuffixSort>(old_view_raw, size_t(256));
388 
389     offset_t gap_lo = 0;
390     // Add sentinel that points to end of "new" file, to simplify gap iteration.
391     covered_new_regions.emplace_back(BufferRegion{new_image.size(), 0});
392 
393     for (const BufferRegion& covered : covered_new_regions) {
394       offset_t gap_hi = base::checked_cast<offset_t>(covered.lo());
395       DCHECK_GE(gap_hi, gap_lo);
396       offset_t gap_size = gap_hi - gap_lo;
397       if (gap_size > 0) {
398         LOG(INFO) << "--- Gap   [" << gap_lo << "," << gap_hi << ")";
399 
400         ElementMatch gap_match{{entire_old_element, kExeTypeNoOp},
401                                {{gap_lo, gap_size}, kExeTypeNoOp}};
402         auto it_and_success = patch_element_map.emplace(gap_lo, gap_match);
403         DCHECK(it_and_success.second);
404         PatchElementWriter& patch_element = it_and_success.first->second;
405 
406         ConstBufferView new_sub_image = new_image[{gap_lo, gap_size}];
407         if (!GenerateRawElement(old_sa_raw, old_image, new_sub_image,
408                                 &patch_element)) {
409           return status::kStatusFatal;
410         }
411       }
412       gap_lo = base::checked_cast<offset_t>(covered.hi());
413     }
414   }
415 
416   // Write all PatchElementWriter sorted by "new" offset.
417   for (auto& new_lo_and_patch_element : patch_element_map)
418     patch_writer->AddElement(std::move(new_lo_and_patch_element.second));
419 
420   return status::kStatusSuccess;
421 }
422 
423 /******** Exported Functions ********/
424 
GenerateBuffer(ConstBufferView old_image,ConstBufferView new_image,EnsemblePatchWriter * patch_writer)425 status::Code GenerateBuffer(ConstBufferView old_image,
426                             ConstBufferView new_image,
427                             EnsemblePatchWriter* patch_writer) {
428   return GenerateBufferCommon(
429       old_image, new_image, std::make_unique<HeuristicEnsembleMatcher>(nullptr),
430       patch_writer);
431 }
432 
GenerateBufferImposed(ConstBufferView old_image,ConstBufferView new_image,std::string imposed_matches,EnsemblePatchWriter * patch_writer)433 status::Code GenerateBufferImposed(ConstBufferView old_image,
434                                    ConstBufferView new_image,
435                                    std::string imposed_matches,
436                                    EnsemblePatchWriter* patch_writer) {
437   if (imposed_matches.empty())
438     return GenerateBuffer(old_image, new_image, patch_writer);
439 
440   return GenerateBufferCommon(
441       old_image, new_image,
442       std::make_unique<ImposedEnsembleMatcher>(imposed_matches), patch_writer);
443 }
444 
GenerateBufferRaw(ConstBufferView old_image,ConstBufferView new_image,EnsemblePatchWriter * patch_writer)445 status::Code GenerateBufferRaw(ConstBufferView old_image,
446                                ConstBufferView new_image,
447                                EnsemblePatchWriter* patch_writer) {
448   ImageIndex old_image_index(old_image);
449   EncodedView old_view(old_image_index);
450   std::vector<offset_t> old_sa =
451       MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
452 
453   PatchElementWriter patch_element(
454       {Element(old_image.local_region()), Element(new_image.local_region())});
455   if (!GenerateRawElement(old_sa, old_image, new_image, &patch_element))
456     return status::kStatusFatal;
457   patch_writer->AddElement(std::move(patch_element));
458   return status::kStatusSuccess;
459 }
460 
461 }  // namespace zucchini
462