xref: /aosp_15_r20/external/angle/src/image_util/AstcDecompressor.cpp (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright 2022 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
// AstcDecompressor.cpp: Decodes ASTC-encoded textures.
8 
9 #include <array>
10 #include <future>
11 #include <unordered_map>
12 
13 #include "astcenc.h"
14 #include "common/SimpleMutex.h"
15 #include "common/WorkerThread.h"
16 #include "image_util/AstcDecompressor.h"
17 
18 namespace angle
19 {
20 namespace
21 {
22 
23 const astcenc_swizzle kSwizzle = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A};
24 
25 // Used by std::unique_ptr to release the context when the pointer is destroyed
26 struct AstcencContextDeleter
27 {
operator ()angle::__anon98277cd50111::AstcencContextDeleter28     void operator()(astcenc_context *c) { astcenc_context_free(c); }
29 };
30 
31 using AstcencContextUniquePtr = std::unique_ptr<astcenc_context, AstcencContextDeleter>;
32 
// Returns the max number of threads to use when using multithreaded decompression.
// The value is computed once, on first call, and is always in the range [1, 16].
uint32_t MaxThreads()
{
    static const uint32_t numThreads = [] {
        // hardware_concurrency() is allowed to return 0 when the core count cannot be
        // determined. A thread count of 0 is unusable for both context allocation and task
        // scheduling, so fall back to a single thread in that case.
        const uint32_t detected = std::thread::hardware_concurrency();
        return detected == 0 ? 1u : std::min(16u, detected);
    }();
    return numThreads;
}
39 
40 // Creates a new astcenc_context and wraps it in a smart pointer.
41 // It is not needed to call astcenc_context_free() on the returned pointer.
42 // blockWith, blockSize: ASTC block size for the context
43 // Error: (output param) Where to put the error status. Must not be null.
44 // Returns nullptr in case of error.
MakeDecoderContext(uint32_t blockWidth,uint32_t blockHeight,astcenc_error * error)45 AstcencContextUniquePtr MakeDecoderContext(uint32_t blockWidth,
46                                            uint32_t blockHeight,
47                                            astcenc_error *error)
48 {
49     astcenc_config config = {};
50     *error =
51         // TODO(gregschlom): Do we need a special case for sRGB images? (And pass
52         //                   ASTCENC_PRF_LDR_SRGB here?)
53         astcenc_config_init(ASTCENC_PRF_LDR, blockWidth, blockHeight, 1, ASTCENC_PRE_FASTEST,
54                             ASTCENC_FLG_DECOMPRESS_ONLY, &config);
55     if (*error != ASTCENC_SUCCESS)
56     {
57         return nullptr;
58     }
59 
60     astcenc_context *context;
61     *error = astcenc_context_alloc(&config, MaxThreads(), &context);
62     if (*error != ASTCENC_SUCCESS)
63     {
64         return nullptr;
65     }
66     return AstcencContextUniquePtr(context);
67 }
68 
69 // Returns whether the ASTC decompressor can be used on this machine. It might not be available if
70 // the CPU doesn't support AVX2 instructions for example. Since this call is a bit expensive and
71 // never changes, the result should be cached.
IsAstcDecompressorAvailable()72 bool IsAstcDecompressorAvailable()
73 {
74     astcenc_error error;
75     // Try getting an arbitrary context. If it works, the decompressor is available.
76     AstcencContextUniquePtr context = MakeDecoderContext(5, 5, &error);
77     return context != nullptr;
78 }
79 
80 // Caches and manages astcenc_context objects.
81 //
82 // Each context is fairly large (around 30 MB) and takes a while to construct, so it's important to
83 // reuse them as much as possible.
84 //
85 // While context objects can be reused across multiple threads, they must be used sequentially. To
86 // avoid having to lock and manage access between threads, we keep one cache per thread. This avoids
87 // any concurrency issues, at the cost of extra memory.
88 //
89 // Currently, there is no eviction strategy. Each cache could grow to a maximum of ~400 MB in size
90 // since they are 13 possible ASTC block sizes.
91 //
92 // Thread-safety: not thread safe.
93 class AstcDecompressorContextCache
94 {
95   public:
96     // Returns a context object for a given ASTC block size, along with the error code if the
97     // context initialization failed.
98     // In this case, the context will be null, and the status code will be non-zero.
get(uint32_t blockWidth,uint32_t blockHeight)99     std::pair<astcenc_context *, astcenc_error> get(uint32_t blockWidth, uint32_t blockHeight)
100     {
101         Value &value = mContexts[{blockWidth, blockHeight}];
102         if (value.context == nullptr)
103         {
104             value.context = MakeDecoderContext(blockWidth, blockHeight, &value.error);
105         }
106         return {value.context.get(), value.error};
107     }
108 
109   private:
110     // Holds the data we use as the cache key
111     struct Key
112     {
113         uint32_t blockWidth;
114         uint32_t blockHeight;
115 
operator ==angle::__anon98277cd50111::AstcDecompressorContextCache::Key116         bool operator==(const Key &other) const
117         {
118             return blockWidth == other.blockWidth && blockHeight == other.blockHeight;
119         }
120     };
121 
122     struct Value
123     {
124         AstcencContextUniquePtr context = nullptr;
125         astcenc_error error             = ASTCENC_SUCCESS;
126     };
127 
128     // Computes the hash of a Key
129     struct KeyHash
130     {
operator ()angle::__anon98277cd50111::AstcDecompressorContextCache::KeyHash131         std::size_t operator()(const Key &k) const
132         {
133             // blockWidth and blockHeight are < 256 (actually, < 16), so this is safe
134             return k.blockWidth << 8 | k.blockHeight;
135         }
136     };
137 
138     std::unordered_map<Key, Value, KeyHash> mContexts;
139 };
140 
141 struct DecompressTask : public Closure
142 {
DecompressTaskangle::__anon98277cd50111::DecompressTask143     DecompressTask(astcenc_context *context,
144                    uint32_t threadIndex,
145                    const uint8_t *data,
146                    size_t dataLength,
147                    astcenc_image *image)
148         : context(context),
149           threadIndex(threadIndex),
150           data(data),
151           dataLength(dataLength),
152           image(image)
153     {}
154 
operator ()angle::__anon98277cd50111::DecompressTask155     void operator()() override
156     {
157         result = astcenc_decompress_image(context, data, dataLength, image, &kSwizzle, threadIndex);
158     }
159 
160     astcenc_context *context;
161     uint32_t threadIndex;
162     const uint8_t *data;
163     size_t dataLength;
164     astcenc_image *image;
165     astcenc_error result;
166 };
167 
168 // Performs ASTC decompression of an image on the CPU
169 class AstcDecompressorImpl : public AstcDecompressor
170 {
171   public:
AstcDecompressorImpl()172     AstcDecompressorImpl()
173         : AstcDecompressor(), mContextCache(std::make_unique<AstcDecompressorContextCache>())
174     {
175         mTasks.reserve(MaxThreads());
176         mWaitEvents.reserve(MaxThreads());
177     }
178 
179     ~AstcDecompressorImpl() override = default;
180 
available() const181     bool available() const override
182     {
183         static bool available = IsAstcDecompressorAvailable();
184         return available;
185     }
186 
decompress(std::shared_ptr<WorkerThreadPool> singleThreadPool,std::shared_ptr<WorkerThreadPool> multiThreadPool,const uint32_t imgWidth,const uint32_t imgHeight,const uint32_t blockWidth,const uint32_t blockHeight,const uint8_t * input,size_t inputLength,uint8_t * output)187     int32_t decompress(std::shared_ptr<WorkerThreadPool> singleThreadPool,
188                        std::shared_ptr<WorkerThreadPool> multiThreadPool,
189                        const uint32_t imgWidth,
190                        const uint32_t imgHeight,
191                        const uint32_t blockWidth,
192                        const uint32_t blockHeight,
193                        const uint8_t *input,
194                        size_t inputLength,
195                        uint8_t *output) override
196     {
197         // A given astcenc context can only decompress one image at a time, which we why we keep
198         // this mutex locked the whole time.
199         std::lock_guard global_lock(mMutex);
200 
201         auto [context, context_status] = mContextCache->get(blockWidth, blockHeight);
202         if (context_status != ASTCENC_SUCCESS)
203             return context_status;
204 
205         astcenc_image image;
206         image.dim_x     = imgWidth;
207         image.dim_y     = imgHeight;
208         image.dim_z     = 1;
209         image.data_type = ASTCENC_TYPE_U8;
210         image.data      = reinterpret_cast<void **>(&output);
211 
212         // For smaller images the overhead of multithreading exceeds the benefits.
213         const bool singleThreaded = (imgHeight <= 32 && imgWidth <= 32) || !multiThreadPool;
214 
215         std::shared_ptr<WorkerThreadPool> &threadPool =
216             singleThreaded ? singleThreadPool : multiThreadPool;
217         const uint32_t threadCount = singleThreaded ? 1 : MaxThreads();
218 
219         mTasks.clear();
220         mWaitEvents.clear();
221 
222         for (uint32_t i = 0; i < threadCount; ++i)
223         {
224             mTasks.push_back(
225                 std::make_shared<DecompressTask>(context, i, input, inputLength, &image));
226             mWaitEvents.push_back(threadPool->postWorkerTask(mTasks[i]));
227         }
228         WaitableEvent::WaitMany(&mWaitEvents);
229         astcenc_decompress_reset(context);
230 
231         for (auto &task : mTasks)
232         {
233             if (task->result != ASTCENC_SUCCESS)
234                 return task->result;
235         }
236         return ASTCENC_SUCCESS;
237     }
238 
getStatusString(int32_t statusCode) const239     const char *getStatusString(int32_t statusCode) const override
240     {
241         const char *msg = astcenc_get_error_string((astcenc_error)statusCode);
242         return msg ? msg : "ASTCENC_UNKNOWN_STATUS";
243     }
244 
245   private:
246     std::unique_ptr<AstcDecompressorContextCache> mContextCache;
247     angle::SimpleMutex mMutex;  // Locked while calling `decode()`
248     std::vector<std::shared_ptr<DecompressTask>> mTasks;
249     std::vector<std::shared_ptr<WaitableEvent>> mWaitEvents;
250 };
251 
252 }  // namespace
253 
get()254 AstcDecompressor &AstcDecompressor::get()
255 {
256     static auto *instance = new AstcDecompressorImpl();
257     return *instance;
258 }
259 
260 }  // namespace angle
261