1 //
2 // Copyright 2022 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 // AstcDecompressorImpl.cpp: Decodes ASTC-encoded textures.
8
9 #include <array>
10 #include <future>
11 #include <unordered_map>
12
13 #include "astcenc.h"
14 #include "common/SimpleMutex.h"
15 #include "common/WorkerThread.h"
16 #include "image_util/AstcDecompressor.h"
17
18 namespace angle
19 {
20 namespace
21 {
22
23 const astcenc_swizzle kSwizzle = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A};
24
25 // Used by std::unique_ptr to release the context when the pointer is destroyed
26 struct AstcencContextDeleter
27 {
operator ()angle::__anon98277cd50111::AstcencContextDeleter28 void operator()(astcenc_context *c) { astcenc_context_free(c); }
29 };
30
31 using AstcencContextUniquePtr = std::unique_ptr<astcenc_context, AstcencContextDeleter>;
32
// Returns the max number of threads to use when using multithreaded decompression.
//
// The value is computed once, on first call: the hardware concurrency reported by the standard
// library, capped at 16. std::thread::hardware_concurrency() is allowed to return 0 when the
// value cannot be determined; in that case we fall back to a single thread so that callers
// never see a thread count of zero.
uint32_t MaxThreads()
{
    static const uint32_t numThreads = []() -> uint32_t {
        const unsigned int hardwareThreads = std::thread::hardware_concurrency();
        if (hardwareThreads == 0)
        {
            // Concurrency is unknown: a single thread is always a valid choice.
            return 1u;
        }
        return std::min(16u, hardwareThreads);
    }();
    return numThreads;
}
39
40 // Creates a new astcenc_context and wraps it in a smart pointer.
41 // It is not needed to call astcenc_context_free() on the returned pointer.
42 // blockWith, blockSize: ASTC block size for the context
43 // Error: (output param) Where to put the error status. Must not be null.
44 // Returns nullptr in case of error.
MakeDecoderContext(uint32_t blockWidth,uint32_t blockHeight,astcenc_error * error)45 AstcencContextUniquePtr MakeDecoderContext(uint32_t blockWidth,
46 uint32_t blockHeight,
47 astcenc_error *error)
48 {
49 astcenc_config config = {};
50 *error =
51 // TODO(gregschlom): Do we need a special case for sRGB images? (And pass
52 // ASTCENC_PRF_LDR_SRGB here?)
53 astcenc_config_init(ASTCENC_PRF_LDR, blockWidth, blockHeight, 1, ASTCENC_PRE_FASTEST,
54 ASTCENC_FLG_DECOMPRESS_ONLY, &config);
55 if (*error != ASTCENC_SUCCESS)
56 {
57 return nullptr;
58 }
59
60 astcenc_context *context;
61 *error = astcenc_context_alloc(&config, MaxThreads(), &context);
62 if (*error != ASTCENC_SUCCESS)
63 {
64 return nullptr;
65 }
66 return AstcencContextUniquePtr(context);
67 }
68
69 // Returns whether the ASTC decompressor can be used on this machine. It might not be available if
70 // the CPU doesn't support AVX2 instructions for example. Since this call is a bit expensive and
71 // never changes, the result should be cached.
IsAstcDecompressorAvailable()72 bool IsAstcDecompressorAvailable()
73 {
74 astcenc_error error;
75 // Try getting an arbitrary context. If it works, the decompressor is available.
76 AstcencContextUniquePtr context = MakeDecoderContext(5, 5, &error);
77 return context != nullptr;
78 }
79
80 // Caches and manages astcenc_context objects.
81 //
82 // Each context is fairly large (around 30 MB) and takes a while to construct, so it's important to
83 // reuse them as much as possible.
84 //
85 // While context objects can be reused across multiple threads, they must be used sequentially. To
86 // avoid having to lock and manage access between threads, we keep one cache per thread. This avoids
87 // any concurrency issues, at the cost of extra memory.
88 //
89 // Currently, there is no eviction strategy. Each cache could grow to a maximum of ~400 MB in size
90 // since they are 13 possible ASTC block sizes.
91 //
92 // Thread-safety: not thread safe.
93 class AstcDecompressorContextCache
94 {
95 public:
96 // Returns a context object for a given ASTC block size, along with the error code if the
97 // context initialization failed.
98 // In this case, the context will be null, and the status code will be non-zero.
get(uint32_t blockWidth,uint32_t blockHeight)99 std::pair<astcenc_context *, astcenc_error> get(uint32_t blockWidth, uint32_t blockHeight)
100 {
101 Value &value = mContexts[{blockWidth, blockHeight}];
102 if (value.context == nullptr)
103 {
104 value.context = MakeDecoderContext(blockWidth, blockHeight, &value.error);
105 }
106 return {value.context.get(), value.error};
107 }
108
109 private:
110 // Holds the data we use as the cache key
111 struct Key
112 {
113 uint32_t blockWidth;
114 uint32_t blockHeight;
115
operator ==angle::__anon98277cd50111::AstcDecompressorContextCache::Key116 bool operator==(const Key &other) const
117 {
118 return blockWidth == other.blockWidth && blockHeight == other.blockHeight;
119 }
120 };
121
122 struct Value
123 {
124 AstcencContextUniquePtr context = nullptr;
125 astcenc_error error = ASTCENC_SUCCESS;
126 };
127
128 // Computes the hash of a Key
129 struct KeyHash
130 {
operator ()angle::__anon98277cd50111::AstcDecompressorContextCache::KeyHash131 std::size_t operator()(const Key &k) const
132 {
133 // blockWidth and blockHeight are < 256 (actually, < 16), so this is safe
134 return k.blockWidth << 8 | k.blockHeight;
135 }
136 };
137
138 std::unordered_map<Key, Value, KeyHash> mContexts;
139 };
140
141 struct DecompressTask : public Closure
142 {
DecompressTaskangle::__anon98277cd50111::DecompressTask143 DecompressTask(astcenc_context *context,
144 uint32_t threadIndex,
145 const uint8_t *data,
146 size_t dataLength,
147 astcenc_image *image)
148 : context(context),
149 threadIndex(threadIndex),
150 data(data),
151 dataLength(dataLength),
152 image(image)
153 {}
154
operator ()angle::__anon98277cd50111::DecompressTask155 void operator()() override
156 {
157 result = astcenc_decompress_image(context, data, dataLength, image, &kSwizzle, threadIndex);
158 }
159
160 astcenc_context *context;
161 uint32_t threadIndex;
162 const uint8_t *data;
163 size_t dataLength;
164 astcenc_image *image;
165 astcenc_error result;
166 };
167
168 // Performs ASTC decompression of an image on the CPU
169 class AstcDecompressorImpl : public AstcDecompressor
170 {
171 public:
AstcDecompressorImpl()172 AstcDecompressorImpl()
173 : AstcDecompressor(), mContextCache(std::make_unique<AstcDecompressorContextCache>())
174 {
175 mTasks.reserve(MaxThreads());
176 mWaitEvents.reserve(MaxThreads());
177 }
178
179 ~AstcDecompressorImpl() override = default;
180
available() const181 bool available() const override
182 {
183 static bool available = IsAstcDecompressorAvailable();
184 return available;
185 }
186
decompress(std::shared_ptr<WorkerThreadPool> singleThreadPool,std::shared_ptr<WorkerThreadPool> multiThreadPool,const uint32_t imgWidth,const uint32_t imgHeight,const uint32_t blockWidth,const uint32_t blockHeight,const uint8_t * input,size_t inputLength,uint8_t * output)187 int32_t decompress(std::shared_ptr<WorkerThreadPool> singleThreadPool,
188 std::shared_ptr<WorkerThreadPool> multiThreadPool,
189 const uint32_t imgWidth,
190 const uint32_t imgHeight,
191 const uint32_t blockWidth,
192 const uint32_t blockHeight,
193 const uint8_t *input,
194 size_t inputLength,
195 uint8_t *output) override
196 {
197 // A given astcenc context can only decompress one image at a time, which we why we keep
198 // this mutex locked the whole time.
199 std::lock_guard global_lock(mMutex);
200
201 auto [context, context_status] = mContextCache->get(blockWidth, blockHeight);
202 if (context_status != ASTCENC_SUCCESS)
203 return context_status;
204
205 astcenc_image image;
206 image.dim_x = imgWidth;
207 image.dim_y = imgHeight;
208 image.dim_z = 1;
209 image.data_type = ASTCENC_TYPE_U8;
210 image.data = reinterpret_cast<void **>(&output);
211
212 // For smaller images the overhead of multithreading exceeds the benefits.
213 const bool singleThreaded = (imgHeight <= 32 && imgWidth <= 32) || !multiThreadPool;
214
215 std::shared_ptr<WorkerThreadPool> &threadPool =
216 singleThreaded ? singleThreadPool : multiThreadPool;
217 const uint32_t threadCount = singleThreaded ? 1 : MaxThreads();
218
219 mTasks.clear();
220 mWaitEvents.clear();
221
222 for (uint32_t i = 0; i < threadCount; ++i)
223 {
224 mTasks.push_back(
225 std::make_shared<DecompressTask>(context, i, input, inputLength, &image));
226 mWaitEvents.push_back(threadPool->postWorkerTask(mTasks[i]));
227 }
228 WaitableEvent::WaitMany(&mWaitEvents);
229 astcenc_decompress_reset(context);
230
231 for (auto &task : mTasks)
232 {
233 if (task->result != ASTCENC_SUCCESS)
234 return task->result;
235 }
236 return ASTCENC_SUCCESS;
237 }
238
getStatusString(int32_t statusCode) const239 const char *getStatusString(int32_t statusCode) const override
240 {
241 const char *msg = astcenc_get_error_string((astcenc_error)statusCode);
242 return msg ? msg : "ASTCENC_UNKNOWN_STATUS";
243 }
244
245 private:
246 std::unique_ptr<AstcDecompressorContextCache> mContextCache;
247 angle::SimpleMutex mMutex; // Locked while calling `decode()`
248 std::vector<std::shared_ptr<DecompressTask>> mTasks;
249 std::vector<std::shared_ptr<WaitableEvent>> mWaitEvents;
250 };
251
252 } // namespace
253
get()254 AstcDecompressor &AstcDecompressor::get()
255 {
256 static auto *instance = new AstcDecompressorImpl();
257 return *instance;
258 }
259
260 } // namespace angle
261