/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
6
7 #include "thread_sched.h"
8 #include "u_cpu_detect.h"
9 #include "u_debug.h"
10
11 DEBUG_GET_ONCE_BOOL_OPTION(pin_threads, "mesa_pin_threads", false)
12
/**
 * Report whether a thread scheduling policy can take effect at all.
 *
 * On x86 and x86-64 builds the result is true when the detected CPU caps
 * report more than one L3 cache, or when the mesa_pin_threads debug option
 * is set. On every other architecture the result is always false.
 */
bool
util_thread_scheduler_enabled(void)
{
#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   return util_get_cpu_caps()->num_L3_caches > 1 ||
          debug_get_option_pin_threads();
#else
   return false;
#endif
}
23
24 void
util_thread_scheduler_init_state(unsigned * state)25 util_thread_scheduler_init_state(unsigned *state)
26 {
27 *state = UINT32_MAX;
28
29 util_thread_sched_apply_policy(thrd_current(), UTIL_THREAD_APP_CALLER, 0,
30 NULL); /* keep as NULL */
31 }
32
33 /**
34 * Apply the optimal thread scheduling policy for the given thread.
35 *
36 * "name" determines which thread the policy is being applied to.
37 *
38 * "app_thread_cpu" is the CPU where the application thread currently
39 * resides.
40 *
41 * "sched_state" is a per-gl_context state that this function uses to track
42 * what happened in previous invocations.
43 */
44 bool
util_thread_sched_apply_policy(thrd_t thread,enum util_thread_name name,unsigned app_thread_cpu,unsigned * sched_state)45 util_thread_sched_apply_policy(thrd_t thread, enum util_thread_name name,
46 unsigned app_thread_cpu, unsigned *sched_state)
47 {
48 #if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
49 if (debug_get_option_pin_threads()) {
50 /* Pin threads to a specific CPU. This is done only once. *sched_state
51 * is true if this is the first time we are doing it.
52 */
53 if (sched_state && !*sched_state)
54 return false;
55
56 /* Each thread is assigned to a different CPU. */
57 unsigned mask = BITFIELD_BIT(name);
58 if (sched_state)
59 *sched_state = 0;
60 return util_set_thread_affinity(thread, &mask, NULL, 32);
61 }
62
63 /* Don't do anything for the app thread with the L3 chasing policy. */
64 if (name == UTIL_THREAD_APP_CALLER)
65 return false;
66
67 /* Move Mesa threads to the L3 core complex where the app thread
68 * resides. We call this "L3 chasing".
69 *
70 * This improves multithreading performance by up to 33% on Ryzen 3900X.
71 */
72 const struct util_cpu_caps_t *caps = util_get_cpu_caps();
73 int L3_cache = caps->cpu_to_L3[app_thread_cpu];
74
75 /* Don't do anything if the app thread hasn't moved to a different
76 * core complex. (*sched_state contains the last set L3 index)
77 */
78 if (L3_cache == U_CPU_INVALID_L3 ||
79 (sched_state && L3_cache == *sched_state))
80 return false;
81
82 /* Apply the policy. */
83 if (sched_state)
84 *sched_state = L3_cache;
85
86 return util_set_thread_affinity(thread, caps->L3_affinity_mask[L3_cache],
87 NULL, caps->num_cpu_mask_bits);
88 #else
89 return false;
90 #endif
91 }
92