# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
"""Auto-detects machine configurations and outputs the results to shell or file.

Supports linux only currently.

Usage:
  python config_detector.py [--save_output] [--filename] [--debug]

Example command:
  python config_detector.py --save_output=True --filename=configs.json
  --debug=False

Flag option(s):
  save_output  (True | False)    Save output to a file.
                                 (Default: True)
  filename     <file_name>.json  Filename(.json) for storing configs.
                                 (Default: `configs.json`)
  debug        (True | False)    View debug and stderr messages.
                                 (Default: False)

The following machine configuration will be detected:
  Platform              Operating system (linux | macos | windows)
  CPU                   CPU type (e.g. `GenuineIntel`)
  CPU architecture      Processor type (32-bit | 64-bit)
  CPU ISA               CPU instruction set (e.g. `sse4`, `sse4_1`, `avx`)
  Distribution          Operating system distribution (e.g. Ubuntu)
  Distribution version  Operating system distribution version (e.g. 14.04)
  GPU                   GPU type (e.g. `Tesla K80`)
  GPU count             Number of GPU's available
  CUDA version          CUDA version by default (e.g. `10.1`)
  CUDA version all      CUDA version(s) all available
  cuDNN version         cuDNN version (e.g. `7.5.0`)
  GCC version           GCC version (e.g. `7.3.0`)
  GLIBC version         GLIBC version (e.g. `2.24`)
  libstdc++ version     libstdc++ version (e.g. `3.4.25`)

Output:
  Shell output (print)
      A table containing status and info on all configurations will be
      printed out to shell.

  Configuration file (.json):
      Depending on `--save_output` option, this script outputs a .json file
      (in the same directory) containing all user machine configurations
      that were detected.
"""
# pylint: disable=broad-except
import collections
import json
import re
import subprocess
import sys

from absl import app
from absl import flags

from tensorflow.tools.tensorflow_builder.config_detector.data import cuda_compute_capability

FLAGS = flags.FLAGS
# Define all flags
flags.DEFINE_boolean("save_output", True, "Save output to a file. [True/False]")
flags.DEFINE_string("filename", "configs.json", "Output filename.")
flags.DEFINE_boolean("debug", False, "View debug messages. [True/False]")

# For linux: commands for retrieving user machine configs.
# A value is either a single shell command string or a list of command strings
# (alternative methods / pipeline stages; see the function that consumes it).
cmds_linux = {
    "cpu_type": (
        "cat /proc/cpuinfo 2>&1 | grep 'vendor' | uniq"),
    "cpu_arch": (
        "uname -m"),
    "distrib": (
        "cat /etc/*-release | grep DISTRIB_ID* | sed 's/^.*=//'"),
    "distrib_ver": (
        "cat /etc/*-release | grep DISTRIB_RELEASE* | sed 's/^.*=//'"),
    "gpu_type": (
        "sudo lshw -C display | grep product:* | sed 's/^.*: //'"),
    "gpu_type_no_sudo":
        r"lspci | grep 'VGA compatible\|3D controller' | cut -d' ' -f 1 | "
        r"xargs -i lspci -v -s {} | head -n 2 | tail -1 | "
        r"awk '{print $(NF-2), $(NF-1), $NF}'",
    "gpu_count": (
        "sudo lshw -C display | grep *-display:* | wc -l"),
    "gpu_count_no_sudo": (
        r"lspci | grep 'VGA compatible\|3D controller' | wc -l"),
    "cuda_ver_all": (
        "ls -d /usr/local/cuda* 2> /dev/null"),
    "cuda_ver_dflt": (
        ["nvcc --version 2> /dev/null",
         "cat /usr/local/cuda/version.txt 2> /dev/null | awk '{print $NF}'"]),
    "cudnn_ver": (
        ["whereis cudnn.h",
         "cat `awk '{print $2}'` | grep CUDNN_MAJOR -A 2 | echo "
         "`awk '{print $NF}'` | awk '{print $1, $2, $3}' | sed 's/ /./g'"]),
    "gcc_ver": (
        "gcc --version | awk '{print $NF}' | head -n 1"),
    "glibc_ver": (
        "ldd --version | tail -n+1 | head -n 1 | awk '{print $NF}'"),
    "libstdcpp_ver":
        "strings $(/sbin/ldconfig -p | grep libstdc++ | head -n 1 | "
        "awk '{print $NF}') | grep LIBCXX | tail -2 | head -n 1",
    "cpu_isa": (
        "cat /proc/cpuinfo | grep flags | head -n 1"),
}

cmds_all = {
    "linux": cmds_linux,
}

# Global variable(s).
PLATFORM = None  # Set by `get_platform()`.
GPU_TYPE = None  # Set by `get_gpu_type()`.
PATH_TO_DIR = "tensorflow/tools/tensorflow_builder/config_detector"


def run_shell_cmd(args):
  """Executes shell commands and returns output.

  stdout and stderr are captured separately so that shell error messages are
  never mistaken for a detected value and can be shown in debug mode.

  Args:
    args: String of shell commands to run.

  Returns:
    Tuple output (stdoutdata, stderrdata) from running the shell commands.
    Both items are raw `bytes` as returned by `Popen.communicate()`.
  """
  proc = subprocess.Popen(
      args,
      shell=True,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
  )
  return proc.communicate()


def _to_text(data):
  """Converts subprocess output to `str`; `None` becomes an empty string."""
  if data is None:
    return ""
  if isinstance(data, bytes):
    return data.decode("utf-8", errors="replace")
  return data


def _run_text_cmd(cmd):
  """Runs `cmd` through `run_shell_cmd` and returns (stdout, stderr) as `str`."""
  out, err = run_shell_cmd(cmd)
  return _to_text(out), _to_text(err)


def _debug_enabled():
  """Returns True when flags have been parsed and `--debug` is set."""
  return FLAGS.is_parsed() and FLAGS.debug


def _report_error(message, err):
  """Prints `message` followed by `err` when `err` is non-empty in debug mode."""
  if err and _debug_enabled():
    print("%s:\n %s" % (message, str(err)))


def _get_cmd(key):
  """Returns the command(s) registered under `key` for the current platform.

  Detects the platform first if `get_platform()` has not been called yet.
  """
  if PLATFORM is None:
    get_platform()
  return cmds_all[PLATFORM.lower()][key]


def get_platform():
  """Retrieves platform information.

  Currently the script only supports linux. If another platform such as
  Windows or MacOS is detected, it prints an error and terminates the process
  with exit status 1.

  Returns:
    String that is platform type.
      e.g. 'linux'
  """
  global PLATFORM
  out, err = _run_text_cmd("uname")
  platform_detected = out.strip().lower()
  if platform_detected != "linux":
    _report_error("Error in detecting platform", err)
    print("Error: Detected unsupported operating system.\nStopping...")
    sys.exit(1)

  PLATFORM = platform_detected
  return PLATFORM


def get_cpu_type():
  """Retrieves CPU (type) information.

  Returns:
    String that is name of the CPU, or an empty string if it could not be
    detected.
      e.g. 'GenuineIntel'
  """
  out, err = _run_text_cmd(_get_cmd("cpu_type"))
  _report_error("Error in detecting CPU type", err)

  # Expected shape of the first line: "vendor_id : GenuineIntel".
  first_line = out.strip().split("\n", 1)[0]
  _, sep, vendor = first_line.partition(":")
  return vendor.strip() if sep else ""


def get_cpu_arch():
  """Retrieves processor architecture type (32-bit or 64-bit).

  Returns:
    String that is CPU architecture.
      e.g. 'x86_64'
  """
  out, err = _run_text_cmd(_get_cmd("cpu_arch"))
  _report_error("Error in detecting CPU arch", err)
  return out.strip()


def get_distrib():
  """Retrieves distribution name of the operating system.

  Returns:
    String that is the name of distribution.
      e.g. 'Ubuntu'
  """
  out, err = _run_text_cmd(_get_cmd("distrib"))
  _report_error("Error in detecting distribution", err)
  return out.strip()


def get_distrib_version():
  """Retrieves distribution version of the operating system.

  Returns:
    String that is the distribution version.
      e.g. '14.04'
  """
  out, err = _run_text_cmd(_get_cmd("distrib_ver"))
  _report_error("Error in detecting distribution version", err)
  return out.strip()


def get_gpu_type():
  """Retrieves GPU type.

  Also stores the detected type in the module-level `GPU_TYPE`.

  Returns:
    Tuple (gpu_id, gpu_type) where `gpu_type` is the name of the detected
    NVIDIA GPU.
      e.g. 'Tesla K80'

    'unknown' will be returned as the type if the command output cannot be
    parsed or the detected GPU type is an unknown name.
    Unknown name refers to any GPU name that is not specified in this page:
    https://developer.nvidia.com/cuda-gpus
  """
  global GPU_TYPE
  gpu_dict = cuda_compute_capability.retrieve_from_golden()
  out, err = _run_text_cmd(_get_cmd("gpu_type_no_sudo"))
  _report_error("Error in detecting GPU type", err)

  # The command prints the last three fields of the device description,
  # e.g. "GK210GL [Tesla K80]".
  fields = out.split()
  gpu_id = fields[0] if fields else ""
  if len(fields) < 2:
    # No GPU line (or an unexpected format): nothing to look up.
    GPU_TYPE = "unknown"
    return gpu_id, GPU_TYPE

  if "[" in fields[1] or "]" in fields[1]:
    # Bracketed marketing name split over two fields: "[Tesla", "K80]".
    gpu_release = " ".join(fields[1:3]).replace("[", "").replace("]", "")
  else:
    gpu_release = fields[1]

  GPU_TYPE = gpu_release if gpu_release in gpu_dict else "unknown"
  return gpu_id, GPU_TYPE


def get_gpu_count():
  """Retrieves total number of GPU's available in the system.

  Returns:
    String holding the total # of GPU's found, as printed by `wc -l`.
      e.g. '2'
  """
  out, err = _run_text_cmd(_get_cmd("gpu_count_no_sudo"))
  _report_error("Error in detecting GPU count", err)
  return out.strip()


def get_cuda_version_all():
  """Retrieves all additional CUDA versions available (other than default).

  For retrieving default CUDA version, use `get_cuda_version_default` function.

  stderr is silenced by default. Setting FLAGS.debug mode will not enable it.
  Remove `2> /dev/null` command from `cmds_linux['cuda_ver_all']` to enable
  stderr.

  Returns:
    List of all CUDA versions found (except default version).
      e.g. ['10.1', '10.2']
  """
  out, err = _run_text_cmd(_get_cmd("cuda_ver_all"))
  _report_error("Error in detecting CUDA version", err)

  all_vers = []
  for path in out.splitlines():
    # Only versioned directories (`/usr/local/cuda-10.1`) carry group 1; the
    # unversioned default directory matches without it and is skipped.
    ver_re = re.search(r".*/cuda(\-[\d]+\.[\d]+)?", path)
    if ver_re and ver_re.group(1):
      all_vers.append(ver_re.group(1).strip("-"))

  return all_vers


def get_cuda_version_default():
  """Retrieves default CUDA version.

  Default version is the version found in `/usr/local/cuda/` installation.

  stderr is silenced by default. Setting FLAGS.debug mode will not enable it.
  Remove `2> /dev/null` command from `cmds_linux['cuda_ver_dflt']` to enable
  stderr.

  It iterates through two types of version retrieval method and stops at the
  first one that produces output:
    1) Using `nvcc`: If `nvcc` is not available, then it uses next method.
    2) Read version file (`version.txt`) found in CUDA install directory.

  Returns:
    String that is the default CUDA version, or an empty string if no method
    succeeded.
      e.g. '10.1'
  """
  out = ""
  for cmd in _get_cmd("cuda_ver_dflt"):
    try:
      out, problem = _run_text_cmd(cmd)
      out = out.strip()
    except Exception as e:
      out, problem = "", e

    if out:
      break

    if _debug_enabled():
      print("\nWarning: Encountered issue while retrieving default CUDA "
            "version. (%s) Trying a different method...\n" % problem)
  else:
    # Every method came back empty.
    if _debug_enabled():
      print("Error: Cannot retrieve CUDA default version.\nStopping...")
    return ""

  # `nvcc --version` prints a multi-line banner containing "release 10.1";
  # the version file method already yields a bare version string.
  release = re.search(r"release\s+(\d+(?:\.\d+)+)", out)
  return release.group(1) if release else out


def get_cuda_compute_capability(source_from_url=False):
  """Retrieves CUDA compute capability based on the detected GPU type.

  This function uses the `cuda_compute_capability` module to retrieve the
  corresponding CUDA compute capability for the given GPU type. It reads the
  module-level `GPU_TYPE`, so `get_gpu_type()` must be called first.

  Args:
    source_from_url: Boolean deciding whether to source compute capability
                     from NVIDIA website or from a local golden file.

  Returns:
    List of all supported CUDA compute capabilities for the given GPU type,
    or None if the GPU type is not set or is 'unknown'.
      e.g. ['3.5', '3.7']
  """
  if not GPU_TYPE:
    if _debug_enabled():
      print("Warning: GPU_TYPE is empty. "
            "Make sure to call `get_gpu_type()` first.")
    return None

  if GPU_TYPE == "unknown":
    if _debug_enabled():
      print("Warning: Unknown GPU is detected. "
            "Skipping CUDA compute capability retrieval.")
    return None

  if source_from_url:
    cuda_compute_capa = cuda_compute_capability.retrieve_from_web()
  else:
    cuda_compute_capa = cuda_compute_capability.retrieve_from_golden()

  return cuda_compute_capa[GPU_TYPE]


def get_cudnn_version():
  """Retrieves the version of cuDNN library detected.

  Returns:
    String that is the version of cuDNN library detected, or None if
    `cudnn.h` could not be located.
      e.g. '7.5.0'
  """
  cmds = _get_cmd("cudnn_ver")
  out, err = _run_text_cmd(cmds[0])
  _report_error("Error in finding `cudnn.h`", err)

  # `whereis` prints only the "name:" token when nothing was found.
  if len(out.split()) <= 1:
    return None

  out_re, err_re = _run_text_cmd(cmds[0] + " | " + cmds[1])
  _report_error("Error in detecting cuDNN version", err_re)
  return out_re.strip()


def get_gcc_version():
  """Retrieves version of GCC detected.

  Returns:
    String that is the version of GCC.
      e.g. '7.3.0'
  """
  out, err = _run_text_cmd(_get_cmd("gcc_ver"))
  _report_error("Error in detecting GCC version", err)
  return out.strip()


def get_glibc_version():
  """Retrieves version of GLIBC detected.

  Returns:
    String that is the version of GLIBC.
      e.g. '2.24'
  """
  out, err = _run_text_cmd(_get_cmd("glibc_ver"))
  _report_error("Error in detecting GLIBC version", err)
  return out.strip()


def get_libstdcpp_version():
  """Retrieves version of libstdc++ detected.

  Returns:
    String that is the version of libstdc++.
      e.g. '3.4.25'
  """
  out, err = _run_text_cmd(_get_cmd("libstdcpp_ver"))
  _report_error("Error in detecting libstdc++ version", err)

  # Output looks like "GLIBCXX_3.4.25"; keep what follows the last "_".
  return out.split("_")[-1].strip()


def get_cpu_isa_version():
  """Retrieves all Instruction Set Architecture(ISA) available.

  Required ISA(s): 'avx', 'avx2', 'avx512f', 'sse4', 'sse4_1'

  Returns:
    Tuple
      (list of available ISA, list of missing ISA)
    Both lists keep the order of the required ISA list above.
  """
  out, err = _run_text_cmd(_get_cmd("cpu_isa"))
  _report_error("Error in detecting supported ISA", err)

  required_isa = ["avx", "avx2", "avx512f", "sse4", "sse4_1"]
  # Whitespace split also drops the trailing newline after the last flag.
  available = set(out.split())
  found = [isa for isa in required_isa if isa in available]
  missing = [isa for isa in required_isa if isa not in available]
  return found, missing


def get_python_version():
  """Retrieves default Python version.

  Returns:
    String that is the version of default Python.
      e.g. '2.7.4'
  """
  return "%d.%d.%d" % tuple(sys.version_info[:3])


def get_all_configs():
  """Runs all functions for detecting user machine configurations.

  Returns:
    Tuple
      (List of all configurations found,
       List of all missing configurations,
       List of all configurations found with warnings,
       Dict of all configurations)
  """
  # Entries are evaluated in order: the platform must be detected before any
  # command lookup, and the GPU type before the compute capability.
  detected = collections.OrderedDict(
      [("Platform", get_platform()),
       ("CPU", get_cpu_type()),
       ("CPU arch", get_cpu_arch()),
       ("Distribution", get_distrib()),
       ("Distribution version", get_distrib_version()),
       ("GPU", get_gpu_type()[1]),
       ("GPU count", get_gpu_count()),
       ("CUDA version (default)", get_cuda_version_default()),
       ("CUDA versions (all)", get_cuda_version_all()),
       ("CUDA compute capability", get_cuda_compute_capability()),
       ("cuDNN version", get_cudnn_version()),
       ("GCC version", get_gcc_version()),
       ("Python version (default)", get_python_version()),
       ("GNU C Lib (glibc) version", get_glibc_version()),
       ("libstdc++ version", get_libstdcpp_version()),
       ("CPU ISA (min requirement)", get_cpu_isa_version())]
  )
  configs_found = []
  json_data = {}
  missing = []
  warning = []
  for config, ret_val in detected.items():
    if not ret_val:
      configs_found.append([config, "\033[91m\033[1mMissing\033[0m"])
      missing.append([config])
      json_data[config] = ""
    elif ret_val == "unknown":
      configs_found.append([config, "\033[93m\033[1mUnknown type\033[0m"])
      warning.append([config, ret_val])
      json_data[config] = "unknown"
    elif "ISA" in config:
      found_isa, missing_isa = ret_val
      if not missing_isa:
        # Not missing any required ISA
        configs_found.append([config, found_isa])
      else:
        # Show the missing list without its surrounding brackets.
        configs_found.append([
            config,
            "\033[91m\033[1mMissing " + str(missing_isa)[1:-1] + "\033[0m"
        ])
        missing.append(
            [config,
             "\n\t=> Found %s but missing %s"
             % (str(found_isa), str(missing_isa))]
        )
      json_data[config] = found_isa
    else:
      configs_found.append([config, ret_val])
      json_data[config] = ret_val

  return (configs_found, missing, warning, json_data)


def print_all_configs(configs, missing, warning):
  """Prints the status and info on all configurations in a table format.

  Args:
    configs: List of all configurations found.
    missing: List of all configurations that are missing.
    warning: List of all configurations found with warnings.
  """
  llen = 65  # line length
  rows = []
  for name, value in configs:
    if isinstance(value, list):
      val = ", ".join(str(item) for item in value)
    else:
      val = str(value)

    rows.append(" {: <28}".format(name) + " {: <25}".format(val) + "\n")

  # Rows are separated by a dashed rule and closed by a double rule.
  print_text = ("-" * llen + "\n").join(rows) + "=" * llen
  print("\n\n {: ^32} {: ^25}".format("Configuration(s)",
                                      "Detected value(s)"))
  print("=" * llen)
  print(print_text)

  if missing:
    print("\n * ERROR: The following configurations are missing:")
    for m in missing:
      print(" ", *m)

  if warning:
    print("\n * WARNING: The following configurations could cause issues:")
    for w in warning:
      print(" ", *w)

  if not missing and not warning:
    print("\n * INFO: Successfully found all configurations.")

  print("\n")


def save_to_file(json_data, filename):
  """Saves all detected configuration(s) into a JSON file.

  The file is written under `PATH_TO_DIR`, which is a relative path and is
  therefore resolved against the current working directory.

  Args:
    json_data: Dict of all configurations found.
    filename: String that is the name of the output JSON file. `.json` is
              appended when the name does not already end with it.
  """
  if filename[-5:] != ".json":
    print("filename: %s" % filename)
    filename += ".json"

  with open(PATH_TO_DIR + "/" + filename, "w") as f:
    json.dump(json_data, f, sort_keys=True, indent=4)

  print(" Successfully wrote configs to file `%s`.\n" % (filename))


def manage_all_configs(save_results, filename):
  """Manages configuration detection and retrieval based on user input.

  Args:
    save_results: Boolean indicating whether to save the results to a file.
    filename: String that is the name of the output JSON file.
  """
  # Get all configs
  configs_found, missing, warning, json_data = get_all_configs()
  # Print all configs based on user input
  print_all_configs(configs_found, missing, warning)
  # Save all configs to a file based on user request
  if save_results:
    save_to_file(json_data, filename)


def main(argv):
  """Entry point for `app.run`: rejects extra arguments, then runs detection."""
  if len(argv) > 3:
    raise app.UsageError("Too many command-line arguments.")

  manage_all_configs(
      save_results=FLAGS.save_output,
      filename=FLAGS.filename,
  )


if __name__ == "__main__":
  app.run(main)