1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15# ==============================================================================
16"""Auto-detects machine configurations and outputs the results to shell or file.
17
Currently supports Linux only.
19
20Usage:
21  python config_detector.py [--save_output] [--filename] [--debug]
22
23Example command:
24  python config_detector.py --save_output=True --filename=configs.json
25  --debug=False
26
27Flag option(s):
28  save_output  (True | False)       Save output to a file.
29                                    (Default: True)
30  filename     <file_name>.json     Filename(.json) for storing configs.
31                                    (Default: `configs.json`)
32  debug        (True | False)       View debug and stderr messages.
33                                    (Default: False)
34
35The following machine configuration will be detected:
36  Platform              Operating system (linux | macos | windows)
37  CPU                   CPU type (e.g. `GenuineIntel`)
38  CPU architecture      Processor type (32-bit | 64-bit)
39  CPU ISA               CPU instruction set (e.g. `sse4`, `sse4_1`, `avx`)
40  Distribution          Operating system distribution (e.g. Ubuntu)
41  Distribution version  Operating system distribution version (e.g. 14.04)
42  GPU                   GPU type (e.g. `Tesla K80`)
  GPU count             Number of GPUs available
44  CUDA version          CUDA version by default (e.g. `10.1`)
  CUDA version all      All available CUDA version(s)
46  cuDNN version         cuDNN version (e.g. `7.5.0`)
47  GCC version           GCC version (e.g. `7.3.0`)
48  GLIBC version         GLIBC version (e.g. `2.24`)
49  libstdc++ version     libstdc++ version (e.g. `3.4.25`)
50
51Output:
52  Shell output (print)
53      A table containing status and info on all configurations will be
54      printed out to shell.
55
56  Configuration file (.json):
57      Depending on `--save_output` option, this script outputs a .json file
58      (in the same directory) containing all user machine configurations
59      that were detected.
60"""
61# pylint: disable=broad-except
62import collections
63import json
64import re
65import subprocess
66import sys
67
68from absl import app
69from absl import flags
70
71from tensorflow.tools.tensorflow_builder.config_detector.data import cuda_compute_capability
72
# Command-line flag definitions; their values are read through `FLAGS`
# (e.g. `FLAGS.debug`) by the functions below.
FLAGS = flags.FLAGS
flags.DEFINE_boolean(
    name="save_output",
    default=True,
    help="Save output to a file. [True/False]",
)
flags.DEFINE_string(
    name="filename",
    default="configs.json",
    help="Output filename.",
)
flags.DEFINE_boolean(
    name="debug",
    default=False,
    help="View debug messages. [True/False]",
)
78
# Shell command lines used on Linux to query each machine configuration.
# Every value is either a single command string or a list of command strings
# (alternative / chained steps) that is interpreted by the matching `get_*`
# function.
cmds_linux = {
    # CPU vendor line(s) from /proc/cpuinfo.
    "cpu_type": "cat /proc/cpuinfo 2>&1 | grep 'vendor' | uniq",
    # Machine hardware name.
    "cpu_arch": "uname -m",
    # Distribution name / release taken from the /etc/*-release files.
    "distrib": "cat /etc/*-release | grep DISTRIB_ID* | sed 's/^.*=//'",
    "distrib_ver":
        "cat /etc/*-release | grep DISTRIB_RELEASE* | sed 's/^.*=//'",
    # GPU product name (variant that requires sudo).
    "gpu_type": "sudo lshw -C display | grep product:* | sed 's/^.*: //'",
    # GPU product name without sudo: last three fields of the second line
    # of the verbose `lspci` output for the display device(s).
    "gpu_type_no_sudo": (
        r"lspci | grep 'VGA compatible\|3D controller' | cut -d' ' -f 1"
        r" | xargs -i lspci -v -s {} | head -n 2 | tail -1"
        r" | awk '{print $(NF-2), $(NF-1), $NF}'"
    ),
    # Number of display devices (with / without sudo).
    "gpu_count": "sudo lshw -C display | grep *-display:* | wc -l",
    "gpu_count_no_sudo":
        r"lspci | grep 'VGA compatible\|3D controller' | wc -l",
    # All CUDA install directories under /usr/local.
    "cuda_ver_all": "ls -d /usr/local/cuda* 2> /dev/null",
    # Default CUDA version: ask `nvcc`, or read the version file.
    "cuda_ver_dflt": [
        "nvcc --version 2> /dev/null",
        "cat /usr/local/cuda/version.txt 2> /dev/null | awk '{print $NF}'",
    ],
    # cuDNN: locate `cudnn.h`, then extract the version numbers from it.
    "cudnn_ver": [
        "whereis cudnn.h",
        ("cat `awk '{print $2}'` | grep CUDNN_MAJOR -A 2"
         " | echo `awk '{print $NF}'` | awk '{print $1, $2, $3}'"
         " | sed 's/ /./g'"),
    ],
    "gcc_ver": "gcc --version | awk '{print $NF}' | head -n 1",
    "glibc_ver": "ldd --version | tail -n+1 | head -n 1 | awk '{print $NF}'",
    # Second-to-last LIBCXX symbol found in the first libstdc++ library
    # listed by ldconfig.
    "libstdcpp_ver": (
        "strings $(/sbin/ldconfig -p | grep libstdc++ | head -n 1"
        " | awk '{print $NF}') | grep LIBCXX | tail -2 | head -n 1"
    ),
    # First `flags` line of /proc/cpuinfo (CPU instruction-set flags).
    "cpu_isa": "cat /proc/cpuinfo | grep flags | head -n 1",
}

# Command tables keyed by platform name (only Linux is defined).
cmds_all = {"linux": cmds_linux}

# Module-level state and constants.
PLATFORM = None  # Set by `get_platform()`.
GPU_TYPE = None  # Set by `get_gpu_type()`.
# Directory into which `save_to_file()` writes the JSON output.
PATH_TO_DIR = "tensorflow/tools/tensorflow_builder/config_detector"
127
128
def run_shell_cmd(args):
  """Executes a shell command line and returns its captured output.

  The command is run through the shell (`shell=True`) so that the pipes,
  redirections and globs inside `args` are honoured. stdout and stderr are
  captured on separate pipes so callers can tell real output apart from
  error text.

  Args:
    args: String of shell commands to run.

  Returns:
    Tuple (stdoutdata, stderrdata) of raw `bytes` from running the shell
    commands. `stderrdata` is empty (b"") when nothing was written to stderr.
  """
  proc = subprocess.Popen(
      args,
      shell=True,
      stdout=subprocess.PIPE,
      # Keep stderr on its own pipe. Redirecting it into stdout
      # (subprocess.STDOUT) makes the second tuple element always None, which
      # disables every caller's `if err` check and lets error messages leak
      # into the values parsed from stdout.
      stderr=subprocess.PIPE,
  )
  return proc.communicate()
145
146
def get_platform():
  """Retrieves platform information.

  Currently the script only supports Linux. If another platform such as
  Windows or macOS is detected, an error is printed and the process exits
  with status 1.

  Side effect: stores the detected platform in the global `PLATFORM`.

  Returns:
    String that is platform type.
      e.g. 'linux'
  """
  global PLATFORM
  out, err = run_shell_cmd("uname")
  # `run_shell_cmd` returns bytes; decode before comparing with a str,
  # otherwise the comparison below can never succeed on Python 3 and the
  # script would always stop here.
  platform_detected = out.decode("utf-8", errors="replace").strip().lower()
  if platform_detected != "linux":
    if err and FLAGS.debug:
      print("Error in detecting platform:\n %s" % str(err))

    print("Error: Detected unsupported operating system.\nStopping...")
    sys.exit(1)

  PLATFORM = platform_detected
  return PLATFORM
171
172
def get_cpu_type():
  """Retrieves CPU (type) information.

  Parses the first `vendor` line reported by the `cpu_type` command, taking
  the text after the first colon.

  Returns:
    String that is name of the CPU.
      e.g. 'GenuineIntel'
    An empty string is returned when no vendor line could be found.
  """
  key = "cpu_type"
  out, err = run_shell_cmd(cmds_all[PLATFORM][key])
  if err and FLAGS.debug:
    print("Error in detecting CPU type:\n %s" % str(err))

  # Decode the raw bytes so that the result can be printed and serialized.
  lines = out.decode("utf-8", errors="replace").splitlines()
  # No output (or no "key: value" shape) means the vendor is not available;
  # report it as missing instead of raising IndexError.
  if not lines or ":" not in lines[0]:
    return ""

  return lines[0].split(":", 1)[1].strip()
187
188
def get_cpu_arch():
  """Retrieves processor architecture type as reported by `uname -m`.

  Returns:
    String that is CPU architecture.
      e.g. 'x86_64'
  """
  key = "cpu_arch"
  out, err = run_shell_cmd(cmds_all[PLATFORM][key])
  if err and FLAGS.debug:
    print("Error in detecting CPU arch:\n %s" % str(err))

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  return out.decode("utf-8", errors="replace").strip("\n")
202
203
def get_distrib():
  """Retrieves distribution name of the operating system.

  The value comes from the `DISTRIB_ID` entry of the `/etc/*-release` files.

  Returns:
    String that is the name of distribution.
      e.g. 'Ubuntu'
  """
  key = "distrib"
  out, err = run_shell_cmd(cmds_all[PLATFORM][key])
  if err and FLAGS.debug:
    print("Error in detecting distribution:\n %s" % str(err))

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  return out.decode("utf-8", errors="replace").strip("\n")
217
218
def get_distrib_version():
  """Retrieves distribution version of the operating system.

  The value comes from the `DISTRIB_RELEASE` entry of the `/etc/*-release`
  files.

  Returns:
    String that is the distribution version.
      e.g. '14.04'
  """
  key = "distrib_ver"
  out, err = run_shell_cmd(cmds_all[PLATFORM][key])
  if err and FLAGS.debug:
    print(
        "Error in detecting distribution version:\n %s" % str(err)
    )

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  return out.decode("utf-8", errors="replace").strip("\n")
234
235
def get_gpu_type():
  """Retrieves GPU type.

  The `gpu_type_no_sudo` command prints three space-separated tokens. The
  first is returned as the GPU id; the second and third, with any square
  brackets removed, form the GPU name (e.g. `[Tesla K80]` -> `Tesla K80`).

  Side effect: stores the detected GPU name (or 'unknown') in the global
  `GPU_TYPE`.

  Returns:
    Tuple (gpu_id, gpu_type) where `gpu_type` is the name of the detected
    NVIDIA GPU.
      e.g. 'Tesla K80'

    'unknown' will be returned if detected GPU type is an unknown name.
      Unknown name refers to any GPU name that is not specified in this page:
      https://developer.nvidia.com/cuda-gpus
    'unknown' is also returned when the command output cannot be parsed
    (for example when no GPU is present).
  """
  global GPU_TYPE
  key = "gpu_type_no_sudo"
  gpu_dict = cuda_compute_capability.retrieve_from_golden()
  out, err = run_shell_cmd(cmds_all[PLATFORM][key])
  if err and FLAGS.debug:
    print("Error in detecting GPU type:\n %s" % str(err))

  # Work on text: the GPU name is looked up in `gpu_dict`, and a bytes key
  # would never match.
  # NOTE(review): assumes `gpu_dict` is keyed by str GPU names - confirm
  # against `cuda_compute_capability.retrieve_from_golden()`.
  tokens = out.decode("utf-8", errors="replace").strip("\n").split(" ")
  gpu_id = tokens[0]
  if len(tokens) < 3:
    # Empty or unexpected output: report "unknown" instead of raising
    # IndexError.
    GPU_TYPE = "unknown"
    return gpu_id, GPU_TYPE

  # The second and third tokens are always joined: the old bracket test
  # (`"[" or "]" in ...`) was always true, so this is the effective
  # behaviour, kept as is.
  gpu_release = "%s %s" % (
      tokens[1].replace("[", ""),
      tokens[2].replace("]", ""),
  )
  GPU_TYPE = gpu_release if gpu_release in gpu_dict else "unknown"
  return gpu_id, GPU_TYPE
272
273
def get_gpu_count():
  """Retrieves total number of GPUs available in the system.

  Returns:
    String holding the total number of GPUs found (the output of `wc -l`).
      e.g. '1'
  """
  key = "gpu_count_no_sudo"
  out, err = run_shell_cmd(cmds_all[PLATFORM][key])
  if err and FLAGS.debug:
    print("Error in detecting GPU count:\n %s" % str(err))

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  return out.decode("utf-8", errors="replace").strip("\n")
286
287
def get_cuda_version_all():
  """Retrieves all additional CUDA versions available (other than default).

  For retrieving default CUDA version, use `get_cuda_version_default`
  function.

  The versions are taken from the names of the `/usr/local/cuda-<X.Y>`
  directories; a plain `/usr/local/cuda` entry carries no version and is
  skipped.

  stderr is silenced by default. Setting FLAGS.debug mode will not enable it.
  Remove `2> /dev/null` command from `cmds_linux['cuda_ver_all']` to enable
  stderr.

  Returns:
    List of all CUDA versions found (except default version).
      e.g. ['10.1', '10.2']
  """
  key = "cuda_ver_all"
  out, err = run_shell_cmd(cmds_all[PLATFORM.lower()][key])
  if err and FLAGS.debug:
    print("Error in detecting CUDA version:\n %s" % str(err))

  all_vers = []
  # Decode once and iterate over lines; `splitlines()` yields no trailing
  # empty entry, and any non-matching line is skipped rather than
  # dereferencing a failed match.
  for path in out.decode("utf-8", errors="replace").splitlines():
    ver_re = re.search(r".*/cuda(\-[\d]+\.[\d]+)?", path)
    if ver_re and ver_re.group(1):
      all_vers.append(ver_re.group(1).strip("-"))

  return all_vers
319
320
def get_cuda_version_default():
  """Retrieves default CUDA version.

  Default version is the version found in `/usr/local/cuda/` installation.

  stderr is silenced by default. Setting FLAGS.debug mode will not enable it.
  Remove `2> /dev/null` command from `cmds_linux['cuda_ver_dflt']` to enable
  stderr.

  It iterates through two types of version retrieval method and stops at the
  first one that produces output:
    1) Using `nvcc`: If `nvcc` is not available, then it uses next method.
    2) Read version file (`version.txt`) found in CUDA install directory.

  Returns:
    String that is the default CUDA version.
      e.g. '10.1'
    An empty string is returned when no method produced a version.
  """
  key = "cuda_ver_dflt"
  cmd_list = cmds_all[PLATFORM.lower()][key]
  for cmd in cmd_list:
    out, err = run_shell_cmd(cmd)
    text = out.decode("utf-8", errors="replace").strip()
    if text:
      # `nvcc --version` prints a multi-line banner that is expected to
      # contain "release <X.Y>"; extract just the version from it. Output
      # that is already a bare version (the version-file method) is returned
      # unchanged.
      release = re.search(r"release (\d+(?:\.\d+)+)", text)
      # Stop at the first successful method so that a later, failing
      # fallback cannot overwrite a good result.
      return release.group(1) if release else text

    if FLAGS.debug:
      print("\nWarning: Encountered issue while retrieving default CUDA "
            "version. (%s) Trying a different method...\n" % err)

  if FLAGS.debug:
    print("Error: Cannot retrieve CUDA default version.\nStopping...")

  return ""
360
361
def get_cuda_compute_capability(source_from_url=False):
  """Looks up the CUDA compute capability of the detected GPU.

  The lookup table comes from the `cuda_compute_capability` module and is
  indexed with the global `GPU_TYPE`, which `get_gpu_type()` sets.

  Args:
    source_from_url: Boolean deciding whether to source compute capability
                     from NVIDIA website or from a local golden file.

  Returns:
    List of all supported CUDA compute capabilities for the given GPU type.
      e.g. ['3.5', '3.7']
    None is returned when `GPU_TYPE` is empty or 'unknown'.
  """
  # Guard: the GPU type has not been detected yet.
  if not GPU_TYPE:
    if FLAGS.debug:
      print("Warning: GPU_TYPE is empty. "
            "Make sure to call `get_gpu_type()` first.")
    return None

  # Guard: nothing to look up for an unrecognized GPU.
  if GPU_TYPE == "unknown":
    if FLAGS.debug:
      print("Warning: Unknown GPU is detected. "
            "Skipping CUDA compute capability retrieval.")
    return None

  retrieve = (
      cuda_compute_capability.retrieve_from_web
      if source_from_url
      else cuda_compute_capability.retrieve_from_golden
  )
  return retrieve()[GPU_TYPE]
394
395
def get_cudnn_version():
  """Retrieves the version of cuDNN library detected.

  First locates `cudnn.h` with the first `cudnn_ver` command; only when that
  output contains more than one space-separated token (i.e. a path was
  reported) is it piped into the second command to extract the version.

  Returns:
    String that is the version of cuDNN library detected.
      e.g. '7.5.0'
    None is returned when `cudnn.h` could not be located.
  """
  key = "cudnn_ver"
  cmds = cmds_all[PLATFORM.lower()][key]
  out, err = run_shell_cmd(cmds[0])
  if err and FLAGS.debug:
    print("Error in finding `cudnn.h`:\n %s" % str(err))

  # A single token means no path followed the name, i.e. nothing was found.
  if len(out.split(b" ")) <= 1:
    return None

  cmd = cmds[0] + " | " + cmds[1]
  out_re, err_re = run_shell_cmd(cmd)
  if err_re and FLAGS.debug:
    print("Error in detecting cuDNN version:\n %s" % str(err_re))

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  return out_re.decode("utf-8", errors="replace").strip("\n")
418
419
def get_gcc_version():
  """Retrieves version of GCC detected.

  Returns:
    String that is the version of GCC.
      e.g. '7.3.0'
  """
  key = "gcc_ver"
  out, err = run_shell_cmd(cmds_all[PLATFORM.lower()][key])
  if err and FLAGS.debug:
    print("Error in detecting GCC version:\n %s" % str(err))

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  return out.decode("utf-8", errors="replace").strip("\n")
433
434
def get_glibc_version():
  """Retrieves version of GLIBC detected.

  Returns:
    String that is the version of GLIBC.
      e.g. '2.24'
  """
  key = "glibc_ver"
  out, err = run_shell_cmd(cmds_all[PLATFORM.lower()][key])
  if err and FLAGS.debug:
    # The message previously said "GCC" (copy-paste slip).
    print("Error in detecting GLIBC version:\n %s" % str(err))

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  return out.decode("utf-8", errors="replace").strip("\n")
448
449
def get_libstdcpp_version():
  """Retrieves version of libstdc++ detected.

  The command output is a symbol name; the version is the text after its
  last underscore.

  Returns:
    String that is the version of libstdc++.
      e.g. '3.4.25'
  """
  key = "libstdcpp_ver"
  out, err = run_shell_cmd(cmds_all[PLATFORM.lower()][key])
  if err and FLAGS.debug:
    print("Error in detecting libstdc++ version:\n %s" % str(err))

  # Decode the raw bytes: a bytes value cannot be formatted into the result
  # table nor serialized to JSON.
  text = out.decode("utf-8", errors="replace")
  return text.split("_")[-1].replace("\n", "")
464
465
def get_cpu_isa_version():
  """Retrieves all Instruction Set Architecture(ISA) available.

  Required ISA(s): 'avx', 'avx2', 'avx512f', 'sse4', 'sse4_1'

  Each required ISA is looked up among the whitespace-separated tokens of
  the first `flags` line of `/proc/cpuinfo`.

  Returns:
    Tuple
      (list of available ISA, list of missing ISA)
    Both lists keep the order of the required ISA list.
  """
  key = "cpu_isa"
  out, err = run_shell_cmd(cmds_all[PLATFORM.lower()][key])
  if err and FLAGS.debug:
    print("Error in detecting supported ISA:\n %s" % str(err))

  required_isa = ["avx", "avx2", "avx512f", "sse4", "sse4_1"]
  # Decode before comparing (a str never equals a bytes token) and split on
  # any whitespace so that the last flag does not keep its trailing newline.
  sys_isa = set(out.decode("utf-8", errors="replace").split())
  found = [isa for isa in required_isa if isa in sys_isa]
  missing = [isa for isa in required_isa if isa not in sys_isa]
  return found, missing
492
493
def get_python_version():
  """Retrieves the version of the Python interpreter running this script.

  Returns:
    String that is the version of Python, as 'major.minor.micro'.
      e.g. '2.7.4'
  """
  # Read the numeric fields directly. Parsing `str(sys.version_info)` with a
  # single-digit pattern breaks for two-digit minor versions (e.g. 3.10).
  return "%d.%d.%d" % sys.version_info[:3]
504
505
def get_all_configs():
  """Runs all functions for detecting user machine configurations.

  Each detected value is classified as found, missing (falsy value) or
  warning (the string 'unknown'). The CPU ISA entry is a
  (found, missing) tuple and is reported as missing when any required ISA is
  absent.

  Returns:
    Tuple
      (List of all configurations found,
       List of all missing configurations,
       List of all configurations found with warnings,
       Dict of all configurations)
  """
  # Evaluation order matters: `get_platform()` sets PLATFORM, which the other
  # detectors read, and `get_gpu_type()` sets GPU_TYPE, which
  # `get_cuda_compute_capability()` reads.
  all_results = collections.OrderedDict(
      [("Platform", get_platform()),
       ("CPU", get_cpu_type()),
       ("CPU arch", get_cpu_arch()),
       ("Distribution", get_distrib()),
       ("Distribution version", get_distrib_version()),
       ("GPU", get_gpu_type()[1]),
       ("GPU count", get_gpu_count()),
       ("CUDA version (default)", get_cuda_version_default()),
       ("CUDA versions (all)", get_cuda_version_all()),
       # Called without arguments: its only parameter is the boolean
       # `source_from_url`, so passing the GPU name (a non-empty string)
       # would silently switch the lookup to the web source.
       ("CUDA compute capability", get_cuda_compute_capability()),
       ("cuDNN version", get_cudnn_version()),
       ("GCC version", get_gcc_version()),
       ("Python version (default)", get_python_version()),
       ("GNU C Lib (glibc) version", get_glibc_version()),
       ("libstdc++ version", get_libstdcpp_version()),
       ("CPU ISA (min requirement)", get_cpu_isa_version())]
  )
  configs_found = []
  json_data = {}
  missing = []
  warning = []
  for config, raw_val in all_results.items():
    ret_val = _decode_if_bytes(raw_val)
    if not ret_val:
      configs_found.append([config, "\033[91m\033[1mMissing\033[0m"])
      missing.append([config])
      json_data[config] = ""
    elif ret_val == "unknown":
      configs_found.append([config, "\033[93m\033[1mUnknown type\033[0m"])
      warning.append([config, ret_val])
      json_data[config] = "unknown"
    elif "ISA" in config:
      found_isa, missing_isa = ret_val[0], ret_val[1]
      if not missing_isa:
        # Not missing any required ISA
        configs_found.append([config, found_isa])
      else:
        configs_found.append([
            config,
            # Strip the brackets from the list's string form (slicing the
            # list itself would drop its first and last entries instead).
            "\033[91m\033[1mMissing " + str(missing_isa)[1:-1] + "\033[0m"
        ])
        missing.append(
            [config,
             "\n\t=> Found %s but missing %s"
             % (str(found_isa), str(missing_isa))]
        )
      json_data[config] = found_isa
    else:
      configs_found.append([config, ret_val])
      json_data[config] = ret_val

  return (configs_found, missing, warning, json_data)


def _decode_if_bytes(value):
  """Returns `value` decoded to str when it is bytes, unchanged otherwise."""
  # Raw shell output is bytes; it can neither be compared with 'unknown' nor
  # be formatted or serialized to JSON later on.
  if isinstance(value, bytes):
    return value.decode("utf-8", errors="replace")
  return value
573
574
def print_all_configs(configs, missing, warning):
  """Prints the status and info on all configurations in a table format.

  Values may be strings, bytes, lists/tuples or any other printable object;
  they are converted to text before being laid out in the table.

  Args:
    configs: List of all configurations found, as [name, value] rows.
    missing: List of all configurations that are missing.
    warning: List of all configurations found with warnings.
  """
  llen = 65  # line length
  rows = []
  for row in configs:
    if isinstance(row[1], (list, tuple)):
      val = ", ".join(_cell_text(item) for item in row[1])
    else:
      val = _cell_text(row[1])

    rows.append(" {: <28}".format(row[0]) + "    {: <25}".format(val) + "\n")

  # Rows are separated by a dashed line and the table is closed by a line of
  # "=" characters.
  print_text = ("-" * llen + "\n").join(rows) + "=" * llen
  print("\n\n {: ^32}    {: ^25}".format("Configuration(s)",
                                         "Detected value(s)"))
  print("=" * llen)
  print(print_text)

  if missing:
    print("\n * ERROR: The following configurations are missing:")
    for m in missing:
      print("   ", *(_cell_text(item) for item in m))

  if warning:
    print("\n * WARNING: The following configurations could cause issues:")
    for w in warning:
      print("   ", *(_cell_text(item) for item in w))

  if not missing and not warning:
    print("\n * INFO: Successfully found all configurations.")

  print("\n")


def _cell_text(value):
  """Converts a single value to the text shown in the output."""
  # bytes (raw shell output) and other non-str objects do not accept the
  # padding format spec used for the table, so normalise everything to str.
  if isinstance(value, bytes):
    return value.decode("utf-8", errors="replace")
  return str(value)
617
def save_to_file(json_data, filename):
  """Saves all detected configuration(s) into a JSON file.

  The file is written inside `PATH_TO_DIR` (resolved against the current
  working directory). A `.json` suffix is appended to `filename` when it is
  missing.

  Args:
    json_data: Dict of all configurations found.
    filename: String that is the name of the output JSON file.
  """
  if not filename.endswith(".json"):
    print("filename: %s" % filename)
    filename += ".json"

  with open(PATH_TO_DIR + "/" + filename, "w") as f:
    # `default` lets raw shell output (bytes) be written as text instead of
    # aborting half-way through the file with a TypeError.
    json.dump(json_data, f, sort_keys=True, indent=4, default=_json_default)

  print(" Successfully wrote configs to file `%s`.\n" % (filename))


def _json_default(value):
  """Serializes bytes as text; rejects every other unsupported type."""
  if isinstance(value, bytes):
    return value.decode("utf-8", errors="replace")
  raise TypeError(
      "Object of type %s is not JSON serializable" % type(value).__name__
  )
633
634
def manage_all_configs(save_results, filename):
  """Detects all configurations, prints them and optionally saves them.

  Args:
    save_results: Boolean indicating whether to save the results to a file.
    filename: String that is the name of the output JSON file.
  """
  # Run every detector once and split the result into its four parts.
  configs, missing, warning, json_data = get_all_configs()

  # The table is always printed.
  print_all_configs(configs, missing, warning)

  # Writing the JSON file is optional.
  if not save_results:
    return

  save_to_file(json_data, filename)
649
650
def main(argv):
  """Entry point handed to `app.run`; runs detection with the flag values.

  Args:
    argv: Sequence of command-line arguments received from `app.run`.

  Raises:
    app.UsageError: If more than three arguments are received.
  """
  max_args = 3
  if len(argv) > max_args:
    raise app.UsageError("Too many command-line arguments.")

  manage_all_configs(FLAGS.save_output, FLAGS.filename)


if __name__ == "__main__":
  app.run(main)
663