# mypy: allow-untyped-defs
import copy
import warnings
from collections import namedtuple
from typing import Any, Optional, Type, Union
from typing_extensions import deprecated

import torch
import torch.nn as nn
from torch.ao.quantization.fake_quantize import (
    default_dynamic_fake_quant,
    default_embedding_fake_quant,
    default_embedding_fake_quant_4bit,
    default_fake_quant,
    default_fused_act_fake_quant,
    default_fused_per_channel_wt_fake_quant,
    default_fused_wt_fake_quant,
    default_per_channel_weight_fake_quant,
    default_weight_fake_quant,
    FakeQuantize,
    FakeQuantizeBase,
    fused_per_channel_wt_fake_quant_range_neg_127_to_127,
    fused_wt_fake_quant_range_neg_127_to_127,
    FusedMovingAvgObsFakeQuantize,
)

from .observer import (
    _PartialWrapper,
    default_debug_observer,
    default_dynamic_quant_observer,
    default_float_qparams_observer,
    default_float_qparams_observer_4bit,
    default_observer,
    default_per_channel_weight_observer,
    default_placeholder_observer,
    default_reuse_input_observer,
    default_weight_observer,
    HistogramObserver,
    MinMaxObserver,
    MovingAverageMinMaxObserver,
    NoopObserver,
    ObserverBase,
    per_channel_weight_observer_range_neg_127_to_127,
    PlaceholderObserver,
    ReuseInputObserver,
    weight_observer_range_neg_127_to_127,
)


__all__ = [
    "QConfig",
    # TODO: deprecated, remove
    "QConfigDynamic",
    "default_qconfig",
    "default_debug_qconfig",
    "default_per_channel_qconfig",
    "default_dynamic_qconfig",
    "float16_dynamic_qconfig",
    "float16_static_qconfig",
    "per_channel_dynamic_qconfig",
    "float_qparams_weight_only_qconfig",
    "float_qparams_weight_only_qconfig_4bit",
    "default_quint8_weight_qconfig",
    "default_qat_qconfig",
    "default_dynamic_qat_qconfig",
    "default_weight_only_qconfig",
    "default_activation_only_qconfig",
    "default_qat_qconfig_v2",
    "default_reuse_input_qconfig",
    "default_symmetric_qnnpack_qconfig",
    "default_per_channel_symmetric_qnnpack_qconfig",
    "default_symmetric_qnnpack_qat_qconfig",
    "default_per_channel_symmetric_qnnpack_qat_qconfig",
    "default_embedding_qat_qconfig",
    "default_embedding_qat_qconfig_4bit",
    "get_default_qconfig",
    "get_default_qat_qconfig",
    "get_default_qconfig_dict",
    "get_default_qat_qconfig_dict",
    "QConfigAny",
    "qconfig_equals",
]


class QConfig(namedtuple("QConfig", ["activation", "weight"])):
    """
    Describes how to quantize a layer or a part of the network by providing
    settings (observer classes) for activations and weights respectively.


    Note that QConfig needs to contain observer **classes** (like MinMaxObserver) or a callable that returns
    instances on invocation, not the concrete observer instances themselves.
    The quantization preparation function will instantiate observers multiple times for each of the layers.


    Observer classes usually have reasonable default arguments, but they can be overridden with the `with_args`
    method (which behaves like functools.partial)::

      my_qconfig = QConfig(
          activation=MinMaxObserver.with_args(dtype=torch.qint8),
          weight=default_observer.with_args(dtype=torch.qint8))

    """

    def __new__(cls, activation, weight):
        # catch common mistakes
        if isinstance(activation, nn.Module) or isinstance(weight, nn.Module):
            raise ValueError(
                "QConfig received observer instance, please pass observer class instead. "
                + "Use MyObserver.with_args(x=1) to override arguments to constructor if needed"
            )
        return super().__new__(cls, activation, weight)


@deprecated(
    "`QConfigDynamic` is going to be deprecated in PyTorch 1.12, please use `QConfig` instead",
    category=FutureWarning,
)
class QConfigDynamic(namedtuple("QConfigDynamic", ["activation", "weight"])):
    """
    Describes how to dynamically quantize a layer or a part of the network by providing
    settings (observer classes) for weights.

    It's like QConfig, but for dynamic quantization.

    Note that QConfigDynamic needs to contain observer **classes** (like MinMaxObserver) or a callable that returns
    instances on invocation, not the concrete observer instances themselves.
    The quantization function will instantiate observers multiple times for each of the layers.

    Observer classes usually have reasonable default arguments, but they can be overridden with the `with_args`
    method (which behaves like functools.partial)::

      my_qconfig = QConfigDynamic(weight=default_observer.with_args(dtype=torch.qint8))
    """

    def __new__(cls, activation=torch.nn.Identity, weight=torch.nn.Identity):
        # catch common mistakes
        if isinstance(weight, nn.Module):
            raise ValueError(
                "QConfigDynamic received observer instance, please pass observer class instead. "
                + "Use MyObserver.with_args(x=1) to override arguments to constructor if needed"
            )
        return super().__new__(cls, activation, weight)


default_qconfig = QConfig(activation=default_observer, weight=default_weight_observer)
"""
Default qconfig configuration.
"""

default_debug_qconfig = QConfig(
    weight=default_weight_observer, activation=default_debug_observer
)
"""
Default qconfig configuration for debugging.
"""

default_per_channel_qconfig = QConfig(
    activation=default_observer, weight=default_per_channel_weight_observer
)
"""
Default qconfig configuration for per channel weight quantization.
"""

default_dynamic_qconfig = QConfig(
    activation=default_dynamic_quant_observer, weight=default_weight_observer
)
"""
Default dynamic qconfig.
"""

float16_dynamic_qconfig = QConfig(
    activation=PlaceholderObserver.with_args(dtype=torch.float16, is_dynamic=True),
    weight=PlaceholderObserver.with_args(dtype=torch.float16),
)
"""
Dynamic qconfig with weights quantized to `torch.float16`.
"""

float16_static_qconfig = QConfig(
    activation=PlaceholderObserver.with_args(dtype=torch.float16),
    weight=PlaceholderObserver.with_args(dtype=torch.float16),
)
"""
Static qconfig with both activations and weights quantized to `torch.float16`.
"""

per_channel_dynamic_qconfig = QConfig(
    activation=default_dynamic_quant_observer,
    weight=default_per_channel_weight_observer,
)
"""
Dynamic qconfig with weights quantized per channel.
"""

float_qparams_weight_only_qconfig = QConfig(
    activation=default_placeholder_observer, weight=default_float_qparams_observer
)
"""
Dynamic qconfig with weights quantized with a floating point zero_point.
"""

float_qparams_weight_only_qconfig_4bit = QConfig(
    activation=default_placeholder_observer, weight=default_float_qparams_observer_4bit
)

default_qat_qconfig = QConfig(
    activation=default_fake_quant, weight=default_weight_fake_quant
)
"""
Default qconfig for QAT.
"""


default_dynamic_qat_qconfig = QConfig(
    activation=default_dynamic_fake_quant, weight=default_weight_fake_quant
)
"""
Default qconfig for dynamic QAT.
"""

default_weight_only_qconfig = QConfig(
    activation=torch.nn.Identity, weight=default_weight_fake_quant
)
"""
Default qconfig for quantizing weights only.
"""

default_activation_only_qconfig = QConfig(
    activation=default_fake_quant, weight=torch.nn.Identity
)
"""
Default qconfig for quantizing activations only.
"""

# QAT config that uses fused observer + fake_quant modules for optimized training performance.
# To modify the activation/weight observers, the default entries in fake_quantize.py can be modified.
default_qat_qconfig_v2 = QConfig(
    activation=default_fused_act_fake_quant, weight=default_fused_wt_fake_quant
)
"""
Fused version of `default_qat_qconfig`, with performance benefits.
"""

default_reuse_input_qconfig = QConfig(
    activation=default_reuse_input_observer, weight=NoopObserver
)
"""
Default qconfig for operators that reuse the observer from the input Tensor, e.g. reshape.
"""


def get_default_qconfig(backend="x86", version=0):
    """
    Returns the default PTQ qconfig for the specified backend.

    Args:
      * `backend` (str): a string representing the target backend. Currently supports
        `x86` (default), `fbgemm`, `qnnpack` and `onednn`.
      * `version`: version, for backwards compatibility. Must be `0` (default).

    Return:
        qconfig
    """
    supported_backends = ["fbgemm", "x86", "qnnpack", "onednn"]
    if backend not in supported_backends:
        raise AssertionError(
            "backend: "
            + str(backend)
            + f" not supported. backend must be one of {supported_backends}"
        )

    if version == 0:
        if backend == "fbgemm":
            qconfig = QConfig(
                activation=HistogramObserver.with_args(reduce_range=True),
                weight=default_per_channel_weight_observer,
            )
        elif backend == "qnnpack":
            # TODO: make this compatible with xnnpack constraints
            qconfig = QConfig(
                activation=HistogramObserver.with_args(reduce_range=False),
                weight=default_weight_observer,
            )
        elif backend == "onednn":
            if not torch.cpu._is_vnni_supported():
                warnings.warn(
                    "Default qconfig of oneDNN backend with reduce_range of false may have accuracy issues "
                    "on CPU without Vector Neural Network Instruction support."
                )
            qconfig = QConfig(
                activation=HistogramObserver.with_args(reduce_range=False),
                weight=default_per_channel_weight_observer,
            )
        elif backend == "x86":
            qconfig = QConfig(
                activation=HistogramObserver.with_args(reduce_range=True),
                weight=default_per_channel_weight_observer,
            )
        else:
            # won't reach
            qconfig = default_qconfig
    else:
        raise AssertionError(
            "Version number: "
            + str(version)
            + " in get_default_qconfig is not supported. Version number must be 0"
        )

    return qconfig

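
# Illustrative sketch (assumed usage, not part of this module's API): pair the
# backend-specific qconfig returned by `get_default_qconfig` with the matching
# quantized engine so that the prepared/converted model runs on the intended
# kernels. `_example_get_backend_qconfig` is a hypothetical helper name, and the
# engine assignment assumes the chosen backend is available in the current build.
def _example_get_backend_qconfig(backend: str = "x86") -> QConfig:
    torch.backends.quantized.engine = backend  # e.g. "x86" or "qnnpack"
    return get_default_qconfig(backend)
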

"""
Default, symmetric PTQ qconfig for the specified backend, and a per_channel
variant of the same.

Symmetric here applies to signed weights with zero point = 0, and additional
value restrictions. The activations are also signed 8-bit integers with this
qconfig.

    * Once this change is merged [as of 3/17/22], with backend or qengine =
    'qnnpack', some quantized operators with this symmetric qconfig may use
    operators from the xnnpack library.

        ** Support to use xnnpack ops with the `qnnpack` backend for asymmetric
        qconfig (returned by get_default_qconfig()) is not available yet.

    * This qconfig uses signed activations and weights. Weights have additional
    restrictions, such as the zero point being forced to 0, making the weights
    symmetric, hence the name. The 8-bit quantized values are restricted to
    [-127, +127], excluding -128.

    * xnnpack has a requantization scale value restriction, 0x1p-32 <=
    requantization_scale < 256.0, where `requantization_scale = (input_scale
    * kernel_scale) / (output_scale)`. Using this eps (with an assumed max value
    of 256) prevents requantization_scale from going below the xnnpack lower
    threshold.
"""
default_symmetric_qnnpack_qconfig = QConfig(
    activation=HistogramObserver.with_args(
        dtype=torch.qint8, reduce_range=False, eps=2**-12
    ),
    weight=weight_observer_range_neg_127_to_127,
)

default_per_channel_symmetric_qnnpack_qconfig = QConfig(
    activation=HistogramObserver.with_args(
        dtype=torch.qint8, reduce_range=False, eps=2**-12
    ),
    weight=per_channel_weight_observer_range_neg_127_to_127,
)

default_embedding_qat_qconfig = QConfig(
    activation=NoopObserver.with_args(dtype=torch.float32),
    weight=default_embedding_fake_quant,
)

default_embedding_qat_qconfig_4bit = QConfig(
    activation=NoopObserver.with_args(dtype=torch.float32),
    weight=default_embedding_fake_quant_4bit,
)

default_quint8_weight_qconfig = QConfig(
    activation=HistogramObserver, weight=MinMaxObserver
)

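
# Illustrative sketch (assumed usage, not part of this module's API): as described
# in the comment block above, the symmetric qnnpack qconfigs are intended for the
# `qnnpack` engine, where eligible quantized operators may be lowered to XNNPACK
# kernels. `_example_symmetric_qnnpack_ptq` is a hypothetical helper name.
def _example_symmetric_qnnpack_ptq(model: nn.Module) -> nn.Module:
    torch.backends.quantized.engine = "qnnpack"
    model.eval()
    model.qconfig = default_per_channel_symmetric_qnnpack_qconfig
    return torch.ao.quantization.prepare(model)
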

def get_default_qat_qconfig(backend="x86", version=1):
    """
    Returns the default QAT qconfig for the specified backend.

    Args:
      * `backend` (str): a string representing the target backend. Currently supports
        `x86` (default), `fbgemm`, `qnnpack` and `onednn`.
      * `version`: version, for backwards compatibility. Can be `0` or `1` (default).

    Return:
        qconfig
    """
    supported_backends = ["fbgemm", "x86", "qnnpack", "onednn"]
    if backend not in supported_backends:
        raise AssertionError(
            "backend: "
            + str(backend)
            + f" not supported. backend must be one of {supported_backends}"
        )

    # Histogram observer is too slow for quantization aware training
    if version == 0:
        if backend == "fbgemm":
            qconfig = QConfig(
                activation=FakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver,
                    quant_min=0,
                    quant_max=255,
                    reduce_range=True,
                ),
                weight=default_per_channel_weight_fake_quant,
            )
        elif backend == "qnnpack":
            qconfig = QConfig(
                activation=FakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver,
                    quant_min=0,
                    quant_max=255,
                    reduce_range=False,
                ),
                weight=default_weight_fake_quant,
            )
        elif backend == "onednn":
            qconfig = QConfig(
                activation=FakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255
                ),
                weight=default_per_channel_weight_fake_quant,
            )
        elif backend == "x86":
            qconfig = QConfig(
                activation=FakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver,
                    quant_min=0,
                    quant_max=255,
                    reduce_range=True,
                ),
                weight=default_per_channel_weight_fake_quant,
            )
        else:
            qconfig = default_qat_qconfig
    # Use the fused observer + fake_quant modules for doing QAT.
    elif version == 1:
        if backend == "fbgemm":
            qconfig = QConfig(
                activation=FusedMovingAvgObsFakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver,
                    quant_min=0,
                    quant_max=255,
                    reduce_range=True,
                ),
                weight=default_fused_per_channel_wt_fake_quant,
            )
        elif backend == "qnnpack":
            # TODO: make this compatible with xnnpack constraints
            qconfig = QConfig(
                activation=FusedMovingAvgObsFakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver,
                    quant_min=0,
                    quant_max=255,
                    reduce_range=False,
                ),
                weight=default_fused_wt_fake_quant,
            )
        elif backend == "onednn":
            qconfig = QConfig(
                activation=FusedMovingAvgObsFakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255
                ),
                weight=default_fused_per_channel_wt_fake_quant,
            )
        elif backend == "x86":
            qconfig = QConfig(
                activation=FusedMovingAvgObsFakeQuantize.with_args(
                    observer=MovingAverageMinMaxObserver,
                    quant_min=0,
                    quant_max=255,
                    reduce_range=True,
                ),
                weight=default_fused_per_channel_wt_fake_quant,
            )
        else:
            qconfig = default_qat_qconfig_v2
    else:
        raise AssertionError(
            "Version number: "
            + str(version)
            + " in get_default_qat_qconfig is not supported. Version number must be 0 or 1"
        )

    return qconfig


"""
Default symmetric QAT qconfig for qnnpack, and its per channel weight variant.
"""
default_symmetric_qnnpack_qat_qconfig = QConfig(
    activation=FusedMovingAvgObsFakeQuantize.with_args(
        observer=MovingAverageMinMaxObserver,
        quant_min=-128,
        quant_max=127,
        dtype=torch.qint8,
        reduce_range=False,
        eps=2**-12,
    ),
    weight=fused_wt_fake_quant_range_neg_127_to_127,
)

default_per_channel_symmetric_qnnpack_qat_qconfig = QConfig(
    activation=FusedMovingAvgObsFakeQuantize.with_args(
        observer=MovingAverageMinMaxObserver,
        quant_min=-128,
        quant_max=127,
        dtype=torch.qint8,
        reduce_range=False,
        eps=2**-12,
    ),
    weight=fused_per_channel_wt_fake_quant_range_neg_127_to_127,
)

_default_fp32_placeholder_qconfig = QConfig(
    activation=PlaceholderObserver.with_args(dtype=torch.float32),
    weight=PlaceholderObserver.with_args(dtype=torch.float32),
)

_default_quint8_placeholder_qconfig = QConfig(
    activation=PlaceholderObserver.with_args(dtype=torch.quint8),
    # operators using this qconfig don't have weights
    weight=None,
)


@deprecated(
    "`torch.ao.quantization.get_default_qconfig_dict` is deprecated and will be removed in "
    "a future version. Please use `torch.ao.quantization.get_default_qconfig_mapping` instead.",
    category=FutureWarning,
)
def get_default_qconfig_dict(backend="x86", version=0):
    return torch.ao.quantization.get_default_qconfig_mapping(backend, version).to_dict()


@deprecated(
    "`torch.ao.quantization.get_default_qat_qconfig_dict` is deprecated and will be removed in "
    "a future version. Please use `torch.ao.quantization.get_default_qat_qconfig_mapping` instead.",
    category=FutureWarning,
)
def get_default_qat_qconfig_dict(backend="x86", version=1):
    return torch.ao.quantization.get_default_qat_qconfig_mapping(
        backend, version
    ).to_dict()

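
# Illustrative sketch (assumed usage, not part of this module's API): a typical
# eager-mode QAT preparation with the version-1 (fused observer + fake_quant)
# qconfig returned by `get_default_qat_qconfig`. `_example_qat_prepare` is a
# hypothetical helper name; training and conversion steps are omitted.
def _example_qat_prepare(model: nn.Module) -> nn.Module:
    model.train()
    model.qconfig = get_default_qat_qconfig("x86", version=1)
    return torch.ao.quantization.prepare_qat(model)
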

def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> None:
    """
    Verifies that this `qconfig` is valid.
    """
    if qconfig is None:
        return
    is_conv_transpose_mod = isinstance(
        mod,
        (torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d, torch.nn.ConvTranspose3d),
    )
    if is_conv_transpose_mod:
        if qconfig.weight is None:
            # for now, we assume that any qconfig for ConvTranspose without a weight is valid
            return
        example_observer = qconfig.weight()
        is_per_channel = isinstance(
            example_observer,
            (
                torch.ao.quantization.PerChannelMinMaxObserver,
                torch.ao.quantization.MovingAveragePerChannelMinMaxObserver,
            ),
        )
        assert (
            not is_per_channel
        ), "Per channel weight observer is not supported yet for ConvTranspose{n}d."


QConfigAny = Optional[QConfig]
QConfigAny.__module__ = "torch.ao.quantization.qconfig"


def _add_module_to_qconfig_obs_ctr(
    qconfig: QConfigAny, module: Optional[nn.Module]
) -> Any:
    r"""This is a helper function for use in quantization prepare that updates a qconfig so that
    the constructors stored in the qconfig will create observers on the same device that
    'module' is on. This is intended to be used when the qconfigs are propagated to each
    module in order to avoid potential device alignment issues.

    Args:
        qconfig: QConfig with obs constructors stored in activation and weight
        module: module which the qconfig is related to

    Return:
        qconfig: configured so that obs constructors are set to construct on the same device as module
    """

    if module is None or qconfig is None or qconfig._fields != ("activation", "weight"):
        return qconfig

    def get_factory_kwargs_based_on_module_device():
        assert isinstance(module, torch.nn.Module)
        devices = {p.device for p in module.parameters()} | {
            p.device for p in module.buffers()
        }
        device = next(iter(devices)) if len(devices) > 0 else None
        return None if device is None else {"device": device}

    def configure_constructor_to_put_obs_on_module_device(original_constructor):
        try:
            # check if constructor can accept factory_kwargs
            check = original_constructor.with_args(factory_kwargs=None)
            check()
            return original_constructor.with_callable_args(
                factory_kwargs=get_factory_kwargs_based_on_module_device
            )
        except AttributeError:  # qconfig doesn't have activation or weight
            return original_constructor
        except TypeError:  # the class doesn't accept factory_kwargs argument
            return original_constructor

    activation = configure_constructor_to_put_obs_on_module_device(qconfig.activation)
    weight = configure_constructor_to_put_obs_on_module_device(qconfig.weight)

    return QConfig(activation, weight)

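
# Illustrative sketch (assumed usage, not part of this module's API): during prepare,
# `_add_module_to_qconfig_obs_ctr` is applied per module so that observers are later
# constructed on that module's device. `_example_device_aware_qconfig` is a
# hypothetical helper name; the Linear module below is only a stand-in.
def _example_device_aware_qconfig() -> QConfigAny:
    module = nn.Linear(8, 8)  # parameters live on CPU, so observers will be built with device=cpu
    return _add_module_to_qconfig_obs_ctr(default_qconfig, module)
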

_ObserverOrFakeQuantizeConstructor = Union[
    _PartialWrapper, Type[ObserverBase], Type[FakeQuantizeBase]
]


def _obs_or_fq_ctr_equals(
    obs_or_fq1: _ObserverOrFakeQuantizeConstructor,
    obs_or_fq2: _ObserverOrFakeQuantizeConstructor,
):
    if isinstance(obs_or_fq1, _PartialWrapper) and isinstance(
        obs_or_fq2, _PartialWrapper
    ):
        return _partial_wrapper_equals(obs_or_fq1, obs_or_fq2)
    return obs_or_fq1 == obs_or_fq2


def _partial_wrapper_equals(obs_or_fq1: _PartialWrapper, obs_or_fq2: _PartialWrapper):
    """
    Return whether the two partial wrappers are equal.
    """
    # functools.partial has no __eq__ operator defined so '==' defaults to 'is'
    obs_or_fq1_keywords = copy.copy(obs_or_fq1.p.keywords)
    obs_or_fq2_keywords = copy.copy(obs_or_fq2.p.keywords)
    keywords_equal = True
    # compare observer constructor with _obs_or_fq_ctr_equals since direct compare would fail
    if "observer" in obs_or_fq1_keywords and "observer" in obs_or_fq2_keywords:
        keywords_equal = keywords_equal and _obs_or_fq_ctr_equals(
            obs_or_fq1_keywords["observer"], obs_or_fq2_keywords["observer"]
        )
        obs_or_fq1_keywords.pop("observer")
        obs_or_fq2_keywords.pop("observer")
    keywords_equal = keywords_equal and obs_or_fq1_keywords == obs_or_fq2_keywords
    return (
        obs_or_fq1.p.func == obs_or_fq2.p.func
        and obs_or_fq1.p.args == obs_or_fq2.p.args
        and keywords_equal
    )


def qconfig_equals(q1: QConfigAny, q2: QConfigAny):
    """
    Returns `True` if `q1` equals `q2`, and `False` otherwise.
    """
    if q1 is None or q2 is None:
        return q1 == q2
    else:
        assert q1 is not None and q2 is not None
        try:
            # Qconfig weight and activation can be either a partial wrapper,
            # or an observer class. Special handling is required (above) for
            # comparing partial wrappers.
            activation_same = _obs_or_fq_ctr_equals(q1.activation, q2.activation)
            weight_same = _obs_or_fq_ctr_equals(q1.weight, q2.weight)
            return activation_same and weight_same
        except AttributeError:
            return q1 == q2


def _activation_is_memoryless(qconfig: QConfig):
    """
    Return whether the observer for activations defined in the given QConfig is memoryless.
    This means a MovingAverage observer with averaging constant equal to 1.
    """

    def _is_memoryless(observer):
        return (
            hasattr(observer, "averaging_constant") and observer.averaging_constant == 1
        )

    act = qconfig.activation()
    if isinstance(act, FakeQuantizeBase) and hasattr(act, "activation_post_process"):
        return _is_memoryless(act.activation_post_process)
    else:
        return _is_memoryless(act)


def _is_reuse_input_qconfig(qconfig: Optional[QConfig]):
    return (
        qconfig is not None
        and isinstance(qconfig.activation(), ReuseInputObserver)
        and isinstance(qconfig.weight(), NoopObserver)
    )

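
# Illustrative sketch (assumed usage, not part of this module's API): as noted in
# `_partial_wrapper_equals`, partial wrappers are compared field by field, so two
# qconfigs built from the same `with_args` parameters compare equal even though the
# wrapper objects themselves are distinct. `_example_qconfig_equality` is a
# hypothetical helper name.
def _example_qconfig_equality() -> bool:
    q1 = QConfig(
        activation=MinMaxObserver.with_args(dtype=torch.qint8),
        weight=default_weight_observer,
    )
    q2 = QConfig(
        activation=MinMaxObserver.with_args(dtype=torch.qint8),
        weight=default_weight_observer,
    )
    return qconfig_equals(q1, q2)  # True: same constructor, args, and keywords
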