Merge pull request #20 from NVIDIA/fp16_path

Fp16 patch, not path!
2026-03-08 01:32:35 +00:00 · 2018-05-15 09:55:19 -07:00
parent bd42cb6ed7 27b1767cb2
commit da30fd8709
2 changed files with 8 additions and 6 deletions
--- a/loss_scaler.py
+++ b/loss_scaler.py
@@ -51,11 +51,10 @@ class DynamicLossScaler:

    # `x` is a torch.Tensor
    def _has_inf_or_nan(x):
-        inf_count = torch.sum(x.abs() == float('inf'))
-        if inf_count > 0:
+        cpu_sum = float(x.float().sum())
+        if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum:
            return True
-        nan_count = torch.sum(x != x)
-        return nan_count > 0
+        return False

    # `overflow` is boolean indicating whether we overflowed in gradient
    def update_scale(self, overflow):
--- a/train.py
+++ b/train.py
@@ -2,6 +2,7 @@ import os
 import time
 import argparse
 import math
+from numpy import finfo

 import torch
 from distributed import DistributedDataParallel
@@ -77,7 +78,9 @@ def prepare_directories_and_logger(output_directory, log_directory, rank):

 def load_model(hparams):
    model = Tacotron2(hparams).cuda()
-    model = batchnorm_to_float(model.half()) if hparams.fp16_run else model
+    if hparams.fp16_run:
+        model = batchnorm_to_float(model.half())
+        model.decoder.attention_layer.score_mask_value = float(finfo('float16').min)

    if hparams.distributed_run:
        model = DistributedDataParallel(model)
@@ -276,7 +279,7 @@ if __name__ == '__main__':
    torch.backends.cudnn.benchmark = hparams.cudnn_benchmark

    print("FP16 Run:", hparams.fp16_run)
-    print("Dynamic Loss Scaling", hparams.dynamic_loss_scaling)
+    print("Dynamic Loss Scaling:", hparams.dynamic_loss_scaling)
    print("Distributed Run:", hparams.distributed_run)
    print("cuDNN Enabled:", hparams.cudnn_enabled)
    print("cuDNN Benchmark:", hparams.cudnn_benchmark)