"""Discriminator implementations mostly used for GAN-based post-filters.
All the discriminators must return a list of tensors.
The last tensor of the list is regarded as the output of the discriminator.
The others are used as intermediate feature maps.
"""
import numpy as np
import torch
from nnsvs.util import init_weights
from torch import nn
class Conv2dD(nn.Module):
"""Conv2d-based discriminator
The implementation follows the discriminator of the GAN-based post-filters
in :cite:t:`Kaneko2017Interspeech`.
Args:
in_dim (int): Input feature dim (unused)
channels (int): Number of channels
kernel_size (tuple): Kernel size for 2d-convolution
padding (tuple): Padding for 2d-convolution. If None, it is computed
    from the kernel size as ``(kernel_size - 1) // 2``.
last_sigmoid (bool): If True, apply sigmoid on the output
init_type (str): Initialization type
padding_mode (str): Padding mode
"""
def __init__(
self,
in_dim=None,
channels=64,
kernel_size=(5, 3),
padding=(0, 0),
last_sigmoid=False,
init_type="kaiming_normal",
padding_mode="zeros",
):
super().__init__()
self.last_sigmoid = last_sigmoid
C = channels
ks = np.asarray(list(kernel_size))
if padding is None:
padding = (ks - 1) // 2
self.convs = nn.ModuleList()
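        # Four Conv2d + LeakyReLU blocks with channel progression
        # 1 -> C -> 2*C -> 4*C -> 2*C. The second to fourth blocks use
        # stride (2, 1) to downsample along the time axis only.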
self.convs.append(
nn.Sequential(
nn.Conv2d(
1,
C,
kernel_size=ks,
padding=padding,
stride=(1, 1),
padding_mode=padding_mode,
),
nn.LeakyReLU(0.2),
)
)
self.convs.append(
nn.Sequential(
nn.Conv2d(
C,
2 * C,
kernel_size=ks,
padding=padding,
stride=(2, 1),
padding_mode=padding_mode,
),
nn.LeakyReLU(0.2),
)
)
self.convs.append(
nn.Sequential(
nn.Conv2d(
2 * C,
4 * C,
kernel_size=ks,
padding=padding,
stride=(2, 1),
padding_mode=padding_mode,
),
nn.LeakyReLU(0.2),
)
)
self.convs.append(
nn.Sequential(
nn.Conv2d(
4 * C,
2 * C,
kernel_size=ks,
padding=padding,
stride=(2, 1),
padding_mode=padding_mode,
),
nn.LeakyReLU(0.2),
)
)
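        # Final 1-channel convolution producing the discriminator score map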
self.last_conv = nn.Conv2d(
2 * C,
1,
kernel_size=ks,
padding=padding,
stride=(1, 1),
padding_mode=padding_mode,
)
init_weights(self, init_type)
def forward(self, x, c=None, lengths=None):
"""Forward step
Args:
x (torch.Tensor): Input tensor
c (torch.Tensor): Optional conditional features (unused)
lengths (torch.Tensor): Optional lengths of the input (unused)
Returns:
list: List containing a list of tensors; the last tensor is the
    discriminator output and the preceding ones are intermediate feature maps
"""
outs = []
# (B, T, C) -> (B, 1, T, C):
x = x.unsqueeze(1)
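        # Keep the activation of every conv block as an intermediate feature map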
for conv in self.convs:
x = conv(x)
outs.append(x)
y = self.last_conv(x)
y = torch.sigmoid(y) if self.last_sigmoid else y
# (B, 1, T, C) -> (B, T, C)
y = y.squeeze(1)
outs.append(y)
return [outs]
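# A minimal usage sketch (illustrative only; the batch size, sequence length,
# and feature dimension below are arbitrary example values). The discriminator
# consumes (B, T, C) features and returns a list containing a list of tensors
# whose last element is the discriminator output.
if __name__ == "__main__":
    netD = Conv2dD(in_dim=60, kernel_size=(5, 3), padding=(2, 1))
    x = torch.randn(8, 100, 60)  # (B, T, C)
    outs = netD(x)[0]
    # Intermediate feature maps come first; the last tensor is the output
    for o in outs:
        print(tuple(o.shape))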