feat: support linear (fully connected layer) dynamo converter

zewenli98 · zewenli98 · commit 50734d311594 · 2023-08-21T16:45:45.000-07:00
diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
@@ -420,3 +420,22 @@ def aten_ops_clone(
         name,
         args[0],
     )
+
+
+@dynamo_tensorrt_converter(torch.ops.aten.linear)
+def aten_ops_linear(
+    network: TRTNetwork,
+    target: Target,
+    args: Tuple[Argument, ...],
+    kwargs: Dict[str, Argument],
+    name: str,
+) -> Union[TRTTensor, Sequence[TRTTensor]]:
+    """Convert ``aten.linear`` by delegating to ``impl.linear.linear``.
+
+    ``args`` holds ``(input, weight)`` and, optionally, ``bias`` in position 2.
+    """
+    # bias may be omitted from args; args[2] would then raise IndexError.
+    bias = args[2] if len(args) > 2 else None
+    return impl.linear.linear(
+        network, target, SourceIR.ATEN, name, input=args[0], weight=args[1], bias=bias
+    )
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/__init__.py b/py/torch_tensorrt/dynamo/conversion/impl/__init__.py
@@ -6,6 +6,7 @@
     condition,
     elementwise,
     embedding,
+    linear,
     matmul,
     normalization,
     permutation,
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/linear.py b/py/torch_tensorrt/dynamo/conversion/impl/linear.py
@@ -0,0 +1,85 @@
+from typing import Optional, Union
+
+import tensorrt as trt
+import torch
+from torch.fx.node import Target
+from torch_tensorrt.fx.converters.converter_utils import (
+    SourceIR,
+    get_trt_tensor,
+    set_layer_name,
+    to_numpy,
+)
+from torch_tensorrt.fx.types import TRTNetwork, TRTTensor
+
+
+def linear(
+    network: TRTNetwork,
+    target: Union[Target, str],
+    source_ir: Optional[SourceIR],
+    name: str,
+    input: TRTTensor,
+    weight: Union[TRTTensor, torch.Tensor],
+    bias: Optional[Union[TRTTensor, torch.Tensor]],
+) -> TRTTensor:
+    """
+    Add a TensorRT fully connected layer for ``input``, ``weight`` and ``bias``.
+
+    The TensorRT fully connected layer implicitly flattens the last three
+    dimensions at the start and implicitly reshapes the result to (K, 1, 1) at the
+    end, e.g. (N, C, H, W) -> (N, C*H*W) -> (N, K) -> (N, K, 1, 1). Shuffle layers
+    therefore append (1, 1) to the input shape and strip it from the output.
+    Raises RuntimeError on a non-ITensor ``input`` or an unsupported weight/bias type.
+    TODO: We can optimize this to get rid of unnecessary transformation.
+    """
+
+    if not isinstance(input, trt.tensorrt.ITensor):
+        raise RuntimeError(
+            f"Linear received input {input} that is not part " "of the TensorRT region!"
+        )
+
+    # Reshape the input from (*, X) to (*, X, 1, 1) by appending two unit dims
+    pre_shuffle_layer = network.add_shuffle(input)
+    pre_shuffle_layer.reshape_dims = tuple(input.shape) + (1, 1)
+    set_layer_name(pre_shuffle_layer, target, f"{name}_pre_shuffle", source_ir)
+
+    # Process bias terms (None is left as-is and passed straight to the layer)
+    if isinstance(bias, torch.Tensor):
+        # Transform the bias constant into a Numpy array
+        bias = to_numpy(bias)
+
+    elif isinstance(bias, TRTTensor):
+        bias = get_trt_tensor(network, bias, f"{name}_bias")
+
+    elif bias is not None:
+        raise RuntimeError(
+            f"Linear layer {name} has bias of type {type(bias)}, Expected Torch Tensor or TRT Tensor"
+        )
+
+    # Process weight terms (explicit precision always goes through get_trt_tensor)
+    if network.has_explicit_precision or isinstance(weight, TRTTensor):
+        weight = get_trt_tensor(network, weight, f"{name}_weight")
+
+    elif isinstance(weight, torch.Tensor):
+        # Transform the weight constant into a Numpy array
+        weight = to_numpy(weight)
+
+    else:
+        raise RuntimeError(
+            f"Linear layer {name} has weight of type {type(weight)}, Expect Optional[Tensor]"
+        )
+    # Add the fully connected layer. NOTE(review): a TRTTensor weight/bias is passed
+    # as empty trt.Weights() and is not attached to the layer in this function.
+    fully_connected_layer = network.add_fully_connected(
+        input=pre_shuffle_layer.get_output(0),
+        num_outputs=weight.shape[0],
+        kernel=trt.Weights() if isinstance(weight, TRTTensor) else weight,
+        bias=trt.Weights() if isinstance(bias, TRTTensor) else bias,
+    )
+    set_layer_name(fully_connected_layer, target, f"{name}_linear", source_ir)
+
+    # Reshape the output from (*, K, 1, 1) to (*, K), with K = weight.shape[0]
+    post_shuffle_layer = network.add_shuffle(fully_connected_layer.get_output(0))
+    post_shuffle_layer.reshape_dims = tuple(input.shape[:-1]) + (weight.shape[0],)
+    set_layer_name(post_shuffle_layer, target, f"{name}_post_shuffle", source_ir)
+
+    return post_shuffle_layer.get_output(0)