Adding dilations argument inside DepthwiseConv2D API (#1129)

diwakar-vsingh · BradLarson · web-flow · commit 02f31d531cbb · 2021-01-26T14:24:37.000-06:00
* Update NN.swift

Add dilations argument inside depthwiseConv2D function.

* Update Convolutional.swift

Update the `DepthwiseConv2D` struct to support dilations as well.

* Update Convolutional.swift

Dilations argument added in SeparableConv2D

* Updating X10 tests with dilations parameter.

* Update ops_test.swift

Correcting dilation argument inside depthwiseConv2D function.

* Update NN.swift

Give the `strides` and `dilations` arguments of the `depthwiseConv2D()` function a default value of (1, 1, 1, 1).

Co-authored-by: Brad Larson <bradlarson@google.com>
diff --git a/Sources/TensorFlow/Layers/Convolutional.swift b/Sources/TensorFlow/Layers/Convolutional.swift
@@ -708,6 +708,8 @@ public struct DepthwiseConv2D<Scalar: TensorFlowFloatingPoint>: Layer {
   @noDerivative public let strides: (Int, Int)
   /// The padding algorithm for convolution.
   @noDerivative public let padding: Padding
+  /// The dilation factor for spatial dimensions.
+  @noDerivative public let dilations: (Int, Int)
   /// Note: `useBias` is a workaround for TF-1153: optional differentiation support.
   @noDerivative private let useBias: Bool
 
@@ -723,18 +725,21 @@ public struct DepthwiseConv2D<Scalar: TensorFlowFloatingPoint>: Layer {
   ///   - activation: The element-wise activation function.
   ///   - strides: The strides of the sliding window for spatial dimensions.
   ///   - padding: The padding algorithm for convolution.
+  ///   - dilations: The dilation factors for spatial dimensions.
   public init(
     filter: Tensor<Scalar>,
     bias: Tensor<Scalar>? = nil,
     activation: @escaping Activation = identity,
     strides: (Int, Int) = (1, 1),
-    padding: Padding = .valid
+    padding: Padding = .valid,
+    dilations: (Int, Int) = (1, 1)
   ) {
     self.filter = filter
     self.bias = bias ?? .zero
     self.activation = activation
     self.strides = strides
     self.padding = padding
+    self.dilations = dilations
     useBias = (bias != nil)
   }
 
@@ -750,7 +755,8 @@ public struct DepthwiseConv2D<Scalar: TensorFlowFloatingPoint>: Layer {
       input,
       filter: filter,
       strides: (1, strides.0, strides.1, 1),
-      padding: padding)
+      padding: padding,
+      dilations: (1, dilations.0, dilations.1, 1))
     return activation(useBias ? (conv + bias) : conv)
   }
 }
@@ -771,6 +777,7 @@ extension DepthwiseConv2D {
     filterShape: (Int, Int, Int, Int),
     strides: (Int, Int) = (1, 1),
     padding: Padding = .valid,
+    dilations: (Int, Int) = (1, 1),
     activation: @escaping Activation = identity,
     useBias: Bool = true,
     filterInitializer: ParameterInitializer<Scalar> = glorotUniform(),
@@ -784,7 +791,8 @@ extension DepthwiseConv2D {
       bias: useBias ? biasInitializer([filterShape.2 * filterShape.3]) : nil,
       activation: activation,
       strides: strides,
-      padding: padding)
+      padding: padding,
+      dilations: dilations)
   }
 }
 
@@ -901,12 +909,15 @@ public struct SeparableConv1D<Scalar: TensorFlowFloatingPoint>: Layer {
   public var pointwiseFilter: Tensor<Scalar>
   /// The bias vector.
   public var bias: Tensor<Scalar>
+
   /// The element-wise activation function.
   @noDerivative public let activation: Activation
   /// The strides of the sliding window for spatial dimensions.
   @noDerivative public let stride: Int
   /// The padding algorithm for convolution.
   @noDerivative public let padding: Padding
+  /// The dilation factor for the temporal dimension.
+  @noDerivative public let dilation: Int
   /// Note: `useBias` is a workaround for TF-1153: optional differentiation support.
   @noDerivative private let useBias: Bool
 
@@ -925,20 +936,23 @@ public struct SeparableConv1D<Scalar: TensorFlowFloatingPoint>: Layer {
   ///   - activation: The element-wise activation function.
   ///   - strides: The strides of the sliding window for spatial dimensions.
   ///   - padding: The padding algorithm for convolution.
+  ///   - dilation: The dilation factor for the temporal dimension.
   public init(
     depthwiseFilter: Tensor<Scalar>,
     pointwiseFilter: Tensor<Scalar>,
     bias: Tensor<Scalar>? = nil,
     activation: @escaping Activation = identity,
     stride: Int = 1,
-    padding: Padding = .valid
+    padding: Padding = .valid,
+    dilation: Int = 1
   ) {
     self.depthwiseFilter = depthwiseFilter
     self.pointwiseFilter = pointwiseFilter
     self.bias = bias ?? .zero
     self.activation = activation
     self.stride = stride
     self.padding = padding
+    self.dilation = dilation
     useBias = (bias != nil)
   }
 
@@ -952,7 +966,8 @@ public struct SeparableConv1D<Scalar: TensorFlowFloatingPoint>: Layer {
       input.expandingShape(at: 1),
       filter: depthwiseFilter.expandingShape(at: 1),
       strides: (1, stride, stride, 1),
-      padding: padding)
+      padding: padding,
+      dilations: (1, dilation, dilation, 1))
     let x = conv2D(
       depthwise,
       filter: pointwiseFilter.expandingShape(at: 1),
@@ -970,8 +985,9 @@ extension SeparableConv1D {
   /// - Parameters:
   ///   - depthwiseFilterShape: The shape of the 3-D depthwise convolution kernel.
   ///   - pointwiseFilterShape: The shape of the 3-D pointwise convolution kernel.
-  ///   - strides: The strides of the sliding window for temporal dimensions.
+  ///   - stride: The stride of the sliding window for temporal dimensions.
   ///   - padding: The padding algorithm for convolution.
+  ///   - dilation: The dilation factor for the temporal dimension.
   ///   - activation: The element-wise activation function.
   ///   - filterInitializer: Initializer to use for the filter parameters.
   ///   - biasInitializer: Initializer to use for the bias parameters.
@@ -980,6 +996,7 @@ extension SeparableConv1D {
     pointwiseFilterShape: (Int, Int, Int),
     stride: Int = 1,
     padding: Padding = .valid,
+    dilation: Int = 1,
     activation: @escaping Activation = identity,
     useBias: Bool = true,
     depthwiseFilterInitializer: ParameterInitializer<Scalar> = glorotUniform(),
@@ -998,7 +1015,8 @@ extension SeparableConv1D {
       bias: useBias ? biasInitializer([pointwiseFilterShape.2]) : nil,
       activation: activation,
       stride: stride,
-      padding: padding)
+      padding: padding,
+      dilation: dilation)
   }
 }
 
@@ -1020,6 +1038,8 @@ public struct SeparableConv2D<Scalar: TensorFlowFloatingPoint>: Layer {
   @noDerivative public let strides: (Int, Int)
   /// The padding algorithm for convolution.
   @noDerivative public let padding: Padding
+  /// The dilation factor for spatial dimensions.
+  @noDerivative public let dilations: (Int, Int)
   /// Note: `useBias` is a workaround for TF-1153: optional differentiation support.
   @noDerivative private let useBias: Bool
 
@@ -1038,20 +1058,23 @@ public struct SeparableConv2D<Scalar: TensorFlowFloatingPoint>: Layer {
   ///   - activation: The element-wise activation function.
   ///   - strides: The strides of the sliding window for spatial dimensions.
   ///   - padding: The padding algorithm for convolution.
+  ///   - dilations: The dilation factors for spatial dimensions.
   public init(
     depthwiseFilter: Tensor<Scalar>,
     pointwiseFilter: Tensor<Scalar>,
     bias: Tensor<Scalar>? = nil,
     activation: @escaping Activation = identity,
     strides: (Int, Int) = (1, 1),
-    padding: Padding = .valid
+    padding: Padding = .valid,
+    dilations: (Int, Int) = (1, 1)
   ) {
     self.depthwiseFilter = depthwiseFilter
     self.pointwiseFilter = pointwiseFilter
     self.bias = bias ?? .zero
     self.activation = activation
     self.strides = strides
     self.padding = padding
+    self.dilations = dilations
     useBias = (bias != nil)
   }
 
@@ -1065,7 +1088,8 @@ public struct SeparableConv2D<Scalar: TensorFlowFloatingPoint>: Layer {
       input,
       filter: depthwiseFilter,
       strides: (1, strides.0, strides.1, 1),
-      padding: padding)
+      padding: padding,
+      dilations: (1, dilations.0, dilations.1, 1))
     let conv = conv2D(
       depthwise,
       filter: pointwiseFilter,
@@ -1085,6 +1109,7 @@ extension SeparableConv2D {
   ///   - pointwiseFilterShape: The shape of the 4-D pointwise convolution kernel.
   ///   - strides: The strides of the sliding window for spatial/spatio-temporal dimensions.
   ///   - padding: The padding algorithm for convolution.
+  ///   - dilations: The dilation factors for spatial dimensions.
   ///   - activation: The element-wise activation function.
   ///   - filterInitializer: Initializer to use for the filter parameters.
   ///   - biasInitializer: Initializer to use for the bias parameters.
@@ -1093,6 +1118,7 @@ extension SeparableConv2D {
     pointwiseFilterShape: (Int, Int, Int, Int),
     strides: (Int, Int) = (1, 1),
     padding: Padding = .valid,
+    dilations: (Int, Int) = (1, 1),
     activation: @escaping Activation = identity,
     useBias: Bool = true,
     depthwiseFilterInitializer: ParameterInitializer<Scalar> = glorotUniform(),
@@ -1113,6 +1139,7 @@ extension SeparableConv2D {
       bias: useBias ? biasInitializer([pointwiseFilterShape.3]) : nil,
       activation: activation,
       strides: strides,
-      padding: padding)
+      padding: padding,
+      dilations: dilations)
   }
 }
diff --git a/Sources/TensorFlow/Operators/NN.swift b/Sources/TensorFlow/Operators/NN.swift
@@ -460,16 +460,18 @@ func _vjpConv3DBackpropFilter<Scalar: TensorFlowFloatingPoint>(
 public func depthwiseConv2D<Scalar: TensorFlowFloatingPoint>(
   _ input: Tensor<Scalar>,
   filter: Tensor<Scalar>,
-  strides: (Int, Int, Int, Int),
-  padding: Padding
+  strides: (Int, Int, Int, Int) = (1, 1, 1, 1),
+  padding: Padding,
+  dilations: (Int, Int, Int, Int) = (1, 1, 1, 1)
 ) -> Tensor<Scalar> {
   precondition(input.shape.rank == 4, "The input must have rank 4.")
   precondition(filter.shape.rank == 4, "The filter must have rank 4.")
   return _Raw.depthwiseConv2dNative(
     input,
     filter: filter,
     strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)],
-    padding: padding.raw)
+    padding: padding.raw,
+    dilations: [Int32(dilations.0), Int32(dilations.1), Int32(dilations.2), Int32(dilations.3)])
 }
 
 @usableFromInline
@@ -478,21 +480,22 @@ func _vjpDepthwiseConv2D<Scalar: TensorFlowFloatingPoint>(
   _ x: Tensor<Scalar>,
   filter: Tensor<Scalar>,
   strides: (Int, Int, Int, Int),
-  padding: Padding
+  padding: Padding,
+  dilations: (Int, Int, Int, Int)
 ) -> (value: Tensor<Scalar>, pullback: (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
   let value = depthwiseConv2D(
     x, filter: filter, strides: strides,
-    padding: padding)
+    padding: padding, dilations: dilations)
   return (
     value,
     { v in
       (
         depthwiseConv2dBackpropInput(
           v, shape: x.shapeTensor, filter: filter,
-          strides: strides, padding: padding),
+          strides: strides, padding: padding, dilations: dilations),
         depthwiseConv2dBackpropFilter(
           v, input: x, filterSizes: filter.shapeTensor,
-          strides: strides, padding: padding)
+          strides: strides, padding: padding, dilations: dilations)
       )
     }
   )
@@ -505,15 +508,17 @@ func depthwiseConv2dBackpropInput<Scalar: TensorFlowFloatingPoint>(
   _ x: Tensor<Scalar>,
   shape: Tensor<Int32>,
   filter: Tensor<Scalar>,
-  strides: (Int, Int, Int, Int),
-  padding: Padding
+  strides: (Int, Int, Int, Int) = (1, 1, 1, 1),
+  padding: Padding,
+  dilations: (Int, Int, Int, Int) = (1, 1, 1, 1)
 ) -> Tensor<Scalar> {
   return _Raw.depthwiseConv2dNativeBackpropInput(
     inputSizes: shape,
     filter: filter,
     outBackprop: x,
     strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)],
-    padding: padding.raw)
+    padding: padding.raw,
+    dilations: [Int32(dilations.0), Int32(dilations.1), Int32(dilations.2), Int32(dilations.3)])
 }
 
 @usableFromInline
@@ -523,19 +528,20 @@ func _vjpDepthwiseConv2dBackpropInput<Scalar: TensorFlowFloatingPoint>(
   _ shape: Tensor<Int32>,
   _ filter: Tensor<Scalar>,
   _ strides: (Int, Int, Int, Int),
-  _ padding: Padding
+  _ padding: Padding,
+  _ dilations: (Int, Int, Int, Int)
 ) -> (value: Tensor<Scalar>, pullback: (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
   let value = depthwiseConv2dBackpropInput(
     x, shape: shape, filter: filter, strides: strides,
-    padding: padding)
+    padding: padding, dilations: dilations)
   return (
     value,
     { v in
       (
-        depthwiseConv2D(v, filter: filter, strides: strides, padding: padding),
+        depthwiseConv2D(v, filter: filter, strides: strides, padding: padding, dilations: dilations),
         depthwiseConv2dBackpropFilter(
           x, input: v, filterSizes: filter.shapeTensor,
-          strides: strides, padding: padding)
+          strides: strides, padding: padding, dilations: dilations)
       )
 
     }
@@ -549,15 +555,17 @@ func depthwiseConv2dBackpropFilter<Scalar: TensorFlowFloatingPoint>(
   _ x: Tensor<Scalar>,
   input: Tensor<Scalar>,
   filterSizes: Tensor<Int32>,
-  strides: (Int, Int, Int, Int),
-  padding: Padding
+  strides: (Int, Int, Int, Int) = (1, 1, 1, 1),
+  padding: Padding,
+  dilations: (Int, Int, Int, Int) = (1, 1, 1, 1)
 ) -> Tensor<Scalar> {
   return _Raw.depthwiseConv2dNativeBackpropFilter(
     input,
     filterSizes: filterSizes,
     outBackprop: x,
     strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)],
-    padding: padding.raw)
+    padding: padding.raw,
+    dilations: [Int32(dilations.0), Int32(dilations.1), Int32(dilations.2), Int32(dilations.3)])
 }
 
 @usableFromInline
@@ -567,19 +575,20 @@ func _vjpDepthwiseConv2dBackpropFilter<Scalar: TensorFlowFloatingPoint>(
   _ input: Tensor<Scalar>,
   _ filterSizes: Tensor<Int32>,
   _ strides: (Int, Int, Int, Int),
-  _ padding: Padding
+  _ padding: Padding,
+  _ dilations: (Int, Int, Int, Int)
 ) -> (value: Tensor<Scalar>, pullback: (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
   let value = depthwiseConv2dBackpropFilter(
     x, input: input, filterSizes: filterSizes,
-    strides: strides, padding: padding)
+    strides: strides, padding: padding, dilations: dilations)
   return (
     value,
     { v in
       (
-        depthwiseConv2D(input, filter: v, strides: strides, padding: padding),
+        depthwiseConv2D(input, filter: v, strides: strides, padding: padding, dilations: dilations),
         depthwiseConv2dBackpropInput(
           x, shape: x.shapeTensor, filter: v, strides: strides,
-          padding: padding)
+          padding: padding, dilations: dilations)
       )
     }
   )
diff --git a/Tests/x10/ops_test.swift b/Tests/x10/ops_test.swift
@@ -1069,14 +1069,15 @@ final class TensorTests: XCTestCase {
     let kernelSize = 5
     let inputSize = 14
     let batch = 2
+    let dilation = 1
     for useReducedPrecision in [false, true] {
       for stride in 1..<4 {
         for padSame in [false, true] {
           var input = Tensor<Float>.rand([batch, inputSize, inputSize, inChannels])
           var filter = Tensor<Float>.rand([kernelSize, kernelSize, inChannels, channelMultiplier])
           let outShape = depthwiseConv2D(
             TF(input), filter: TF(filter), strides: (1, stride, stride, 1),
-            padding: padSame ? Padding.same : Padding.valid
+            padding: padSame ? Padding.same : Padding.valid, dilations: (1, dilation, dilation, 1)
           )
           .shape
           var outGrad = Tensor<Float>.rand(outShape.dimensions)
@@ -1091,13 +1092,13 @@ final class TensorTests: XCTestCase {
               (_ input: Tensor<Float>, _ filter: Tensor<Float>) -> Tensor<Float> in
               depthwiseConv2D(
                 input, filter: filter, strides: (1, stride, stride, 1),
-                padding: padSame ? Padding.same : Padding.valid
+                padding: padSame ? Padding.same : Padding.valid, dilations: (1, dilation, dilation, 1)
               )
             },
             { (_ input: Tensor<Float>, _ filter: Tensor<Float>) -> Tensor<Float> in
               depthwiseConv2D(
                 input, filter: filter, strides: (1, stride, stride, 1),
-                padding: padSame ? Padding.same : Padding.valid
+                padding: padSame ? Padding.same : Padding.valid, dilations: (1, dilation, dilation, 1)
               )
             }, input, filter, outGrad, relTolerance: relTolerance, absTolerance: 1e-4)
         }