Significantly improve performance of row decoding. (vapor#130) (vapor#134)

tanner0101 · web-flow · commit 41cec2846f9c · 2019-03-14T14:18:34.000-04:00
* Significantly improve performance of row decoding.

Motivation:

1. `PostgreSQLRowDecoder`: `_KeyedDecodingContainer.decode` is called once for each key on the decoded object. With the current implementation, this performs a linear search on the `row` dictionary, which results in a total runtime (per object) of O(keys * columns), i.e. quadratic runtime ~O(keys^2). We replace the current linear search with one or two dictionary lookups if `tableOID != 0`, resulting in linear runtime (per object) in the number of keys (provided dictionary lookups can be assumed to take roughly constant time).
2. `PostgreSQLConnection.TableNameCache`: Most lookups are `tableName -> OID`. We accelerate that lookup by preparing a dictionary for that kind of lookup ahead of time, again replacing linear search.

Effect:

On a Core i7-6700K (Release build), the time required for decoding ~5k objects with 9 fields each drops from ~0.4 s to ~0.2 s, effectively doubling throughput. Optimization 1 contributes ~130 ms; Optimization 2 contributes ~70 ms.

* Whitespace fixes.

* Comment fix.

* More whitespace, sorry.

* Implement `decodeIfPresent` to avoid two dictionary lookups per call.

* Minor code simplification.
diff --git a/Sources/PostgreSQL/Codable/PostgreSQLRowDecoder.swift b/Sources/PostgreSQL/Codable/PostgreSQLRowDecoder.swift
@@ -47,26 +47,48 @@ struct PostgreSQLRowDecoder {
         let codingPath: [CodingKey] = []
         let row: [PostgreSQLColumn: PostgreSQLData]
         let tableOID: UInt32
-        let allKeys: [Key]
+        var allKeys: [Key] {
+            // Unlikely to be called (mostly present for protocol conformance), so we don't need to cache this property.
+            return row.keys
+                .compactMap { col in
+                    if tableOID == 0 || col.tableOID == tableOID || col.tableOID == 0 {
+                        return col.name
+                    } else {
+                        return nil
+                    }
+                }.compactMap(Key.init(stringValue:))
+        }
         
         init(row: [PostgreSQLColumn: PostgreSQLData], tableOID: UInt32) {
             self.row = row
             self.tableOID = tableOID
-            self.allKeys = row.keys.compactMap { col in
-                if tableOID == 0 || col.tableOID == tableOID || col.tableOID == 0 {
-                    return col.name
-                } else {
-                    return nil
-                }
-            }.compactMap(Key.init(stringValue:))
+        }
+        
+        private func data(for key: Key) -> PostgreSQLData? {
+            let columnName = key.stringValue
+            var column = PostgreSQLColumn(tableOID: self.tableOID, name: columnName)
+            // First, check for an exact (tableOID, columnName) match.
+            if let data = row[column] { return data }
+            
+            if self.tableOID != 0 {
+                // No column with our exact table OID; check for a (0, columnName) match instead.
+                column.tableOID = 0
+                return row[column]
+            } else {
+                // No (0, columnName) match; check via (slow!) linear search for _any_ matching column name,
+                // regardless of tableOID.
+                // Note: This path is hit in `PostgreSQLConnection.tableNames`, but luckily the `PGClass` only has
+                // two keys, so the performance impact of linear search is acceptable there.
+                return row.firstValue(tableOID: tableOID, name: columnName)
+            }
         }
         
         func contains(_ key: Key) -> Bool {
-            return allKeys.contains { $0.stringValue == key.stringValue }
+            return data(for: key) != nil
         }
         
         func decodeNil(forKey key: Key) throws -> Bool {
-            guard let data = row.firstValue(tableOID: tableOID, name: key.stringValue) else {
+            guard let data = data(for: key) else {
                 return true
             }
             switch data.storage {
@@ -76,12 +98,22 @@ struct PostgreSQLRowDecoder {
         }
         
         func decode<T>(_ type: T.Type, forKey key: Key) throws -> T where T : Decodable {
-            guard let data = row.firstValue(tableOID: tableOID, name: key.stringValue) else {
+            guard let data = data(for: key) else {
                 throw DecodingError.valueNotFound(T.self, .init(codingPath: codingPath + [key], debugDescription: "Could not decode \(T.self)."))
             }
             return try PostgreSQLDataDecoder().decode(T.self, from: data)
         }
         
+        // This specialization avoids two dictionary lookups (caused by calls to `contains` and `decodeNil`) present in
+        // the default implementation of `decodeIfPresent`.
+        func decodeIfPresent<T>(_ type: T.Type, forKey key: Key) throws -> T? where T : Decodable {
+            guard let data = data(for: key) else { return nil }
+            switch data.storage {
+            case .null: return nil
+            default: return try PostgreSQLDataDecoder().decode(T.self, from: data)
+            }
+        }
+        
         func nestedContainer<NestedKey>(keyedBy type: NestedKey.Type, forKey key: Key) throws -> KeyedDecodingContainer<NestedKey> where NestedKey : CodingKey {
             fatalError()
         }
diff --git a/Sources/PostgreSQL/Column/PostgreSQLColumn.swift b/Sources/PostgreSQL/Column/PostgreSQLColumn.swift
@@ -25,6 +25,8 @@ extension PostgreSQLColumn: CustomStringConvertible {
 
 extension Dictionary where Key == PostgreSQLColumn {
     /// Accesses the _first_ value from this dictionary with a matching field name.
+    ///
+    /// - Note: This performs a linear search over the dictionary and thus is fairly slow.
     public func firstValue(tableOID: UInt32 = 0, name: String) -> Value? {
         for (column, data) in self {
             if (tableOID == 0 || column.tableOID == 0 || column.tableOID == tableOID) && column.name == name {
diff --git a/Sources/PostgreSQL/Connection/PostgreSQLConnection+TableNameCache.swift b/Sources/PostgreSQL/Connection/PostgreSQLConnection+TableNameCache.swift
@@ -1,8 +1,17 @@
+private struct PGClass: PostgreSQLTable {
+    static let sqlTableIdentifierString = "pg_class"
+    var oid: UInt32
+    var relname: String
+}
+
 extension PostgreSQLConnection {
     /// Caches table OID to string name associations.
     public struct TableNameCache {
         /// Stores table names. [OID: Name]
         private let tableNames: [UInt32: String]
+        /// Stores table OIDs. [Name: OID]
+        /// Used to accelerate the Name -> OID lookup.
+        private let tableOIDs: [String: UInt32]
         
         /// Fetches the table name for a given table OID. Returns `nil` if no table with that OID is known.
         ///
@@ -19,17 +28,19 @@ extension PostgreSQLConnection {
         ///     - name: Table name.
         /// - returns: Table OID.
         public func tableOID(name: String) -> UInt32? {
-            for (key, val) in tableNames {
-                if val == name {
-                    return key
-                }
-            }
-            return nil
+            return tableOIDs[name]
         }
         
         /// Creates a new cache.
-        init(_ tableNames: [UInt32: String]) {
+        fileprivate init(_ tableClasses: [PGClass]) {
+            var tableNames: [UInt32: String] = [:]
+            var tableOIDs: [String: UInt32] = [:]
+            for tableClass in tableClasses {
+                tableNames[tableClass.oid] = tableClass.relname
+                tableOIDs[tableClass.relname] = tableClass.oid
+            }
             self.tableNames = tableNames
+            self.tableOIDs = tableOIDs
         }
     }
 
@@ -42,18 +53,9 @@ extension PostgreSQLConnection {
         if let existing = tableNameCache, !refresh {
             return future(existing)
         } else {
-            struct PGClass: PostgreSQLTable {
-                static let sqlTableIdentifierString = "pg_class"
-                var oid: UInt32
-                var relname: String
-            }
             return select().column("oid").column("relname").from(PGClass.self).all().map { rows in
-                var cache: [UInt32: String] = [:]
                 let rows = try rows.map { try self.decode(PGClass.self, from: $0, table: nil) }
-                for row in rows {
-                    cache[row.oid] = row.relname
-                }
-                let new = TableNameCache(cache)
+                let new = TableNameCache(rows)
                 self.tableNameCache = new
                 return new
             }