
amazing #1

Merged: 24 commits, Jan 4, 2024
13 changes: 13 additions & 0 deletions BuildConfiguratons/Debug.xcconfig
@@ -0,0 +1,13 @@
//
// Debug.xcconfig
// Diffusion
//
// Created by Fahim Farook on 15/12/2022.
//

// Detailed explanation of how to set this up can be found here: https://ajpagente.github.io/mobile/using-xcconfig/
// Note: This is the file referenced from the Xcode project, and it can be checked into the Git repo. But it includes the file with your personal info (Sign-Debug.xcconfig), and that file should not be checked into the repo.
// Configuration settings file format documentation can be found at:
// https://help.apple.com/xcode/#/dev745c5c974

#include "Sign-Debug.xcconfig"
13 changes: 13 additions & 0 deletions BuildConfiguratons/Release.xcconfig
@@ -0,0 +1,13 @@
//
// Release.xcconfig
// Diffusion
//
// Created by Fahim Farook on 15/12/2022.
//

// Detailed explanation of how to set this up can be found here: https://ajpagente.github.io/mobile/using-xcconfig/
// Note: This is the file referenced from the Xcode project, and it can be checked into the Git repo. But it includes the file with your personal info (Sign-Release.xcconfig), and that file should not be checked into the repo.
// Configuration settings file format documentation can be found at:
// https://help.apple.com/xcode/#/dev745c5c974

#include "Sign-Release.xcconfig"
20 changes: 20 additions & 0 deletions BuildConfiguratons/Sign-Debug-template.xcconfig
@@ -0,0 +1,20 @@
//
// Sign-Debug-template.xcconfig
// Diffusion
//
// Created by Fahim Farook on 15/12/2022.
//

// Detailed explanation of how to set this up can be found here: https://ajpagente.github.io/mobile/using-xcconfig/
// Note: This file holds your personal signing details. *Do not* check it into the repo. The Debug.xcconfig file includes this one so that you can modify it without impacting the project.
// Configuration settings file format documentation can be found at:
// https://help.apple.com/xcode/#/dev745c5c974

// See the first link above for details on how to get the following values
PRODUCT_BUNDLE_IDENTIFIER = <Your bundle identifier from the app>
DEVELOPMENT_TEAM = <10 character Team ID>
CODE_SIGN_IDENTITY[sdk=iphoneos*] = <40 character SHA1 Hash from provisioning profile for iOS>
PROVISIONING_PROFILE_SPECIFIER[sdk=iphoneos*] = <36 character UUID from provisioning profile for iOS>

CODE_SIGN_IDENTITY[sdk=macos*] = <40 character SHA1 Hash from provisioning profile for macOS>
PROVISIONING_PROFILE_SPECIFIER[sdk=macos*] = <36 character UUID from provisioning profile for macOS>
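
For illustration, a filled-in Sign-Debug.xcconfig might look like the following. Every value here is a made-up placeholder, not a real identifier; the real values come from your Apple Developer account and provisioning profiles as described in the linked guide:

    // Sign-Debug.xcconfig (hypothetical example values)
    PRODUCT_BUNDLE_IDENTIFIER = com.example.Diffusion
    DEVELOPMENT_TEAM = ABCDE12345
    CODE_SIGN_IDENTITY[sdk=iphoneos*] = 0123456789ABCDEF0123456789ABCDEF01234567
    PROVISIONING_PROFILE_SPECIFIER[sdk=iphoneos*] = 12345678-1234-1234-1234-123456789012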
20 changes: 20 additions & 0 deletions BuildConfiguratons/Sign-Release-template.xcconfig
@@ -0,0 +1,20 @@
//
// Sign-Release-template.xcconfig
// Diffusion
//
// Created by Fahim Farook on 15/12/2022.
//

// Detailed explanation of how to set this up can be found here: https://ajpagente.github.io/mobile/using-xcconfig/
// Note: This file holds your personal signing details. *Do not* check it into the repo. The Release.xcconfig file includes this one so that you can modify it without impacting the project.
// Configuration settings file format documentation can be found at:
// https://help.apple.com/xcode/#/dev745c5c974

// See the first link above for details on how to get the following values
PRODUCT_BUNDLE_IDENTIFIER = <Your bundle identifier from the app>
DEVELOPMENT_TEAM = <10 character Team ID>
CODE_SIGN_IDENTITY[sdk=iphoneos*] = <40 character SHA1 Hash from provisioning profile for iOS>
PROVISIONING_PROFILE_SPECIFIER[sdk=iphoneos*] = <36 character UUID from provisioning profile for iOS>

CODE_SIGN_IDENTITY[sdk=macos*] = <40 character SHA1 Hash from provisioning profile for macOS>
PROVISIONING_PROFILE_SPECIFIER[sdk=macos*] = <36 character UUID from provisioning profile for macOS>
182 changes: 182 additions & 0 deletions CoreML/pipeline/DPMSolverMultistepScheduler.swift
@@ -0,0 +1,182 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. and The HuggingFace Team. All Rights Reserved.

import Accelerate
import CoreML

/// A scheduler used to compute a de-noised image
///
/// This implementation matches:
/// [Hugging Face Diffusers DPMSolverMultistepScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py)
///
/// It uses the DPM-Solver++ algorithm: [code](https://github.com/LuChengTHU/dpm-solver) [paper](https://arxiv.org/abs/2211.01095).
/// Limitations:
/// - Only implemented for DPM-Solver++ algorithm (not DPM-Solver).
/// - Second order only.
/// - Assumes the model predicts epsilon.
/// - No dynamic thresholding.
/// - `midpoint` solver algorithm.
@available(iOS 16.2, macOS 13.1, *)
public final class DPMSolverMultistepScheduler: Scheduler {
    public let trainStepCount: Int
    public let inferenceStepCount: Int
    public let betas: [Float]
    public let alphas: [Float]
    public let alphasCumProd: [Float]
    public let timeSteps: [Int]

    public let alpha_t: [Float]
    public let sigma_t: [Float]
    public let lambda_t: [Float]

    public let solverOrder = 2
    private(set) var lowerOrderStepped = 0

    /// Whether to use lower-order solvers in the final steps. Only valid for less than 15 inference steps.
    /// We empirically find this trick can stabilize the sampling of DPM-Solver, especially with 10 or fewer steps.
    public let useLowerOrderFinal = true

    // Stores solverOrder (2) items
    private(set) var modelOutputs: [MLShapedArray<Float32>] = []

    /// Create a scheduler that uses a second order DPM-Solver++ algorithm.
    ///
    /// - Parameters:
    ///   - stepCount: Number of inference steps to schedule
    ///   - trainStepCount: Number of training diffusion steps
    ///   - betaSchedule: Method to schedule betas from betaStart to betaEnd
    ///   - betaStart: The starting value of beta for inference
    ///   - betaEnd: The end value for beta for inference
    /// - Returns: A scheduler ready for its first step
    public init(
        stepCount: Int = 50,
        trainStepCount: Int = 1000,
        betaSchedule: BetaSchedule = .scaledLinear,
        betaStart: Float = 0.00085,
        betaEnd: Float = 0.012
    ) {
        self.trainStepCount = trainStepCount
        self.inferenceStepCount = stepCount

        switch betaSchedule {
        case .linear:
            self.betas = linspace(betaStart, betaEnd, trainStepCount)
        case .scaledLinear:
            self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 })
        }

        self.alphas = betas.map({ 1.0 - $0 })
        var alphasCumProd = self.alphas
        for i in 1..<alphasCumProd.count {
            alphasCumProd[i] *= alphasCumProd[i - 1]
        }
        self.alphasCumProd = alphasCumProd

        // Currently we only support VP-type noise schedule
        self.alpha_t = vForce.sqrt(self.alphasCumProd)
        self.sigma_t = vForce.sqrt(vDSP.subtract([Float](repeating: 1, count: self.alphasCumProd.count), self.alphasCumProd))
        self.lambda_t = zip(self.alpha_t, self.sigma_t).map { α, σ in log(α) - log(σ) }

        self.timeSteps = linspace(0, Float(self.trainStepCount - 1), stepCount).reversed().map { Int(round($0)) }
    }
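
    // With ᾱ_t = alphasCumProd[t]: alpha_t = sqrt(ᾱ_t), sigma_t = sqrt(1 - ᾱ_t),
    // and lambda_t = log(alpha_t) - log(sigma_t), i.e. half the log signal-to-noise
    // ratio that the DPM-Solver++ updates below integrate over.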

    /// Convert the model output to the corresponding type the algorithm needs.
    /// This implementation is for second-order DPM-Solver++ assuming epsilon prediction.
    func convertModelOutput(modelOutput: MLShapedArray<Float32>, timestep: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        assert(modelOutput.scalars.count == sample.scalars.count)
        let (alpha_t, sigma_t) = (self.alpha_t[timestep], self.sigma_t[timestep])

        // This could be optimized with a Metal kernel if we find we need to
        let x0_scalars = zip(modelOutput.scalars, sample.scalars).map { m, s in
            (s - m * sigma_t) / alpha_t
        }
        return MLShapedArray(scalars: x0_scalars, shape: modelOutput.shape)
    }
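
    // For an epsilon-predicting model, the conversion above is the data-prediction
    // form used by DPM-Solver++: x0 = (x_t - sigma_t * eps) / alpha_t.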

    /// One step for the first-order DPM-Solver (equivalent to DDIM).
    /// See https://arxiv.org/abs/2206.00927 for the detailed derivation.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func firstOrderUpdate(
        modelOutput: MLShapedArray<Float32>,
        timestep: Int,
        prevTimestep: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (p_lambda_t, lambda_s) = (Double(lambda_t[prevTimestep]), Double(lambda_t[timestep]))
        let p_alpha_t = Double(alpha_t[prevTimestep])
        let (p_sigma_t, sigma_s) = (Double(sigma_t[prevTimestep]), Double(sigma_t[timestep]))
        let h = p_lambda_t - lambda_s
        // x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output
        let x_t = weightedSum(
            [p_sigma_t / sigma_s, -p_alpha_t * (exp(-h) - 1)],
            [sample, modelOutput]
        )
        return x_t
    }

    /// One step for the second-order multistep DPM-Solver++ algorithm, using the midpoint method.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func secondOrderUpdate(
        modelOutputs: [MLShapedArray<Float32>],
        timesteps: [Int],
        prevTimestep t: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (s0, s1) = (timesteps[back: 1], timesteps[back: 2])
        let (m0, m1) = (modelOutputs[back: 1], modelOutputs[back: 2])
        let (p_lambda_t, lambda_s0, lambda_s1) = (Double(lambda_t[t]), Double(lambda_t[s0]), Double(lambda_t[s1]))
        let p_alpha_t = Double(alpha_t[t])
        let (p_sigma_t, sigma_s0) = (Double(sigma_t[t]), Double(sigma_t[s0]))
        let (h, h_0) = (p_lambda_t - lambda_s0, lambda_s0 - lambda_s1)
        let r0 = h_0 / h
        let D0 = m0

        // D1 = (1.0 / r0) * (m0 - m1)
        let D1 = weightedSum(
            [1 / r0, -1 / r0],
            [m0, m1]
        )

        // See https://arxiv.org/abs/2211.01095 for detailed derivations
        // x_t = (
        //     (sigma_t / sigma_s0) * sample
        //     - (alpha_t * (torch.exp(-h) - 1.0)) * D0
        //     - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1
        // )
        let x_t = weightedSum(
            [p_sigma_t / sigma_s0, -p_alpha_t * (exp(-h) - 1), -0.5 * p_alpha_t * (exp(-h) - 1)],
            [sample, D0, D1]
        )
        return x_t
    }

    public func step(output: MLShapedArray<Float32>, timeStep t: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        let stepIndex = timeSteps.firstIndex(of: t) ?? timeSteps.count - 1
        let prevTimestep = stepIndex == timeSteps.count - 1 ? 0 : timeSteps[stepIndex + 1]

        let lowerOrderFinal = useLowerOrderFinal && stepIndex == timeSteps.count - 1 && timeSteps.count < 15
        let lowerOrderSecond = useLowerOrderFinal && stepIndex == timeSteps.count - 2 && timeSteps.count < 15
        let lowerOrder = lowerOrderStepped < 1 || lowerOrderFinal || lowerOrderSecond

        let modelOutput = convertModelOutput(modelOutput: output, timestep: t, sample: sample)
        if modelOutputs.count == solverOrder { modelOutputs.removeFirst() }
        modelOutputs.append(modelOutput)

        let prevSample: MLShapedArray<Float32>
        if lowerOrder {
            prevSample = firstOrderUpdate(modelOutput: modelOutput, timestep: t, prevTimestep: prevTimestep, sample: sample)
        } else {
            prevSample = secondOrderUpdate(
                modelOutputs: modelOutputs,
                timesteps: [timeSteps[stepIndex - 1], t],
                prevTimestep: prevTimestep,
                sample: sample
            )
        }
        if lowerOrderStepped < solverOrder {
            lowerOrderStepped += 1
        }

        return prevSample
    }
}
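
As a rough usage sketch (not part of this diff): the scheduler is driven by a denoising loop that feeds each noise prediction back through step. The unet function and initialNoise value below are hypothetical stand-ins for the rest of the pipeline.

    // Hypothetical denoising loop, assuming `unet(_:_:)` returns the model's
    // epsilon prediction for a latent at a given timestep.
    let scheduler = DPMSolverMultistepScheduler(stepCount: 25)
    var latent: MLShapedArray<Float32> = initialNoise   // e.g. [1, 4, 64, 64] Gaussian noise
    for t in scheduler.timeSteps {
        let noisePrediction = unet(latent, t)           // placeholder for the Core ML UNet call
        latent = scheduler.step(output: noisePrediction, timeStep: t, sample: latent)
    }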
123 changes: 123 additions & 0 deletions CoreML/pipeline/Decoder.swift
@@ -0,0 +1,123 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML
import Accelerate

/// A decoder model which produces RGB images from latent samples
@available(iOS 16.2, macOS 13.1, *)
public struct Decoder: ResourceManaging {

    /// VAE decoder model
    var model: ManagedMLModel

    /// Create decoder from Core ML model
    ///
    /// - Parameters:
    ///   - url: Location of compiled VAE decoder Core ML model
    ///   - configuration: configuration to be used when the model is loaded
    /// - Returns: A decoder that will lazily load its required resources when needed or requested
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
    }

    /// Ensure the model has been loaded into memory
    public func loadResources() throws {
        try model.loadResources()
    }

    /// Unload the underlying model to free up memory
    public func unloadResources() {
        model.unloadResources()
    }

    /// Batch decode latent samples into images
    ///
    /// - Parameters:
    ///   - latents: Batch of latent samples to decode
    /// - Returns: decoded images
    public func decode(_ latents: [MLShapedArray<Float32>]) throws -> [CGImage] {

        // Form batch inputs for model
        let inputs: [MLFeatureProvider] = try latents.map { sample in
            // Reference pipeline scales the latent samples before decoding
            let sampleScaled = MLShapedArray<Float32>(
                scalars: sample.scalars.map { $0 / 0.18215 },
                shape: sample.shape)

            let dict = [inputName: MLMultiArray(sampleScaled)]
            return try MLDictionaryFeatureProvider(dictionary: dict)
        }
        let batch = MLArrayBatchProvider(array: inputs)

        // Batch predict with model
        let results = try model.perform { model in
            try model.predictions(fromBatch: batch)
        }

        // Transform the outputs to CGImages
        let images: [CGImage] = (0..<results.count).map { i in
            let result = results.features(at: i)
            let outputName = result.featureNames.first!
            let output = result.featureValue(for: outputName)!.multiArrayValue!

            return toRGBCGImage(MLShapedArray<Float32>(output))
        }

        return images
    }

    var inputName: String {
        try! model.perform { model in
            model.modelDescription.inputDescriptionsByName.first!.key
        }
    }

    typealias PixelBufferPFx1 = vImage.PixelBuffer<vImage.PlanarF>
    typealias PixelBufferP8x3 = vImage.PixelBuffer<vImage.Planar8x3>
    typealias PixelBufferIFx3 = vImage.PixelBuffer<vImage.InterleavedFx3>
    typealias PixelBufferI8x3 = vImage.PixelBuffer<vImage.Interleaved8x3>

    func toRGBCGImage(_ array: MLShapedArray<Float32>) -> CGImage {

        // array is [N,C,H,W], where C == 3
        let channelCount = array.shape[1]
        assert(channelCount == 3,
               "Decoding model output has \(channelCount) channels, expected 3")
        let height = array.shape[2]
        let width = array.shape[3]

        // Normalize each channel into a float between 0 and 1.0
        let floatChannels = (0..<channelCount).map { i in

            // Normalized channel output
            let cOut = PixelBufferPFx1(width: width, height: height)

            // Reference this channel in the array and normalize
            array[0][i].withUnsafeShapedBufferPointer { ptr, _, strides in
                let cIn = PixelBufferPFx1(data: .init(mutating: ptr.baseAddress!),
                                          width: width, height: height,
                                          byteCountPerRow: strides[0] * 4)
                // Map [-1.0, 1.0] -> [0.0, 1.0]
                cIn.multiply(by: 0.5, preBias: 1.0, postBias: 0.0, destination: cOut)
            }
            return cOut
        }

        // Convert to interleaved and then to UInt8
        let floatImage = PixelBufferIFx3(planarBuffers: floatChannels)
        let uint8Image = PixelBufferI8x3(width: width, height: height)
        floatImage.convert(to: uint8Image) // maps [0.0, 1.0] -> [0, 255] and clips

        // Convert uint8x3 to RGB CGImage (no alpha)
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.none.rawValue)
        let cgImage = uint8Image.makeCGImage(cgImageFormat:
            .init(bitsPerComponent: 8,
                  bitsPerPixel: 3 * 8,
                  colorSpace: CGColorSpaceCreateDeviceRGB(),
                  bitmapInfo: bitmapInfo)!)!

        return cgImage
    }
}
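
A rough usage sketch for the decoder (not from this diff; the model path and the single-latent batch are assumptions based on the reference pipeline):

    // Hypothetical call site: decode one latent produced by the scheduler loop above.
    let decoder = Decoder(
        modelAt: URL(fileURLWithPath: "models/vae_decoder.mlmodelc"), // assumed location
        configuration: MLModelConfiguration()
    )
    try decoder.loadResources()
    let images = try decoder.decode([latent]) // `latent` from the denoising loop
    decoder.unloadResources()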