From 3a90366a560a96b21c636c8086ca42e823bb2254 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 13:03:39 -0700 Subject: [PATCH 01/14] Initial Push w/ Elements from Voice (Not Working) --- .../AbstractVoice.swift | 63 +++++++ .../AbstractVoiceSettings.swift | 147 ++++++++++++++++ .../SpeechSynthesisRequestHandling.swift | 70 ++++++++ .../VideoGenerationSettings.FrameRate.swift | 16 ++ ...deoGenerationSettings.MotionSettings.swift | 23 +++ .../VideoGenerationSettings.Quality.swift | 29 ++++ .../VideoGenerationSettings.Resolution.swift | 163 ++++++++++++++++++ ...ideoGenerationSettings.StyleStrength.swift | 27 +++ .../VideoGenerationSettings.swift | 43 +++++ .../VideoGenerationRequestHandling.swift | 41 +++++ .../VideoModel.swift | 35 ++++ 11 files changed, 657 insertions(+) create mode 100644 Sources/AI/WIP - Move Somewhere Else/AbstractVoice.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift create mode 100644 Sources/AI/WIP - Move Somewhere Else/VideoModel.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/AbstractVoice.swift b/Sources/AI/WIP - Move Somewhere Else/AbstractVoice.swift new file mode 100644 index 00000000..ff7dfc11 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/AbstractVoice.swift @@ -0,0 +1,63 @@ +// +// AudioStore.swift +// Voice +// +// Created by Jared Davidson on 10/31/24. +// + +import CorePersistence +import SwiftUI +import AVFoundation +import UniformTypeIdentifiers +import ElevenLabs + +public struct AbstractVoice: Codable, Hashable, Identifiable, Sendable { + public typealias ID = _TypeAssociatedID + + public let id: ID + public let voiceID: String + public let name: String + public let description: String? + + init( + voiceID: String, + name: String, + description: String? + ) { + self.id = .init(rawValue: voiceID) + self.voiceID = voiceID + self.name = name + self.description = description + } +} + +// MARK: - Conformances + +public protocol AbstractVoiceInitiable { + init(voice: AbstractVoice) throws +} + +public protocol AbstractVoiceConvertible { + func __conversion() throws -> AbstractVoice +} + +extension ElevenLabs.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.voiceID, + name: self.name, + description: self.description + ) + } +} + +extension ElevenLabs.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + voiceID: voice.voiceID, + name: voice.name, + description: voice.description, + isOwner: nil + ) + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift b/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift new file mode 100644 index 00000000..76052981 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift @@ -0,0 +1,147 @@ +// +// VoiceStore.swift +// Voice +// +// Created by Jared Davidson on 10/30/24. +// + +import SwiftUIZ +import CorePersistence +import ElevenLabs + +public struct AbstractVoiceSettings: Codable, Sendable, Initiable { + public init() { + self.init(stability: 1.0) + } + + + public enum Setting: String, Codable, Sendable { + case stability + case similarityBoost = "similarity_boost" + case styleExaggeration = "style" + case speakerBoost = "use_speaker_boost" + } + + /// Increasing stability will make the voice more consistent between re-generations, but it can also make it sounds a bit monotone. On longer text fragments it is recommended to lower this value. + /// This is a double between 0 (more variable) and 1 (more stable). + public var stability: Double + + /// Increasing the Similarity Boost setting enhances the overall voice clarity and targets speaker similarity. However, very high values can cause artifacts, so it is recommended to adjust this setting to find the optimal value. + /// This is a double between 0 (Low) and 1 (High). + public var similarityBoost: Double + + /// High values are recommended if the style of the speech should be exaggerated compared to the selected voice. Higher values can lead to more instability in the generated speech. Setting this to 0 will greatly increase generation speed and is the default setting. + public var styleExaggeration: Double + + /// Boost the similarity of the synthesized speech and the voice at the cost of some generation speed. + public var speakerBoost: Bool + + public var removeBackgroundNoise: Bool + + public init(stability: Double, + similarityBoost: Double, + styleExaggeration: Double, + speakerBoost: Bool, + removeBackgroundNoise: Bool) { + self.stability = max(0, min(1, stability)) + self.similarityBoost = max(0, min(1, similarityBoost)) + self.styleExaggeration = max(0, min(1, styleExaggeration)) + self.speakerBoost = speakerBoost + self.removeBackgroundNoise = removeBackgroundNoise + } + + public init(stability: Double? = nil, + similarityBoost: Double? = nil, + styleExaggeration: Double? = nil, + speakerBoost: Bool? = nil, + removeBackgroundNoise: Bool? = nil) { + self.stability = stability.map { max(0, min(1, $0)) } ?? 0.5 + self.similarityBoost = similarityBoost.map { max(0, min(1, $0)) } ?? 0.75 + self.styleExaggeration = styleExaggeration.map { max(0, min(1, $0)) } ?? 0 + self.speakerBoost = speakerBoost ?? true + self.removeBackgroundNoise = removeBackgroundNoise ?? false + } + + public init(stability: Double) { + self.init( + stability: stability, + similarityBoost: 0.75, + styleExaggeration: 0, + speakerBoost: true, + removeBackgroundNoise: false + ) + } + + public init(similarityBoost: Double) { + self.init( + stability: 0.5, + similarityBoost: similarityBoost, + styleExaggeration: 0, + speakerBoost: true, + removeBackgroundNoise: false + ) + } + + public init(styleExaggeration: Double) { + self.init( + stability: 0.5, + similarityBoost: 0.75, + styleExaggeration: styleExaggeration, + speakerBoost: true, + removeBackgroundNoise: false + ) + } + + public init(speakerBoost: Bool) { + self.init( + stability: 0.5, + similarityBoost: 0.75, + styleExaggeration: 0, + speakerBoost: speakerBoost, + removeBackgroundNoise: false + ) + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + + try container.encode(stability, forKey: .stability) + try container.encode(similarityBoost, forKey: .similarityBoost) + try container.encode(styleExaggeration, forKey: .styleExaggeration) + try container.encode(speakerBoost, forKey: .speakerBoost) + try container.encode(removeBackgroundNoise, forKey: .removeBackgroundNoise) + } +} + + +public protocol AbstractVoiceSettingsInitiable { + init(settings: AbstractVoiceSettings) throws +} + +public protocol AbstractVoiceSettingsConvertible { + func __conversion() throws -> AbstractVoiceSettings +} + +extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsConvertible { + public func __conversion() throws -> AbstractVoiceSettings { + return .init( + stability: stability, + similarityBoost: similarityBoost, + styleExaggeration: styleExaggeration, + speakerBoost: speakerBoost, + removeBackgroundNoise: removeBackgroundNoise + ) + } +} + +extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsInitiable { + public init(settings: AbstractVoiceSettings) throws { + self.init( + stability: settings.stability, + similarityBoost: settings.similarityBoost, + styleExaggeration: settings.styleExaggeration, + speakerBoost: settings.speakerBoost, + removeBackgroundNoise: settings.removeBackgroundNoise + ) + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..43ac0d9c --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift @@ -0,0 +1,70 @@ +// +// SpeechSynthesisRequestHandling.swift +// Voice +// +// Created by Jared Davidson on 10/30/24. +// + +import Foundation +import AI +import ElevenLabs +import PlayHT +import SwiftUI + +public protocol SpeechToSpeechRequest { + +} + +public protocol SpeechToSpeechRequestHandling { + +} + +public protocol SpeechSynthesisRequestHandling { + func availableVoices() async throws -> [ElevenLabs.Voice] + + func speech( + for text: String, + voiceID: String, + voiceSettings: ElevenLabs.VoiceSettings, + model: ElevenLabs.Model + ) async throws -> Data + + func speechToSpeech( + inputAudioURL: URL, + voiceID: String, + voiceSettings: ElevenLabs.VoiceSettings, + model: ElevenLabs.Model + ) async throws -> Data + + func upload( + voiceWithName name: String, + description: String, + fileURL: URL + ) async throws -> ElevenLabs.Voice.ID + + func edit( + voice: ElevenLabs.Voice.ID, + name: String, + description: String, + fileURL: URL? + ) async throws -> Bool + + func delete(voice: ElevenLabs.Voice.ID) async throws +} + +// MARK: - Environment Key + +private struct ElevenLabsClientKey: EnvironmentKey { + static let defaultValue: (any SpeechSynthesisRequestHandling)? = ElevenLabs.Client(apiKey: "") +} + +extension EnvironmentValues { + var speechSynthesizer: (any SpeechSynthesisRequestHandling)? { + get { self[ElevenLabsClientKey.self] } + set { self[ElevenLabsClientKey.self] = newValue } + } +} + +// MARK: - Conformances + +extension ElevenLabs.Client: SpeechSynthesisRequestHandling {} diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift new file mode 100644 index 00000000..da61c5dd --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift @@ -0,0 +1,16 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import Foundation + +extension VideoGenerationSettings { + public enum FrameRate: Int, Codable, CaseIterable { + case fps8 = 8 + case fps16 = 16 + case fps24 = 24 + case fps30 = 30 + + public var fps: Int { rawValue } + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift new file mode 100644 index 00000000..addec423 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift @@ -0,0 +1,23 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import Foundation + +extension VideoGenerationSettings { + public struct MotionSettings: Codable, Hashable { + public var stabilize: Bool + public var motionBucketId: Int // 0-127 + public var conditioningAugmentation: Double // 0.01-0.1 + + public init( + stabilize: Bool = true, + motionBucketId: Int = 127, + conditioningAugmentation: Double = 0.02 + ) { + self.stabilize = stabilize + self.motionBucketId = max(0, min(127, motionBucketId)) + self.conditioningAugmentation = max(0.01, min(0.1, conditioningAugmentation)) + } + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift new file mode 100644 index 00000000..bc3516a1 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift @@ -0,0 +1,29 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import Foundation + +extension VideoGenerationSettings { + public enum Quality: String, Codable, CaseIterable { + case draft = "draft" // 20 steps + case fast = "fast" // 30 steps + case balanced = "balanced" // 35 steps + case quality = "quality" // 40 steps + case max = "max" // 50 steps + + var inferenceSteps: Int { + switch self { + case .draft: return 20 + case .fast: return 30 + case .balanced: return 35 + case .quality: return 40 + case .max: return 50 + } + } + + var qualityValue: Double { + Double(inferenceSteps - 20) / 30 + } + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift new file mode 100644 index 00000000..a140a046 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift @@ -0,0 +1,163 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import Foundation + +extension VideoGenerationSettings { + public enum Resolution: Codable, Hashable { + // Square Resolutions + case sd512x512 + case sd768x768 + case sd1024x1024 + + // Landscape HD Resolutions + case hd720p // 1280x720 + case hd1080p // 1920x1080 + case hd1440p // 2560x1440 + case uhd4k // 3840x2160 + + // Social Media Formats + case instagram // 1080x1080 + case story // 1080x1920 + case tiktok // 1080x1920 + case youtube // 1920x1080 + + // Custom Resolution + case custom(width: Int, height: Int) + + public static var allCases: [Resolution] { + [ + .sd512x512, .sd768x768, .sd1024x1024, + .hd720p, .hd1080p, .hd1440p, .uhd4k, + .instagram, .story, .tiktok, .youtube + ] + } + + public var dimensions: (width: Int, height: Int) { + switch self { + // Square Resolutions + case .sd512x512: + return (512, 512) + case .sd768x768: + return (768, 768) + case .sd1024x1024: + return (1024, 1024) + + // Landscape HD Resolutions + case .hd720p: + return (1280, 720) + case .hd1080p: + return (1920, 1080) + case .hd1440p: + return (2560, 1440) + case .uhd4k: + return (3840, 2160) + + // Social Media Formats + case .instagram: + return (1080, 1080) + case .story: + return (1080, 1920) + case .tiktok: + return (1080, 1920) + case .youtube: + return (1920, 1080) + + case .custom(let width, let height): + return (width, height) + } + } + + public var width: Int { dimensions.width } + public var height: Int { dimensions.height } + + public var aspectRatio: String { + let gcd = calculateGCD(width, height) + let simplifiedWidth = width / gcd + let simplifiedHeight = height / gcd + + // Check for common aspect ratios + switch (simplifiedWidth, simplifiedHeight) { + case (1, 1): return "1:1" // Square + case (16, 9): return "16:9" // Standard Widescreen + case (9, 16): return "9:16" // Vertical/Portrait + case (4, 3): return "4:3" // Traditional TV + case (21, 9): return "21:9" // Ultrawide + default: return "\(simplifiedWidth):\(simplifiedHeight)" + } + } + + public var resolution: String { + switch self { + case .uhd4k: + return "4K" + case .hd1440p: + return "1440p" + case .hd1080p, .youtube: + return "1080p" + case .hd720p: + return "720p" + case .instagram, .story, .tiktok: + return "1080p" + case .sd512x512: + return "512p" + case .sd768x768: + return "768p" + case .sd1024x1024: + return "1024p" + case .custom(let width, _): + if width >= 3840 { return "4K" } + if width >= 2560 { return "1440p" } + if width >= 1920 { return "1080p" } + if width >= 1280 { return "720p" } + return "\(width)p" + } + } + + public static func detectResolution(width: Int, height: Int) -> Resolution { + switch (width, height) { + case (512, 512): return .sd512x512 + case (768, 768): return .sd768x768 + case (1024, 1024): return .sd1024x1024 + case (1280, 720): return .hd720p + case (1920, 1080): return .hd1080p + case (2560, 1440): return .hd1440p + case (3840, 2160): return .uhd4k + case (1080, 1080): return .instagram + case (1080, 1920): return .story + default: return .custom(width: width, height: height) + } + } + + private func calculateGCD(_ a: Int, _ b: Int) -> Int { + var a = a + var b = b + while b != 0 { + let temp = b + b = a % b + a = temp + } + + return a + } + + public var displayName: String { + switch self { + case .sd512x512: return "512×512" + case .sd768x768: return "768×768" + case .sd1024x1024: return "1024×1024" + case .hd720p: return "HD 720p" + case .hd1080p: return "Full HD 1080p" + case .hd1440p: return "QHD 1440p" + case .uhd4k: return "4K UHD" + case .instagram: return "Instagram Square" + case .story: return "Instagram/TikTok Story" + case .tiktok: return "TikTok Video" + case .youtube: return "YouTube HD" + case .custom(let width, let height): + return "\(width)×\(height)" + } + } + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift new file mode 100644 index 00000000..6a75ff9b --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift @@ -0,0 +1,27 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import Foundation + +extension VideoGenerationSettings { + public enum StyleStrength: String, Codable, CaseIterable { + case subtle = "subtle" // 1-5 + case balanced = "balanced" // 5-10 + case strong = "strong" // 10-15 + case extreme = "extreme" // 15-20 + + var guidanceScale: Double { + switch self { + case .subtle: return 3.0 + case .balanced: return 7.5 + case .strong: return 12.5 + case .extreme: return 17.5 + } + } + + var strengthValue: Double { + (guidanceScale - 1) / 19 + } + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift new file mode 100644 index 00000000..1c8d7314 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift @@ -0,0 +1,43 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import Foundation + +public struct VideoGenerationSettings: Codable, Hashable { + /// Duration of the generated video in seconds (1-60) + public var duration: Double { + didSet { + duration = max(1, min(60, duration)) + } + } + + public var resolution: Resolution + public var frameRate: FrameRate + public var quality: Quality + public var styleStrength: StyleStrength + public var motion: MotionSettings + public var negativePrompt: String + + public var fps: Int { frameRate.fps } + public var numInferenceSteps: Int { quality.inferenceSteps } + public var guidanceScale: Double { styleStrength.guidanceScale } + + public init( + duration: Double = 10.0, + resolution: Resolution = .sd512x512, + frameRate: FrameRate = .fps24, + quality: Quality = .balanced, + styleStrength: StyleStrength = .balanced, + motion: MotionSettings = MotionSettings(), + negativePrompt: String = "" + ) { + self.duration = max(1, min(60, duration)) + self.resolution = resolution + self.frameRate = frameRate + self.quality = quality + self.styleStrength = styleStrength + self.motion = motion + self.negativePrompt = negativePrompt + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift new file mode 100644 index 00000000..f8ea8566 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift @@ -0,0 +1,41 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import AVFoundation +import Foundation +import SwiftUI + +public protocol VideoGenerationRequestHandling { + func availableModels() async throws -> [VideoModel] + + func textToVideo( + text: String, + model: VideoModel, + settings: VideoGenerationSettings + ) async throws -> Data + + func imageToVideo( + imageURL: URL, + model: VideoModel, + settings: VideoGenerationSettings + ) async throws -> Data + + func videoToVideo( + videoURL: URL, + prompt: String, + model: VideoModel, + settings: VideoGenerationSettings + ) async throws -> Data +} + +private struct VideoGeneratorKey: EnvironmentKey { + static let defaultValue: (any VideoGenerationRequestHandling)? = DummyVideoGenerator() +} + +extension EnvironmentValues { + var videoClient: (any VideoGenerationRequestHandling)? { + get { self[VideoGeneratorKey.self] } + set { self[VideoGeneratorKey.self] = newValue } + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/VideoModel.swift b/Sources/AI/WIP - Move Somewhere Else/VideoModel.swift new file mode 100644 index 00000000..ed63bae8 --- /dev/null +++ b/Sources/AI/WIP - Move Somewhere Else/VideoModel.swift @@ -0,0 +1,35 @@ +// +// Copyright (c) Preternatural AI, Inc. +// + +import CorePersistence +import Foundation + +public struct VideoModel: Codable, Hashable, Identifiable { + public typealias ID = _TypeAssociatedID + + public let id: ID + public let endpoint: String + public let name: String + public let description: String? + public let capabilities: [Capability] + + public enum Capability: String, Codable { + case textToVideo + case imageToVideo + case videoToVideo + } + + public init( + endpoint: String, + name: String, + description: String?, + capabilities: [Capability] + ) { + self.id = .random() + self.endpoint = endpoint + self.name = name + self.description = description + self.capabilities = capabilities + } +} From 7541d4d644f1f86ae9ebfa933f274c74f2a4cce3 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 13:32:50 -0700 Subject: [PATCH 02/14] Building again --- .../AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift | 2 +- .../SpeechSynthesisRequestHandling.swift | 2 +- .../VideoGenerationSettings.Quality.swift | 4 ++-- .../VideoGenerationSettings.StyleStrength.swift | 4 ++-- .../Video Generation Setttings/VideoGenerationSettings.swift | 2 +- .../VideoGenerationRequestHandling.swift | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift b/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift index 76052981..d6f87dc5 100644 --- a/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift +++ b/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift @@ -9,7 +9,7 @@ import SwiftUIZ import CorePersistence import ElevenLabs -public struct AbstractVoiceSettings: Codable, Sendable, Initiable { +public struct AbstractVoiceSettings: Codable, Sendable, Initiable, Equatable { public init() { self.init(stability: 1.0) } diff --git a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift index 43ac0d9c..d6e9f590 100644 --- a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift @@ -59,7 +59,7 @@ private struct ElevenLabsClientKey: EnvironmentKey { } extension EnvironmentValues { - var speechSynthesizer: (any SpeechSynthesisRequestHandling)? { + public var speechSynthesizer: (any SpeechSynthesisRequestHandling)? { get { self[ElevenLabsClientKey.self] } set { self[ElevenLabsClientKey.self] = newValue } } diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift index bc3516a1..5ce0de27 100644 --- a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift @@ -12,7 +12,7 @@ extension VideoGenerationSettings { case quality = "quality" // 40 steps case max = "max" // 50 steps - var inferenceSteps: Int { + public var inferenceSteps: Int { switch self { case .draft: return 20 case .fast: return 30 @@ -22,7 +22,7 @@ extension VideoGenerationSettings { } } - var qualityValue: Double { + public var qualityValue: Double { Double(inferenceSteps - 20) / 30 } } diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift index 6a75ff9b..1fdc10af 100644 --- a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift @@ -11,7 +11,7 @@ extension VideoGenerationSettings { case strong = "strong" // 10-15 case extreme = "extreme" // 15-20 - var guidanceScale: Double { + public var guidanceScale: Double { switch self { case .subtle: return 3.0 case .balanced: return 7.5 @@ -20,7 +20,7 @@ extension VideoGenerationSettings { } } - var strengthValue: Double { + public var strengthValue: Double { (guidanceScale - 1) / 19 } } diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift index 1c8d7314..81a72cfb 100644 --- a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift +++ b/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift @@ -4,7 +4,7 @@ import Foundation -public struct VideoGenerationSettings: Codable, Hashable { +public struct VideoGenerationSettings: Codable, Hashable, Equatable { /// Duration of the generated video in seconds (1-60) public var duration: Double { didSet { diff --git a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift index f8ea8566..9a6ed8e8 100644 --- a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift @@ -30,11 +30,11 @@ public protocol VideoGenerationRequestHandling { } private struct VideoGeneratorKey: EnvironmentKey { - static let defaultValue: (any VideoGenerationRequestHandling)? = DummyVideoGenerator() + public static let defaultValue: (any VideoGenerationRequestHandling)? = nil } extension EnvironmentValues { - var videoClient: (any VideoGenerationRequestHandling)? { + public var videoClient: (any VideoGenerationRequestHandling)? { get { self[VideoGeneratorKey.self] } set { self[VideoGeneratorKey.self] = newValue } } From 9497758347534aadcba256c68f720263f45383bf Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 17:29:29 -0700 Subject: [PATCH 03/14] Working --- .../SpeechSynthesisRequestHandling.swift | 30 ++++++++++++++++++- .../VideoGenerationRequestHandling.swift | 28 +++++++++++++++++ .../API/ElevenLabs.APISpecification.swift | 3 -- .../Intramodular/ElevenLabs.Client.swift | 25 +++++++++++++++- 4 files changed, 81 insertions(+), 5 deletions(-) diff --git a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift index d6e9f590..54f9e853 100644 --- a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift @@ -19,7 +19,7 @@ public protocol SpeechToSpeechRequestHandling { } -public protocol SpeechSynthesisRequestHandling { +public protocol SpeechSynthesisRequestHandling: AnyObject { func availableVoices() async throws -> [ElevenLabs.Voice] func speech( @@ -68,3 +68,31 @@ extension EnvironmentValues { // MARK: - Conformances extension ElevenLabs.Client: SpeechSynthesisRequestHandling {} + + +public struct AnySpeechSynthesisRequestHandling: Hashable { + private let _service: any CoreMI._ServiceClientProtocol + private let _base: any SpeechSynthesisRequestHandling + private let _hashValue: Int + + public init( + _ base: any SpeechSynthesisRequestHandling, + service: any CoreMI._ServiceClientProtocol + ) { + self._base = base + self._hashValue = ObjectIdentifier(base as AnyObject).hashValue + self._service = service + } + + public static func == (lhs: AnySpeechSynthesisRequestHandling, rhs: AnySpeechSynthesisRequestHandling) -> Bool { + lhs._hashValue == rhs._hashValue + } + + public func hash(into hasher: inout Hasher) { + hasher.combine(_hashValue) + } + + public func base() -> any SpeechSynthesisRequestHandling { + _base + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift index 9a6ed8e8..8d975397 100644 --- a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift @@ -5,6 +5,7 @@ import AVFoundation import Foundation import SwiftUI +import LargeLanguageModels public protocol VideoGenerationRequestHandling { func availableModels() async throws -> [VideoModel] @@ -39,3 +40,30 @@ extension EnvironmentValues { set { self[VideoGeneratorKey.self] = newValue } } } + +public struct AnyVideoGenerationRequestHandling: Hashable { + private let _service: any CoreMI._ServiceClientProtocol + private let _base: any VideoGenerationRequestHandling + private let _hashValue: Int + + public init( + _ base: any VideoGenerationRequestHandling, + service: any CoreMI._ServiceClientProtocol + ) { + self._base = base + self._hashValue = ObjectIdentifier(base as AnyObject).hashValue + self._service = service + } + + public static func == (lhs: AnyVideoGenerationRequestHandling, rhs: AnyVideoGenerationRequestHandling) -> Bool { + lhs._hashValue == rhs._hashValue + } + + public func hash(into hasher: inout Hasher) { + hasher.combine(_hashValue) + } + + public func base() -> any VideoGenerationRequestHandling { + _base + } +} diff --git a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift index fa442c34..656255b8 100644 --- a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift +++ b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift @@ -128,9 +128,6 @@ extension ElevenLabs.APISpecification { context: DecodeOutputContext ) throws -> Output { do { - if Input.self == RequestBodies.EditVoiceInput.self { - print("TEsts") - } try response.validate() } catch { let apiError: Error diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift index 2093a3ea..6fbc4922 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift +++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift @@ -10,10 +10,15 @@ import SwiftAPI import Merge import FoundationX import Swallow +import LargeLanguageModels extension ElevenLabs { @RuntimeDiscoverable public final class Client: SwiftAPI.Client, ObservableObject { + public static var persistentTypeRepresentation: some IdentityRepresentation { + CoreMI._ServiceVendorIdentifier._ElevenLabs + } + public typealias API = ElevenLabs.APISpecification public typealias Session = HTTPSession @@ -33,6 +38,25 @@ extension ElevenLabs { } } +extension ElevenLabs.Client: CoreMI._ServiceClientProtocol { + public convenience init( + account: (any CoreMI._ServiceAccountProtocol)? + ) async throws { + let account: any CoreMI._ServiceAccountProtocol = try account.unwrap() + let serviceVendorIdentifier: CoreMI._ServiceVendorIdentifier = try account.serviceVendorIdentifier.unwrap() + + guard serviceVendorIdentifier == CoreMI._ServiceVendorIdentifier._ElevenLabs else { + throw CoreMI._ServiceClientError.incompatibleVendor(serviceVendorIdentifier) + } + + guard let credential = try account.credential as? CoreMI._ServiceCredentialTypes.APIKeyCredential else { + throw CoreMI._ServiceClientError.invalidCredential(try account.credential) + } + + self.init(apiKey: credential.apiKey) + } +} + extension ElevenLabs.Client { public func availableVoices() async throws -> [ElevenLabs.Voice] { try await run(\.listVoices).voices @@ -50,7 +74,6 @@ extension ElevenLabs.Client { voiceSettings: voiceSettings, model: model ) - return try await run(\.textToSpeech, with: .init(voiceId: voiceID, requestBody: requestBody)) } From fc6e2725c9679513e5665856498d9856dbbf3e00 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 19:11:01 -0700 Subject: [PATCH 04/14] Fixes --- .../SpeechSynthesisRequestHandling.swift | 23 +++++++++++-------- .../VideoGenerationRequestHandling.swift | 22 ++++++++++-------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift index 54f9e853..725b9e86 100644 --- a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift @@ -71,17 +71,24 @@ extension ElevenLabs.Client: SpeechSynthesisRequestHandling {} public struct AnySpeechSynthesisRequestHandling: Hashable { - private let _service: any CoreMI._ServiceClientProtocol - private let _base: any SpeechSynthesisRequestHandling private let _hashValue: Int + public let base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling + + public var displayName: String { + switch base { + case is ElevenLabs.Client: + return "ElevenLabs" + default: + fatalError() + } + } + public init( - _ base: any SpeechSynthesisRequestHandling, - service: any CoreMI._ServiceClientProtocol + _ base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling ) { - self._base = base + self.base = base self._hashValue = ObjectIdentifier(base as AnyObject).hashValue - self._service = service } public static func == (lhs: AnySpeechSynthesisRequestHandling, rhs: AnySpeechSynthesisRequestHandling) -> Bool { @@ -91,8 +98,4 @@ public struct AnySpeechSynthesisRequestHandling: Hashable { public func hash(into hasher: inout Hasher) { hasher.combine(_hashValue) } - - public func base() -> any SpeechSynthesisRequestHandling { - _base - } } diff --git a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift index 8d975397..d9f9ab1c 100644 --- a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift @@ -42,17 +42,23 @@ extension EnvironmentValues { } public struct AnyVideoGenerationRequestHandling: Hashable { - private let _service: any CoreMI._ServiceClientProtocol - private let _base: any VideoGenerationRequestHandling + public let base: any CoreMI._ServiceClientProtocol & VideoGenerationRequestHandling private let _hashValue: Int + +// var displayName: String { +// switch base { +// case is FalVideoGenerationRequestHandling: +// return "Fal" +// default: +// fatalError() +// } +// } public init( - _ base: any VideoGenerationRequestHandling, - service: any CoreMI._ServiceClientProtocol + _ base: any CoreMI._ServiceClientProtocol & VideoGenerationRequestHandling ) { - self._base = base + self.base = base self._hashValue = ObjectIdentifier(base as AnyObject).hashValue - self._service = service } public static func == (lhs: AnyVideoGenerationRequestHandling, rhs: AnyVideoGenerationRequestHandling) -> Bool { @@ -62,8 +68,4 @@ public struct AnyVideoGenerationRequestHandling: Hashable { public func hash(into hasher: inout Hasher) { hasher.combine(_hashValue) } - - public func base() -> any VideoGenerationRequestHandling { - _base - } } From fbc2f1062a265cbea60e5e9c35ae258ebc87f8f7 Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Mon, 13 Jan 2025 16:11:30 +0530 Subject: [PATCH 05/14] Update --- .../SpeechSynthesisRequestHandling.swift | 1 - 1 file changed, 1 deletion(-) diff --git a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift index 725b9e86..1de670e4 100644 --- a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift @@ -8,7 +8,6 @@ import Foundation import AI import ElevenLabs -import PlayHT import SwiftUI public protocol SpeechToSpeechRequest { From f74b0f59ef655540d6f6136dea8872455f758d42 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 16:11:00 -0700 Subject: [PATCH 06/14] Move files to LargeLanguageModels. Added conformances to individual structs. --- Package.swift | 2 +- .../AnySpeechSynthesisRequestHandling.swift | 39 +++++++ .../SpeechSynthesisRequestHandling.swift | 100 ------------------ .../Intramodular/ElevenLabs.Client.swift | 53 ++++++++++ .../Intramodular/ElevenLabs.Voice.swift | 22 ++++ .../ElevenLabs.VoiceSettings.swift | 27 +++++ .../AbstractVoice.swift | 24 +---- .../AbstractVoiceSettings.swift | 25 ----- .../SpeechSynthesisRequestHandling.swift | 63 +++++++++++ .../VideoGenerationSettings.FrameRate.swift | 0 ...deoGenerationSettings.MotionSettings.swift | 0 .../VideoGenerationSettings.Quality.swift | 0 .../VideoGenerationSettings.Resolution.swift | 0 ...ideoGenerationSettings.StyleStrength.swift | 0 .../VideoGenerationSettings.swift | 0 .../VideoGenerationRequestHandling.swift | 0 .../VideoModel.swift | 0 .../Intramodular/Models/PlayHT.Voice.swift | 57 +++++++++- .../Rime/Intramodular/Models/Rime.Voice.swift | 47 ++++++++ 19 files changed, 309 insertions(+), 150 deletions(-) create mode 100644 Sources/AI/AnySpeechSynthesisRequestHandling.swift delete mode 100644 Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/AbstractVoice.swift (60%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/AbstractVoiceSettings.swift (83%) create mode 100644 Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift (100%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift (100%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift (100%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift (100%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift (100%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift (100%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift (100%) rename Sources/{AI => LargeLanguageModels/Intramodular}/WIP - Move Somewhere Else/VideoModel.swift (100%) diff --git a/Package.swift b/Package.swift index a374aaf0..f731cfd7 100644 --- a/Package.swift +++ b/Package.swift @@ -115,7 +115,7 @@ let package = Package( "Merge", "NetworkKit", "Swallow", - "SwiftUIX", + "SwiftUIX" ], path: "Sources/LargeLanguageModels", resources: [ diff --git a/Sources/AI/AnySpeechSynthesisRequestHandling.swift b/Sources/AI/AnySpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..18dce27e --- /dev/null +++ b/Sources/AI/AnySpeechSynthesisRequestHandling.swift @@ -0,0 +1,39 @@ +// +// AnySpeechSynthesisRequestHandling.swift +// AI +// +// Created by Jared Davidson on 1/14/25. +// + +import ElevenLabs +import LargeLanguageModels + +public struct AnySpeechSynthesisRequestHandling: Hashable { + private let _hashValue: Int + + public let base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling + + public var displayName: String { + switch base { + case is ElevenLabs.Client: + return "ElevenLabs" + default: + fatalError() + } + } + + public init( + _ base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling + ) { + self.base = base + self._hashValue = ObjectIdentifier(base as AnyObject).hashValue + } + + public static func == (lhs: AnySpeechSynthesisRequestHandling, rhs: AnySpeechSynthesisRequestHandling) -> Bool { + lhs._hashValue == rhs._hashValue + } + + public func hash(into hasher: inout Hasher) { + hasher.combine(_hashValue) + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift deleted file mode 100644 index 1de670e4..00000000 --- a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift +++ /dev/null @@ -1,100 +0,0 @@ -// -// SpeechSynthesisRequestHandling.swift -// Voice -// -// Created by Jared Davidson on 10/30/24. -// - -import Foundation -import AI -import ElevenLabs -import SwiftUI - -public protocol SpeechToSpeechRequest { - -} - -public protocol SpeechToSpeechRequestHandling { - -} - -public protocol SpeechSynthesisRequestHandling: AnyObject { - func availableVoices() async throws -> [ElevenLabs.Voice] - - func speech( - for text: String, - voiceID: String, - voiceSettings: ElevenLabs.VoiceSettings, - model: ElevenLabs.Model - ) async throws -> Data - - func speechToSpeech( - inputAudioURL: URL, - voiceID: String, - voiceSettings: ElevenLabs.VoiceSettings, - model: ElevenLabs.Model - ) async throws -> Data - - func upload( - voiceWithName name: String, - description: String, - fileURL: URL - ) async throws -> ElevenLabs.Voice.ID - - func edit( - voice: ElevenLabs.Voice.ID, - name: String, - description: String, - fileURL: URL? - ) async throws -> Bool - - func delete(voice: ElevenLabs.Voice.ID) async throws -} - -// MARK: - Environment Key - -private struct ElevenLabsClientKey: EnvironmentKey { - static let defaultValue: (any SpeechSynthesisRequestHandling)? = ElevenLabs.Client(apiKey: "") -} - -extension EnvironmentValues { - public var speechSynthesizer: (any SpeechSynthesisRequestHandling)? { - get { self[ElevenLabsClientKey.self] } - set { self[ElevenLabsClientKey.self] = newValue } - } -} - -// MARK: - Conformances - -extension ElevenLabs.Client: SpeechSynthesisRequestHandling {} - - -public struct AnySpeechSynthesisRequestHandling: Hashable { - private let _hashValue: Int - - public let base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling - - public var displayName: String { - switch base { - case is ElevenLabs.Client: - return "ElevenLabs" - default: - fatalError() - } - } - - public init( - _ base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling - ) { - self.base = base - self._hashValue = ObjectIdentifier(base as AnyObject).hashValue - } - - public static func == (lhs: AnySpeechSynthesisRequestHandling, rhs: AnySpeechSynthesisRequestHandling) -> Bool { - lhs._hashValue == rhs._hashValue - } - - public func hash(into hasher: inout Hasher) { - hasher.combine(_hashValue) - } -} diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift index 6fbc4922..2d7a03b6 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift +++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift @@ -130,3 +130,56 @@ extension ElevenLabs.Client { try await run(\.deleteVoice, with: voice.rawValue) } } + +// MARK: - Conformances + +extension ElevenLabs.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + return try await self.availableVoices().map({try $0.__conversion()}) + } + + public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + try await self.speech( + for: text, + voiceID: voiceID, + voiceSettings: .init(settings: voiceSettings), + model: .init(rawValue: model) ?? .MultilingualV1 + ) + } + + public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + try await self.speechToSpeech( + inputAudioURL: inputAudioURL, + voiceID: voiceID, + voiceSettings: .init(settings: voiceSettings), + model: .init(rawValue: model) ?? .MultilingualV1 + ) + } + + public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID { + let voice: ElevenLabs.Voice.ID = try await self.upload( + voiceWithName: name, + description: description, + fileURL: fileURL + ) + + return .init(rawValue: voice.rawValue) + } + + public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool { + try await self.edit( + voice: ElevenLabs.Voice.ID(rawValue: voice.rawValue), + name: name, + description: description, + fileURL: fileURL + ) + } + + public func delete(voice: AbstractVoice.ID) async throws { + try await self.delete( + voice: ElevenLabs.Voice.ID( + rawValue: voice.rawValue + ) + ) + } +} diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Voice.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.Voice.swift index 3a54532f..dbe29d63 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.Voice.swift +++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.Voice.swift @@ -4,6 +4,7 @@ import Foundation import Swift +import LargeLanguageModels extension ElevenLabs { public struct Voice: Hashable, Identifiable, Sendable { @@ -42,3 +43,24 @@ extension ElevenLabs.Voice: Codable { case isOwner } } + +extension ElevenLabs.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.voiceID, + name: self.name, + description: self.description + ) + } +} + +extension ElevenLabs.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + voiceID: voice.voiceID, + name: voice.name, + description: voice.description, + isOwner: nil + ) + } +} diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.VoiceSettings.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.VoiceSettings.swift index 1ffb7947..f0a6b825 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.VoiceSettings.swift +++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.VoiceSettings.swift @@ -3,6 +3,7 @@ // import Foundation +import LargeLanguageModels extension ElevenLabs { public struct VoiceSettings: Codable, Sendable, Hashable { @@ -98,3 +99,29 @@ extension ElevenLabs.VoiceSettings { ) } } + +// MARK: - Conformances + +extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsConvertible { + public func __conversion() throws -> AbstractVoiceSettings { + return .init( + stability: stability, + similarityBoost: similarityBoost, + styleExaggeration: styleExaggeration, + speakerBoost: speakerBoost, + removeBackgroundNoise: removeBackgroundNoise + ) + } +} + +extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsInitiable { + public init(settings: AbstractVoiceSettings) throws { + self.init( + stability: settings.stability, + similarityBoost: settings.similarityBoost, + styleExaggeration: settings.styleExaggeration, + speakerBoost: settings.speakerBoost, + removeBackgroundNoise: settings.removeBackgroundNoise + ) + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/AbstractVoice.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoice.swift similarity index 60% rename from Sources/AI/WIP - Move Somewhere Else/AbstractVoice.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoice.swift index ff7dfc11..80de4232 100644 --- a/Sources/AI/WIP - Move Somewhere Else/AbstractVoice.swift +++ b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoice.swift @@ -9,7 +9,6 @@ import CorePersistence import SwiftUI import AVFoundation import UniformTypeIdentifiers -import ElevenLabs public struct AbstractVoice: Codable, Hashable, Identifiable, Sendable { public typealias ID = _TypeAssociatedID @@ -19,7 +18,7 @@ public struct AbstractVoice: Codable, Hashable, Identifiable, Sendable { public let name: String public let description: String? - init( + public init( voiceID: String, name: String, description: String? @@ -40,24 +39,3 @@ public protocol AbstractVoiceInitiable { public protocol AbstractVoiceConvertible { func __conversion() throws -> AbstractVoice } - -extension ElevenLabs.Voice: AbstractVoiceConvertible { - public func __conversion() throws -> AbstractVoice { - return AbstractVoice( - voiceID: self.voiceID, - name: self.name, - description: self.description - ) - } -} - -extension ElevenLabs.Voice: AbstractVoiceInitiable { - public init(voice: AbstractVoice) throws { - self.init( - voiceID: voice.voiceID, - name: voice.name, - description: voice.description, - isOwner: nil - ) - } -} diff --git a/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoiceSettings.swift similarity index 83% rename from Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoiceSettings.swift index d6f87dc5..34fd03b6 100644 --- a/Sources/AI/WIP - Move Somewhere Else/AbstractVoiceSettings.swift +++ b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoiceSettings.swift @@ -7,7 +7,6 @@ import SwiftUIZ import CorePersistence -import ElevenLabs public struct AbstractVoiceSettings: Codable, Sendable, Initiable, Equatable { public init() { @@ -121,27 +120,3 @@ public protocol AbstractVoiceSettingsInitiable { public protocol AbstractVoiceSettingsConvertible { func __conversion() throws -> AbstractVoiceSettings } - -extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsConvertible { - public func __conversion() throws -> AbstractVoiceSettings { - return .init( - stability: stability, - similarityBoost: similarityBoost, - styleExaggeration: styleExaggeration, - speakerBoost: speakerBoost, - removeBackgroundNoise: removeBackgroundNoise - ) - } -} - -extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsInitiable { - public init(settings: AbstractVoiceSettings) throws { - self.init( - stability: settings.stability, - similarityBoost: settings.similarityBoost, - styleExaggeration: settings.styleExaggeration, - speakerBoost: settings.speakerBoost, - removeBackgroundNoise: settings.removeBackgroundNoise - ) - } -} diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..e8bec05c --- /dev/null +++ b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift @@ -0,0 +1,63 @@ +// +// SpeechSynthesisRequestHandling.swift +// Voice +// +// Created by Jared Davidson on 10/30/24. +// + +import Foundation +import SwiftUI + +public protocol SpeechToSpeechRequest { + +} + +public protocol SpeechToSpeechRequestHandling { + +} + +public protocol SpeechSynthesisRequestHandling: AnyObject { + func availableVoices() async throws -> [AbstractVoice] + + func speech( + for text: String, + voiceID: String, + voiceSettings: AbstractVoiceSettings, + model: String + ) async throws -> Data + + func speechToSpeech( + inputAudioURL: URL, + voiceID: String, + voiceSettings: AbstractVoiceSettings, + model: String + ) async throws -> Data + + func upload( + voiceWithName name: String, + description: String, + fileURL: URL + ) async throws -> AbstractVoice.ID + + func edit( + voice: AbstractVoice.ID, + name: String, + description: String, + fileURL: URL? + ) async throws -> Bool + + func delete(voice: AbstractVoice.ID) async throws +} + +// MARK: - Environment Key + +private struct AbstractClientKey: EnvironmentKey { + static let defaultValue: (any SpeechSynthesisRequestHandling)? = nil +} + +extension EnvironmentValues { + public var speechSynthesizer: (any SpeechSynthesisRequestHandling)? { + get { self[AbstractClientKey.self] } + set { self[AbstractClientKey.self] = newValue } + } +} diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift diff --git a/Sources/AI/WIP - Move Somewhere Else/VideoModel.swift b/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoModel.swift similarity index 100% rename from Sources/AI/WIP - Move Somewhere Else/VideoModel.swift rename to Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoModel.swift diff --git a/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift b/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift index 3ac8907f..7ee76f26 100644 --- a/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift +++ b/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift @@ -7,6 +7,7 @@ import Foundation import Swallow +import LargeLanguageModels extension PlayHT { public struct Voice: Codable, Hashable, Identifiable { @@ -16,7 +17,7 @@ extension PlayHT { public let name: String public let language: String? public let languageCode: String? - public let voiceEngine: String + public let voiceEngine: String? public let isCloned: Bool? public let gender: String? public let accent: String? @@ -26,6 +27,39 @@ extension PlayHT { public let texture: String? public let loudness: String? public let tempo: String? + + + init( + id: ID, + name: String, + language: String? = nil, + languageCode: String? = nil, + voiceEngine: String? = nil, + isCloned: Bool? = nil, + gender: String? = nil, + accent: String? = nil, + age: String? = nil, + style: String? = nil, + sample: String? = nil, + texture: String? = nil, + loudness: String? = nil, + tempo: String? = nil + ) { + self.id = id + self.name = name + self.language = language + self.languageCode = languageCode + self.voiceEngine = voiceEngine + self.isCloned = isCloned + self.gender = gender + self.accent = accent + self.age = age + self.style = style + self.sample = sample + self.texture = texture + self.loudness = loudness + self.tempo = tempo + } private enum CodingKeys: String, CodingKey { case id, name, language, languageCode, voiceEngine, isCloned @@ -72,3 +106,24 @@ extension PlayHT { case flac = "flac" } } + +// MARK: - Conformances + +extension PlayHT.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.id.rawValue, + name: self.name, + description: nil + ) + } +} + +extension PlayHT.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + id: .init(rawValue: voice.id.rawValue), + name: voice.name + ) + } +} diff --git a/Sources/Rime/Intramodular/Models/Rime.Voice.swift b/Sources/Rime/Intramodular/Models/Rime.Voice.swift index 1a341b41..459a19ba 100644 --- a/Sources/Rime/Intramodular/Models/Rime.Voice.swift +++ b/Sources/Rime/Intramodular/Models/Rime.Voice.swift @@ -6,10 +6,32 @@ // import Foundation +import CorePersistence import Swallow +import LargeLanguageModels extension Rime { public struct Voice: Hashable { + public typealias ID = _TypeAssociatedID + + public init( + name: String, + age: String?, + country: String?, + region: String?, + demographic: String?, + genre: [String]? + ) { + self.id = .init(rawValue: UUID().uuidString) + self.name = name + self.age = age + self.country = country + self.region = region + self.demographic = demographic + self.genre = genre + } + + public let id: ID public let name: String public let age: String? public let country: String? @@ -42,5 +64,30 @@ extension Rime.Voice: Codable { self.region = try container.decodeIfPresent(String.self, forKey: Rime.Voice.CodingKeys.region) self.demographic = try container.decodeIfPresent(String.self, forKey: Rime.Voice.CodingKeys.demographic) self.genre = try container.decodeIfPresent([String].self, forKey: Rime.Voice.CodingKeys.genre) + + self.id = .init(rawValue: UUID().uuidString) + } +} + +extension Rime.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + name: voice.name, + age: nil, + country: nil, + region: nil, + demographic: nil, + genre: nil + ) + } +} + +extension Rime.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.id.rawValue, + name: self.name, + description: nil + ) } } From 114db0bd655f650447ad7841aabe89d9aa2229a7 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 17:08:08 -0700 Subject: [PATCH 07/14] Moved protocol conformances --- ...lient+SpeechSynthesisRequestHandling.swift | 44 +++++++++++ .../AbstractVoice.swift | 0 .../AbstractVoiceSettings.swift | 0 .../SpeechSynthesisRequestHandling.swift | 0 .../VideoGenerationRequestHandling.swift | 1 - .../VideoGenerationSettings.FrameRate.swift | 0 ...deoGenerationSettings.MotionSettings.swift | 0 .../VideoGenerationSettings.Quality.swift | 0 .../VideoGenerationSettings.Resolution.swift | 0 ...ideoGenerationSettings.StyleStrength.swift | 0 .../VideoGenerationSettings.swift | 0 .../VideoModel.swift | 0 .../Intramodular/Models/NeetsAI.Voice.swift | 22 ++++++ ...lient+SpeechSynthesisRequestHandling.swift | 41 ++++++++++ ...lient+SpeechSynthesisRequestHandling.swift | 56 +++++++++++++ .../PlayHT/Intramodular/PlayHT.Client.swift | 6 +- Sources/PlayHT/Intramodular/URL++.swift | 79 +++++++++++++++++++ ...lient+SpeechSynthesisRequestHandling.swift | 55 +++++++++++++ 18 files changed, 300 insertions(+), 4 deletions(-) create mode 100644 Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => AbstractVoice (WIP)}/AbstractVoice.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => AbstractVoice (WIP)}/AbstractVoiceSettings.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => AbstractVoice (WIP)}/SpeechSynthesisRequestHandling.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => VideoGeneration (WIP)}/VideoGenerationRequestHandling.swift (98%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.FrameRate.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.MotionSettings.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.Quality.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.Resolution.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.StyleStrength.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => VideoGeneration (WIP)}/VideoModel.swift (100%) create mode 100644 Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift create mode 100644 Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift create mode 100644 Sources/PlayHT/Intramodular/URL++.swift create mode 100644 Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift diff --git a/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..cd5a4e8f --- /dev/null +++ b/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift @@ -0,0 +1,44 @@ +// +// HumeAI+ElevenLabsClientProtocol.swift +// Voice +// +// Created by Jared Davidson on 11/22/24. +// + +import Foundation +import SwiftUI +import AVFoundation +import LargeLanguageModels + +extension HumeAI.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + return try await getAllAvailableVoices().map( + { voice in + return AbstractVoice( + voiceID: voice.id, + name: voice.name, + description: nil + ) + }) + } + + public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + throw HumeAI.APIError.unknown(message: "Text to speech not supported") + } + + public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + throw HumeAI.APIError.unknown(message: "Speech to speech not supported") + } + + public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID { + throw HumeAI.APIError.unknown(message: "Voice creation is not supported") + } + + public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool { + throw HumeAI.APIError.unknown(message: "Voice creation is not supported") + } + + public func delete(voice: AbstractVoice.ID) async throws { + throw HumeAI.APIError.unknown(message: "Voice creation is not supported") + } +} diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoice.swift b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoice.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoice.swift rename to Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoice.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoiceSettings.swift b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoiceSettings.swift rename to Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/SpeechSynthesisRequestHandling.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift rename to Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/SpeechSynthesisRequestHandling.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationRequestHandling.swift similarity index 98% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationRequestHandling.swift index d9f9ab1c..bc82693e 100644 --- a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift +++ b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationRequestHandling.swift @@ -5,7 +5,6 @@ import AVFoundation import Foundation import SwiftUI -import LargeLanguageModels public protocol VideoGenerationRequestHandling { func availableModels() async throws -> [VideoModel] diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.FrameRate.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.FrameRate.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.MotionSettings.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.MotionSettings.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Quality.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Quality.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Resolution.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Resolution.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.StyleStrength.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.StyleStrength.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoModel.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoModel.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoModel.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoModel.swift diff --git a/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift b/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift index 2f035154..4422720b 100644 --- a/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift +++ b/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift @@ -6,6 +6,7 @@ // import Foundation +import LargeLanguageModels extension NeetsAI { public struct Voice: Codable, Hashable { @@ -15,3 +16,24 @@ extension NeetsAI { public let supportedModels: [String] } } + +extension NeetsAI.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.id, + name: self.title ?? "", + description: self.aliasOf + ) + } +} + +extension NeetsAI.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + id: voice.voiceID, + title: voice.name, + aliasOf: voice.description, + supportedModels: [] + ) + } +} diff --git a/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..3fa5844b --- /dev/null +++ b/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift @@ -0,0 +1,41 @@ +// +// NeetsAI.Client+SpeechSynthesisRequestHandling.swift +// Voice +// + +import Foundation +import SwiftUI +import AVFoundation +import LargeLanguageModels + +extension NeetsAI.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + return try await getAllAvailableVoices().map( { try $0.__conversion() } ) + } + + public func speech(for text: String, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data { + let audio = try await generateSpeech( + text: text, + voiceId: voiceID, + model: .init(rawValue: model) ?? .mistralai + ) + return audio + } + + public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data { + throw NeetsAI.APIError.unknown(message: "Speech to speech not supported") + + } + + public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> LargeLanguageModels.AbstractVoice.ID { + throw NeetsAI.APIError.unknown(message: "Uploading Voice is not supported") + } + + public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool { + throw NeetsAI.APIError.unknown(message: "Editing Voice is not supported") + } + + public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws { + throw NeetsAI.APIError.unknown(message: "Deleting Voice is not supported") + } +} diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift b/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..c987b479 --- /dev/null +++ b/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift @@ -0,0 +1,56 @@ +// +// PlayHT+SpeechSynthesisRequestHandling.swift +// Voice +// +// Created by Jared Davidson on 11/20/24. +// + +import Foundation +import AI +import ElevenLabs +import SwiftUI +import AVFoundation +import LargeLanguageModels + +extension PlayHT.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + let voices: [AbstractVoice] = try await getAllAvailableVoices().map { try $0.__conversion() } + return voices + } + + public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + let data: Data = try await streamTextToSpeech( + text: text, + voice: voiceID, + settings: .init(), + model: .playHT2Turbo + ) + + return data + } + + public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data { + throw PlayHT.APIError.unknown(message: "Speech to speech not supported") + } + + public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID { + let mp4URL = try await fileURL.convertAudioToMP4() + let fileURLString = mp4URL.absoluteString + let voiceID = try await instantCloneVoice( + sampleFileURL: fileURLString, + name: name + ) + + try? FileManager.default.removeItem(at: mp4URL) + + return .init(rawValue: voiceID.rawValue) + } + + public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool { + throw PlayHT.APIError.unknown(message: "Voice editing not supported") + } + + public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws { + try await deleteClonedVoice(voice: .init(rawValue: voice.rawValue)) + } +} diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client.swift b/Sources/PlayHT/Intramodular/PlayHT.Client.swift index 66e6e80f..eb63bfa8 100644 --- a/Sources/PlayHT/Intramodular/PlayHT.Client.swift +++ b/Sources/PlayHT/Intramodular/PlayHT.Client.swift @@ -59,14 +59,14 @@ extension PlayHT.Client: CoreMI._ServiceClientProtocol { extension PlayHT.Client { public func getAllAvailableVoices() async throws -> [PlayHT.Voice] { - async let htVoices = availableVoices() - async let clonedVoices = clonedVoices() + async let htVoices = self.getAvailableVoices() + async let clonedVoices = self.clonedVoices() let (available, cloned) = try await (htVoices, clonedVoices) return available + cloned } - public func availableVoices() async throws -> [PlayHT.Voice] { + public func getAvailableVoices() async throws -> [PlayHT.Voice] { try await run(\.listVoices).voices } diff --git a/Sources/PlayHT/Intramodular/URL++.swift b/Sources/PlayHT/Intramodular/URL++.swift new file mode 100644 index 00000000..f584da1f --- /dev/null +++ b/Sources/PlayHT/Intramodular/URL++.swift @@ -0,0 +1,79 @@ +// +// URL++.swift +// AI +// +// Created by Jared Davidson on 1/14/25. +// + +import AVFoundation +import AudioToolbox + +// FIXME: - This needs to be moved somewhere else (@archetapp) + +extension URL { + func convertAudioToMP4() async throws -> URL { + let outputURL = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("mp4") + + let asset = AVURLAsset(url: self) + + let composition = AVMutableComposition() + guard let compositionTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Could not create composition track"]) + } + + guard let audioTrack = try await asset.loadTracks(withMediaType: .audio).first else { + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "No audio track found"]) + } + + let timeRange = CMTimeRange(start: .zero, duration: try await asset.load(.duration)) + for i in 0..<4 { + try compositionTrack.insertTimeRange( + timeRange, + of: audioTrack, + at: CMTime(seconds: Double(i) * timeRange.duration.seconds, preferredTimescale: 600) + ) + } + + guard let exportSession = AVAssetExportSession( + asset: composition, + presetName: AVAssetExportPresetPassthrough + ) else { + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Could not create export session"]) + } + + exportSession.outputURL = outputURL + exportSession.outputFileType = AVFileType.mp4 + exportSession.shouldOptimizeForNetworkUse = true + + // Create a tuple of values we need to check after export + try await withCheckedThrowingContinuation { continuation in + let mainQueue = DispatchQueue.main + exportSession.exportAsynchronously { + mainQueue.async { + switch exportSession.status { + case .completed: + continuation.resume() + case .failed: + continuation.resume(throwing: exportSession.error ?? NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export failed"])) + case .cancelled: + continuation.resume(throwing: NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export cancelled"])) + default: + continuation.resume(throwing: NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Unknown export error"])) + } + } + } + } + + let fileSize = try FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? Int ?? 0 + if fileSize < 5000 { // 5KB minimum + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Converted file too small"]) + } + + return outputURL + } +} diff --git a/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift b/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..93126293 --- /dev/null +++ b/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift @@ -0,0 +1,55 @@ +// +// Rime+SpeechSynthesisRequestHandling.swift +// Voice +// +// Created by Jared Davidson on 11/21/24. +// + +import Foundation +import AI +import ElevenLabs +import SwiftUI +import AVFoundation + +extension Rime.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + return try await getAllAvailableVoiceDetails().map { try $0.__conversion() } + } + + public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + return try await streamTextToSpeech( + text: text, + voice: voiceID, + outputAudio: .MP3, + model: .mist + ) + } + + public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + throw Rime.APIError.unknown(message: "Speech to speech not supported") + } + + public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID { + throw Rime.APIError.unknown(message: "Voice creation is not supported") + } + + public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool { + throw Rime.APIError.unknown(message: "Voice creation is not supported") + } + + public func delete(voice: AbstractVoice.ID) async throws { + throw Rime.APIError.unknown(message: "Voice creation is not supported") + } + + public func availableVoices() async throws -> [ElevenLabs.Voice] { + return try await getAllAvailableVoiceDetails().map { voice in + ElevenLabs.Voice( + voiceID: voice.name, + name: voice.name, + description: voice.demographic, + isOwner: false + ) + } + } + +} From f2f807c282e67ee6cb4cac35f3e471d0df0011f7 Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Thu, 16 Jan 2025 18:18:42 +0530 Subject: [PATCH 08/14] Cleanup --- .../SpeechSynthesisRequestHandling.swift | 1 - 1 file changed, 1 deletion(-) diff --git a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift index 1de670e4..e7d762d0 100644 --- a/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift +++ b/Sources/AI/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift @@ -6,7 +6,6 @@ // import Foundation -import AI import ElevenLabs import SwiftUI From 3aba2ada446184b215d872de01cec1abc7a88a6c Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Fri, 17 Jan 2025 00:43:00 +0530 Subject: [PATCH 09/14] Update Gemini (Incomplete) --- ...emini.APISpecification.RequestBodies.swift | 38 +++++++++---- .../API/_Gemini.APISpecification.swift | 53 +++++++++++++++++-- .../Intramodular/_Gemini.Client+Files.swift | 46 +++++++++------- 3 files changed, 103 insertions(+), 34 deletions(-) diff --git a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift index 7003aebd..0eaaa91f 100644 --- a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift +++ b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift @@ -142,10 +142,31 @@ extension _Gemini.APISpecification { } } - public struct FileUploadInput: Codable, HTTPRequest.Multipart.ContentConvertible { + public struct FinalizeFileUploadInput { + public let data: Data + public let uploadUrl: String + public let fileSize: Int + + public init(data: Data, uploadUrl: String, fileSize: Int) { + self.data = data + self.uploadUrl = uploadUrl + self.fileSize = fileSize + } + } + + public struct StartFileUploadInput: Codable { + public struct UploadMetadata: Codable { + let file: FileMetadata + + struct FileMetadata: Codable { + let display_name: String + } + } + public let fileData: Data public let mimeType: String public let displayName: String + public let metadata: UploadMetadata public init( fileData: Data, @@ -155,11 +176,12 @@ extension _Gemini.APISpecification { self.fileData = fileData self.mimeType = mimeType self.displayName = displayName + self.metadata = .init(file: .init(display_name: displayName)) } - + /* public func __conversion() throws -> HTTPRequest.Multipart.Content { var result = HTTPRequest.Multipart.Content() - + // TODO: - Add this to `HTTPMediaType` @jared @vmanot let fileExtension: String = { guard let subtype = mimeType.split(separator: "/").last else { @@ -188,17 +210,11 @@ extension _Gemini.APISpecification { } }() - result.append( - .file( - named: "file", - data: fileData, - filename: "\(displayName).\(fileExtension)", - contentType: .init(rawValue: mimeType) - ) - ) + result.ap return result } + */ } public struct DeleteFileInput: Codable { diff --git a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift index fe289d07..4474386e 100644 --- a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift +++ b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift @@ -75,11 +75,29 @@ extension _Gemini { // Initial Upload Request endpoint @POST @Path("/upload/v1beta/files") - @Header([ - "X-Goog-Upload-Command": "start, upload, finalize" - ]) - @Body(multipart: .input) - var uploadFile = Endpoint() + @Header({ context in + [ + HTTPHeaderField(key: "X-Goog-Upload-Protocol", value: "resumable"), + HTTPHeaderField(key: "X-Goog-Upload-Command", value: "start"), + HTTPHeaderField(key: "X-Goog-Upload-Header-Content-Length", value: "\(context.input.fileData.count)"), + HTTPHeaderField(key: "X-Goog-Upload-Header-Content-Type", value: context.input.mimeType), + HTTPHeaderField.contentType(.json) + ] + }) + @Body(json: \RequestBodies.StartFileUploadInput.metadata) + var startFileUpload = Endpoint() + + @POST + @Path({ context in context.input.uploadUrl }) + @Header({ context in + [ + HTTPHeaderField(key: "Content-Length", value: "\(context.input.fileSize)"), + HTTPHeaderField(key: "X-Goog-Upload-Offset", value: "0"), + HTTPHeaderField(key: "X-Goog-Upload-Command", value: "upload, finalize") + ] + }) + @Body(json: \RequestBodies.FinalizeFileUploadInput.data) + var finalizeFileUpload = Endpoint() // File Status endpoint @GET @@ -157,6 +175,7 @@ extension _Gemini.APISpecification { context: context ) + // FIXME: (@jared) - why are you replacing the query instead of appending a new query item? is this intentional? if let apiKey = context.root.configuration.apiKey { request = request.query([.init(name: "key", value: apiKey)]) } @@ -173,10 +192,34 @@ extension _Gemini.APISpecification { try response.validate() + + if let options: _Gemini.APISpecification.Options = context.options as? _Gemini.APISpecification.Options, let headerKey = options.outputHeaderKey { + print("HEADERS: \(response.headerFields)") + let stringValue: String? = response.headerFields.first (where: { $0.key == headerKey })?.value + print(stringValue) + + switch Output.self { + case String.self: + return (try stringValue.unwrap()) as! Output + case Optional.self: + return stringValue as! Output + default: + throw _Gemini.APIError.invalidContentType + } + } + return try response.decode( Output.self, keyDecodingStrategy: .convertFromSnakeCase ) } } + + public class Options { + var outputHeaderKey: HTTPHeaderField.Key? + + init(outputHeaderKey: HTTPHeaderField.Key? = nil) { + self.outputHeaderKey = outputHeaderKey + } + } } diff --git a/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift b/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift index b6fa298c..7ff3c88a 100644 --- a/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift +++ b/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift @@ -9,6 +9,17 @@ import Merge import NetworkKit import Swallow +fileprivate enum TempError: CustomStringError, Error { + case fetchedResponse + + public var description: String { + switch self { + case .fetchedResponse: + return "Got response url from header" + } + } +} + extension _Gemini.Client { public func uploadFile( from data: Data, @@ -20,27 +31,26 @@ extension _Gemini.Client { throw FileProcessingError.invalidFileName } - do { - var mimeType: String? = mimeType?.rawValue ?? _MediaAssetFileType(data)?.mimeType - - if mimeType == nil, let swiftType { - mimeType = HTTPMediaType(_swiftType: swiftType)?.rawValue - } - - let input = _Gemini.APISpecification.RequestBodies.FileUploadInput( - fileData: data, - mimeType: try mimeType.unwrap(), - displayName: displayName - ) - - let response = try await run(\.uploadFile, with: input) - - return response.file - } catch { - throw _Gemini.APIError.unknown(message: "File upload failed: \(error.localizedDescription)") + var mimeType: String? = mimeType?.rawValue ?? _MediaAssetFileType(data)?.mimeType + + if mimeType == nil, let swiftType { + mimeType = HTTPMediaType(_swiftType: swiftType)?.rawValue } + + let input = _Gemini.APISpecification.RequestBodies.StartFileUploadInput( + fileData: data, + mimeType: try mimeType.unwrap(), + displayName: displayName + ) + + let uploadURLString: String = try await run(\.startFileUpload, with: input, options: _Gemini.APISpecification.Options(outputHeaderKey: .custom("x-goog-upload-url"))).value + + let result: _Gemini.APISpecification.ResponseBodies.FileUpload = try await run(\.finalizeFileUpload, with: _Gemini.APISpecification.RequestBodies.FinalizeFileUploadInput(data: data, uploadUrl: uploadURLString, fileSize: data.count)) + + return result.file } + public func uploadFile( from url: URL, mimeType: HTTPMediaType?, From 5b3e1361cd40a0f87369c5605bf88f94dc189c54 Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Fri, 17 Jan 2025 01:21:54 +0530 Subject: [PATCH 10/14] Fix --- .../API/_Gemini.APISpecification.swift | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift index 4474386e..cecb0399 100644 --- a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift +++ b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift @@ -70,11 +70,13 @@ extension _Gemini { "/v1beta/models/\(context.input.model):generateContent" }) @Body(json: \.requestBody) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var generateContent = Endpoint() // Initial Upload Request endpoint @POST @Path("/upload/v1beta/files") + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) @Header({ context in [ HTTPHeaderField(key: "X-Goog-Upload-Protocol", value: "resumable"), @@ -88,7 +90,7 @@ extension _Gemini { var startFileUpload = Endpoint() @POST - @Path({ context in context.input.uploadUrl }) + @AbsolutePath({ $0.input.uploadUrl }) @Header({ context in [ HTTPHeaderField(key: "Content-Length", value: "\(context.input.fileSize)"), @@ -96,7 +98,7 @@ extension _Gemini { HTTPHeaderField(key: "X-Goog-Upload-Command", value: "upload, finalize") ] }) - @Body(json: \RequestBodies.FinalizeFileUploadInput.data) + @Body(data: \RequestBodies.FinalizeFileUploadInput.data) var finalizeFileUpload = Endpoint() // File Status endpoint @@ -104,6 +106,7 @@ extension _Gemini { @Path({ context -> String in "/v1beta/\(context.input.name.rawValue)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var getFile = Endpoint() @GET @@ -119,8 +122,13 @@ extension _Gemini { parameters["pageToken"] = pageToken } + if let apiKey = context.root.configuration.apiKey { + parameters["key"] = apiKey + } + return parameters }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var listFiles = Endpoint() // Delete File endpoint @@ -128,24 +136,28 @@ extension _Gemini { @Path({ context -> String in "/\(context.input.fileURL.path)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var deleteFile = Endpoint() - //Fine Tuning + // Fine Tuning @POST @Path("/v1beta/tunedModels") @Body(json: \.requestBody) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var createTunedModel = Endpoint() @GET @Path({ context -> String in "/v1/\(context.input.operationName)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var getTuningOperation = Endpoint() @GET @Path({ context -> String in "/v1beta/\(context.input.modelName)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var getTunedModel = Endpoint() @POST @@ -153,6 +165,7 @@ extension _Gemini { "/v1beta/\(context.input.model):generateContent" // Use the model name directly }) @Body(json: \.requestBody) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var generateTunedContent = Endpoint() @POST @@ -160,6 +173,7 @@ extension _Gemini { "/v1beta/models/\(context.input.model):embedContent" }) @Body(json: \.input) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var generateEmbedding = Endpoint() } } @@ -175,10 +189,7 @@ extension _Gemini.APISpecification { context: context ) - // FIXME: (@jared) - why are you replacing the query instead of appending a new query item? is this intentional? - if let apiKey = context.root.configuration.apiKey { - request = request.query([.init(name: "key", value: apiKey)]) - } + print("REQUEST URL: \(request.url)") return request } From 1f990563cffcea707608a3a6687deb6487c2a753 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 17 Jan 2025 09:26:45 -0700 Subject: [PATCH 11/14] Fixed build issue --- Package.swift | 1 + Sources/AI/AnySpeechSynthesisRequestHandling.swift | 3 +++ .../Intramodular/Models/NeetsAI.Voice.swift | 2 +- ...sAI.Client+SpeechSynthesisRequestHandling.swift | 14 +++++++++++++- Sources/NeetsAI/module.swift | 2 ++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Package.swift b/Package.swift index f731cfd7..df47734a 100644 --- a/Package.swift +++ b/Package.swift @@ -389,6 +389,7 @@ let package = Package( "Ollama", "OpenAI", "Swallow", + "NeetsAI", ], path: "Sources/AI", swiftSettings: [ diff --git a/Sources/AI/AnySpeechSynthesisRequestHandling.swift b/Sources/AI/AnySpeechSynthesisRequestHandling.swift index 18dce27e..5684958f 100644 --- a/Sources/AI/AnySpeechSynthesisRequestHandling.swift +++ b/Sources/AI/AnySpeechSynthesisRequestHandling.swift @@ -7,6 +7,7 @@ import ElevenLabs import LargeLanguageModels +import NeetsAI public struct AnySpeechSynthesisRequestHandling: Hashable { private let _hashValue: Int @@ -17,6 +18,8 @@ public struct AnySpeechSynthesisRequestHandling: Hashable { switch base { case is ElevenLabs.Client: return "ElevenLabs" + case is NeetsAI.Client: + return "NeetsAI" default: fatalError() } diff --git a/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift b/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift index 4422720b..ee23943a 100644 --- a/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift +++ b/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift @@ -30,7 +30,7 @@ extension NeetsAI.Voice: AbstractVoiceConvertible { extension NeetsAI.Voice: AbstractVoiceInitiable { public init(voice: AbstractVoice) throws { self.init( - id: voice.voiceID, + id: .init(voice.voiceID), title: voice.name, aliasOf: voice.description, supportedModels: [] diff --git a/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift index 3fa5844b..973024bd 100644 --- a/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift +++ b/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift @@ -10,7 +10,11 @@ import LargeLanguageModels extension NeetsAI.Client: SpeechSynthesisRequestHandling { public func availableVoices() async throws -> [AbstractVoice] { - return try await getAllAvailableVoices().map( { try $0.__conversion() } ) + let voices = try await getAllAvailableVoices() + .map({ try $0.__conversion() }) + .filter({ !$0.name.isEmpty }) + .unique(by: \.name) + return voices } public func speech(for text: String, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data { @@ -39,3 +43,11 @@ extension NeetsAI.Client: SpeechSynthesisRequestHandling { throw NeetsAI.APIError.unknown(message: "Deleting Voice is not supported") } } + +// FIXME: - REMOVE ME +extension Sequence { + func unique(by keyPath: KeyPath) -> [Element] { + var seen = Set() + return filter { seen.insert($0[keyPath: keyPath]).inserted } + } +} diff --git a/Sources/NeetsAI/module.swift b/Sources/NeetsAI/module.swift index 1c4d3b99..5b26df46 100644 --- a/Sources/NeetsAI/module.swift +++ b/Sources/NeetsAI/module.swift @@ -5,3 +5,5 @@ // Created by Jared Davidson on 11/22/24. // +@_exported import Swallow +@_exported import SwallowMacrosClient From 3fe5468107870109a8c0a7fabdca7abbf1fa87d9 Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Sat, 18 Jan 2025 22:45:30 +0530 Subject: [PATCH 12/14] Update --- Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift b/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift index 7ff3c88a..020e3cde 100644 --- a/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift +++ b/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift @@ -6,6 +6,7 @@ import CoreMI import Dispatch import FoundationX import Merge +import Media import NetworkKit import Swallow @@ -50,6 +51,9 @@ extension _Gemini.Client { return result.file } + public func upload(file: any MediaFile) async throws { + try await self.uploadFile(from: file.url, mimeType: HTTPMediaType(fileURL: file.url), displayName: file.name) + } public func uploadFile( from url: URL, From 6fac42dba41a5b3b8038d1e66e00c28086a57be8 Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Mon, 20 Jan 2025 04:03:54 +0530 Subject: [PATCH 13/14] Cleanup --- Package.swift | 3 ++- .../AbstractVoice (WIP)/AbstractVoiceSettings.swift | 2 +- .../Intramodular/API/_Gemini.APISpecification.swift | 10 +--------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/Package.swift b/Package.swift index df47734a..ee06cd68 100644 --- a/Package.swift +++ b/Package.swift @@ -191,7 +191,8 @@ let package = Package( "LargeLanguageModels", "Merge", "NetworkKit", - "Swallow" + "Swallow", + "Media" ], path: "Sources/_Gemini", swiftSettings: [ diff --git a/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift index 34fd03b6..b54b685f 100644 --- a/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift +++ b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift @@ -5,7 +5,7 @@ // Created by Jared Davidson on 10/30/24. // -import SwiftUIZ +import SwiftUIX import CorePersistence public struct AbstractVoiceSettings: Codable, Sendable, Initiable, Equatable { diff --git a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift index cecb0399..587217f2 100644 --- a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift +++ b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift @@ -184,13 +184,11 @@ extension _Gemini.APISpecification { from input: Input, context: BuildRequestContext ) throws -> Request { - var request = try super.buildRequestBase( + let request = try super.buildRequestBase( from: input, context: context ) - print("REQUEST URL: \(request.url)") - return request } @@ -198,16 +196,10 @@ extension _Gemini.APISpecification { from response: Request.Response, context: DecodeOutputContext ) throws -> Output { - - print(response) - try response.validate() - if let options: _Gemini.APISpecification.Options = context.options as? _Gemini.APISpecification.Options, let headerKey = options.outputHeaderKey { - print("HEADERS: \(response.headerFields)") let stringValue: String? = response.headerFields.first (where: { $0.key == headerKey })?.value - print(stringValue) switch Output.self { case String.self: From 96c7895d1619a34fa7ebbe5570451f7db701690c Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Mon, 20 Jan 2025 23:40:06 +0530 Subject: [PATCH 14/14] Fix package.swift --- Package.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/Package.swift b/Package.swift index ee06cd68..6dd9893e 100644 --- a/Package.swift +++ b/Package.swift @@ -92,6 +92,7 @@ let package = Package( ], dependencies: [ .package(url: "https://github.com/vmanot/CorePersistence.git", branch: "main"), + .package(url: "https://github.com/vmanot/Media", branch: "main"), .package(url: "https://github.com/vmanot/Merge.git", branch: "master"), .package(url: "https://github.com/vmanot/NetworkKit.git", branch: "master"), .package(url: "https://github.com/vmanot/Swallow.git", branch: "master"),