From dc041f78a95d40b835a9ec34356f10248885daf9 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 12:16:17 -0700 Subject: [PATCH 01/64] Add generation type to ModelConfig --- src/data_designer/config/models.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 6bff8efd..3e06a8fc 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -205,11 +205,18 @@ def _is_value_in_range(self, value: float, min_value: float, max_value: float) - return min_value <= value <= max_value +class GenerationType(str, Enum): + CHAT_COMPLETION = "chat-completion" + TEXT_EMBEDDING = "text-embedding" + IMAGE_GENERATION = "image-generation" + + class ModelConfig(ConfigBase): alias: str model: str inference_parameters: InferenceParameters = Field(default_factory=InferenceParameters) provider: Optional[str] = None + generation_type: GenerationType = GenerationType.CHAT_COMPLETION class ModelProvider(ConfigBase): From 0d6b830f6439b6bece0b642c921281fd817de5d3 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 12:21:28 -0700 Subject: [PATCH 02/64] pass tests --- src/data_designer/config/default_model_settings.py | 5 +++-- src/data_designer/config/models.py | 2 +- tests/cli/repositories/test_model_repository.py | 4 +++- tests/config/test_config_builder.py | 2 +- tests/config/test_models.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/data_designer/config/default_model_settings.py b/src/data_designer/config/default_model_settings.py index 32c1d42b..33d6dad4 100644 --- a/src/data_designer/config/default_model_settings.py +++ b/src/data_designer/config/default_model_settings.py @@ -103,7 +103,8 @@ def resolve_seed_default_model_settings() -> None: f"🍾 Default model configs were not found, so writing the following to {str(MODEL_CONFIGS_FILE_PATH)!r}" ) save_config_file( - MODEL_CONFIGS_FILE_PATH, {"model_configs": 
[mc.model_dump() for mc in get_builtin_model_configs()]} + MODEL_CONFIGS_FILE_PATH, + {"model_configs": [mc.model_dump(mode="json") for mc in get_builtin_model_configs()]}, ) if not MODEL_PROVIDERS_FILE_PATH.exists(): @@ -111,7 +112,7 @@ def resolve_seed_default_model_settings() -> None: f"🪄 Default model providers were not found, so writing the following to {str(MODEL_PROVIDERS_FILE_PATH)!r}" ) save_config_file( - MODEL_PROVIDERS_FILE_PATH, {"providers": [p.model_dump() for p in get_builtin_model_providers()]} + MODEL_PROVIDERS_FILE_PATH, {"providers": [p.model_dump(mode="json") for p in get_builtin_model_providers()]} ) if not MANAGED_ASSETS_PATH.exists(): diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 3e06a8fc..17698346 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -215,8 +215,8 @@ class ModelConfig(ConfigBase): alias: str model: str inference_parameters: InferenceParameters = Field(default_factory=InferenceParameters) - provider: Optional[str] = None generation_type: GenerationType = GenerationType.CHAT_COMPLETION + provider: Optional[str] = None class ModelProvider(ConfigBase): diff --git a/tests/cli/repositories/test_model_repository.py b/tests/cli/repositories/test_model_repository.py index 01884b5c..624cd360 100644 --- a/tests/cli/repositories/test_model_repository.py +++ b/tests/cli/repositories/test_model_repository.py @@ -21,7 +21,9 @@ def test_load_does_not_exist(): def test_load_exists(tmp_path: Path, stub_model_configs: list[ModelConfig]): model_configs_file_path = tmp_path / MODEL_CONFIGS_FILE_NAME - save_config_file(model_configs_file_path, {"model_configs": [mc.model_dump() for mc in stub_model_configs]}) + save_config_file( + model_configs_file_path, {"model_configs": [mc.model_dump(mode="json") for mc in stub_model_configs]} + ) repository = ModelRepository(tmp_path) assert repository.load() is not None assert repository.load().model_configs == 
stub_model_configs diff --git a/tests/config/test_config_builder.py b/tests/config/test_config_builder.py index 337d934e..aab8112a 100644 --- a/tests/config/test_config_builder.py +++ b/tests/config/test_config_builder.py @@ -54,7 +54,7 @@ def stub_data_designer_builder(stub_data_designer_builder_config_str): def test_loading_model_configs_in_constructor(stub_model_configs): - stub_model_configs_dict = [mc.model_dump() for mc in stub_model_configs] + stub_model_configs_dict = [mc.model_dump(mode="json") for mc in stub_model_configs] # test loading model configs from a list builder = DataDesignerConfigBuilder(model_configs=stub_model_configs) assert builder.model_configs == stub_model_configs diff --git a/tests/config/test_models.py b/tests/config/test_models.py index 9ccda6d5..6a3d7b25 100644 --- a/tests/config/test_models.py +++ b/tests/config/test_models.py @@ -212,7 +212,7 @@ def test_load_model_configs(): ModelConfig(alias="test", model="test"), ModelConfig(alias="test2", model="test2"), ] - stub_model_configs_dict_list = [mc.model_dump() for mc in stub_model_configs] + stub_model_configs_dict_list = [mc.model_dump(mode="json") for mc in stub_model_configs] assert load_model_configs([]) == [] assert load_model_configs(stub_model_configs) == stub_model_configs From 254fd8a71e261a7bb3ac71ad14d8aa10772529ea Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 14:36:02 -0700 Subject: [PATCH 03/64] added generate_text_embeddings --- .../generators/llm_generators.py | 1 - src/data_designer/engine/models/facade.py | 51 ++++++++++++++++++- .../generators/test_llm_generators.py | 17 ------- tests/engine/models/test_facade.py | 48 +++++++++++------ 4 files changed, 82 insertions(+), 35 deletions(-) diff --git a/src/data_designer/engine/column_generators/generators/llm_generators.py b/src/data_designer/engine/column_generators/generators/llm_generators.py index ee0ab58a..8f4cfc90 100644 --- 
a/src/data_designer/engine/column_generators/generators/llm_generators.py +++ b/src/data_designer/engine/column_generators/generators/llm_generators.py @@ -96,7 +96,6 @@ def generate(self, data: dict) -> dict: max_correction_steps=self.max_conversation_correction_steps, max_conversation_restarts=self.max_conversation_restarts, purpose=f"running generation for column '{self.config.name}'", - **self.inference_parameters.generate_kwargs, ) data[self.config.name] = deserialize_json_values(self.response_recipe.serialize_output(response)) diff --git a/src/data_designer/engine/models/facade.py b/src/data_designer/engine/models/facade.py index 93ca0fd7..b0ad3472 100644 --- a/src/data_designer/engine/models/facade.py +++ b/src/data_designer/engine/models/facade.py @@ -9,7 +9,7 @@ from typing import Any from litellm.types.router import DeploymentTypedDict, LiteLLM_Params -from litellm.types.utils import ModelResponse +from litellm.types.utils import EmbeddingResponse, ModelResponse from data_designer.config.models import ModelConfig, ModelProvider from data_designer.engine.model_provider import ModelProviderRegistry @@ -67,6 +67,7 @@ def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = extra={"model": self.model_name, "messages": messages, "sensitive": True}, ) response = None + kwargs = {**self._model_config.inference_parameters.generate_kwargs, **kwargs} if self.model_provider.extra_body: kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body} try: @@ -87,6 +88,41 @@ def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = if not skip_usage_tracking: self._track_usage(response) + @catch_llm_exceptions + def generate_text_embeddings( + self, input_texts: list[str], skip_usage_tracking: bool = False, **kwargs + ) -> list[list[float]]: + logger.debug( + f"Generating embeddings with model {self.model_name!r}...", + extra={ + "model": self.model_name, + "input_count": len(input_texts), + 
"sensitive": True, + }, + ) + kwargs |= self._model_config.inference_parameters.generate_kwargs + if self.model_provider.extra_body: + kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body} + try: + response = self._router.embedding(model=self.model_name, input=input_texts, **kwargs) + logger.debug( + f"Received embeddings from model {self.model_name!r}", + extra={ + "model": self.model_name, + "embedding_count": len(response.data) if response.data else 0, + "usage": self._usage_stats.model_dump(), + }, + ) + if response.data and len(response.data) == len(input_texts): + return [data["embedding"] for data in response.data] + else: + raise ValueError(f"Expected {len(input_texts)} embeddings, but received {len(response.data)}") + except Exception as e: + raise e + finally: + if not skip_usage_tracking: + self._track_usage_from_embedding(response) + @catch_llm_exceptions def generate( self, @@ -223,3 +259,16 @@ def _track_usage(self, response: ModelResponse | None) -> None: ), request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) + + def _track_usage_from_embedding(self, response: EmbeddingResponse | None) -> None: + if response is None: + self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) + return + if response.usage is not None and response.usage.prompt_tokens is not None: + self._usage_stats.extend( + token_usage=TokenUsageStats( + prompt_tokens=response.usage.prompt_tokens, + completion_tokens=0, + ), + request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), + ) diff --git a/tests/engine/column_generators/generators/test_llm_generators.py b/tests/engine/column_generators/generators/test_llm_generators.py index 259f3a08..acaa2c6f 100644 --- a/tests/engine/column_generators/generators/test_llm_generators.py +++ b/tests/engine/column_generators/generators/test_llm_generators.py @@ -259,20 +259,3 @@ def 
test_generate_with_json_deserialization(): result = generator.generate(data) assert result["test_column"] == {"result": "json_output"} - - -def test_generate_with_inference_parameters(): - generator, _, mock_model, _, mock_inference_params, mock_prompt_renderer, mock_response_recipe = ( - _create_generator_with_mocks() - ) - - mock_inference_params.generate_kwargs = {"temperature": 0.7, "max_tokens": 100} - _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model) - - data = {"input": "test_input"} - generator.generate(data) - - call_args = mock_model.generate.call_args - assert call_args[1]["temperature"] == 0.7 - assert call_args[1]["max_tokens"] == 100 - assert call_args[1]["purpose"] == "running generation for column 'test_column'" diff --git a/tests/engine/models/test_facade.py b/tests/engine/models/test_facade.py index 4fa73d9a..d240eeaa 100644 --- a/tests/engine/models/test_facade.py +++ b/tests/engine/models/test_facade.py @@ -133,7 +133,9 @@ def raise_exception(*args, **kwargs): stub_model_facade.completion(messages) -def test_completion_with_kwargs(stub_model_facade, stub_expected_response): +def test_completion_kwargs_overrides_model_config_generate_kwargs( + stub_model_configs, stub_model_facade, stub_expected_response +): captured_kwargs = {} def mock_completion(model_name, messages, **kwargs): @@ -147,28 +149,42 @@ def mock_completion(model_name, messages, **kwargs): result = stub_model_facade.completion(messages, **kwargs) assert result == stub_expected_response - assert captured_kwargs == kwargs + # completion kwargs overrides model config generate kwargs + assert captured_kwargs == {**stub_model_configs[0].inference_parameters.generate_kwargs, **kwargs} @patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True) -def test_completion_with_extra_body(mock_router_completion, stub_model_facade): +def test_provider_extra_body_overrides_completion_kwargs(mock_router_completion, stub_model_configs, 
stub_model_facade): messages = [{"role": "user", "content": "test"}] + stub_provider_extra_body = {"foo": "bar"} - # completion call has no extra body argument and provider has no extra body + # model config has generate kwargs, completion call has no kwargs, and provider has no extra body _ = stub_model_facade.completion(messages) assert len(mock_router_completion.call_args) == 2 assert mock_router_completion.call_args[0][1] == "stub-model-text" assert mock_router_completion.call_args[0][2] == messages + assert mock_router_completion.call_args[1] == stub_model_configs[0].inference_parameters.generate_kwargs - # completion call has no extra body argument and provider has extra body. - # Should pull extra body from model provider - custom_extra_body = {"some_custom_key": "some_custom_value"} - stub_model_facade.model_provider.extra_body = custom_extra_body - _ = stub_model_facade.completion(messages) - assert mock_router_completion.call_args[1] == {"extra_body": custom_extra_body} - - # completion call has extra body argument and provider has extra body. 
- # Should merge the two with provider extra body taking precedence - completion_extra_body = {"some_completion_key": "some_completion_value", "some_custom_key": "some_different_value"} - _ = stub_model_facade.completion(messages, extra_body=completion_extra_body) - assert mock_router_completion.call_args[1] == {"extra_body": {**completion_extra_body, **custom_extra_body}} + # model config has generate kwargs, completion call has kwargs, and provider has no extra body + # completion kwargs overrides model config generate kwargs + _ = stub_model_facade.completion(messages, temperature=0.1) + assert len(mock_router_completion.call_args) == 2 + assert mock_router_completion.call_args[0][1] == "stub-model-text" + assert mock_router_completion.call_args[0][2] == messages + assert mock_router_completion.call_args[1] == { + **stub_model_configs[0].inference_parameters.generate_kwargs, + "temperature": 0.1, + } + + # model config has generate kwargs, completion call has kwargs, and provider has extra body + # provider extra body overrides completion kwargs + stub_model_facade.model_provider.extra_body = stub_provider_extra_body + _ = stub_model_facade.completion(messages, temperature=0.15, extra_body={"foo": "bat"}) + assert len(mock_router_completion.call_args) == 2 + assert mock_router_completion.call_args[0][1] == "stub-model-text" + assert mock_router_completion.call_args[0][2] == messages + assert mock_router_completion.call_args[1] == { + **stub_model_configs[0].inference_parameters.generate_kwargs, + "temperature": 0.15, + "extra_body": stub_provider_extra_body, + } From 1126ea1bdfdf842ed8073aaf0cea6e405a77c0ce Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 15:18:59 -0700 Subject: [PATCH 04/64] tests --- src/data_designer/engine/models/facade.py | 21 +-- tests/engine/models/test_facade.py | 152 +++++++++++++--------- 2 files changed, 105 insertions(+), 68 deletions(-) diff --git a/src/data_designer/engine/models/facade.py 
b/src/data_designer/engine/models/facade.py index b0ad3472..4e3f36ef 100644 --- a/src/data_designer/engine/models/facade.py +++ b/src/data_designer/engine/models/facade.py @@ -67,11 +67,9 @@ def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = extra={"model": self.model_name, "messages": messages, "sensitive": True}, ) response = None - kwargs = {**self._model_config.inference_parameters.generate_kwargs, **kwargs} - if self.model_provider.extra_body: - kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body} + kwargs = self.consolidate_kwargs(**kwargs) try: - response = self._router.completion(self.model_name, messages, **kwargs) + response = self._router.completion(model=self.model_name, messages=messages, **kwargs) logger.debug( f"Received completion from model {self.model_name!r}", extra={ @@ -85,9 +83,15 @@ def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = except Exception as e: raise e finally: - if not skip_usage_tracking: + if not skip_usage_tracking and response is not None: self._track_usage(response) + def consolidate_kwargs(self, **kwargs) -> dict[str, Any]: + kwargs = {**self._model_config.inference_parameters.generate_kwargs, **kwargs} + if self.model_provider.extra_body: + kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body} + return kwargs + @catch_llm_exceptions def generate_text_embeddings( self, input_texts: list[str], skip_usage_tracking: bool = False, **kwargs @@ -100,9 +104,8 @@ def generate_text_embeddings( "sensitive": True, }, ) - kwargs |= self._model_config.inference_parameters.generate_kwargs - if self.model_provider.extra_body: - kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body} + kwargs = self.consolidate_kwargs(**kwargs) + response = None try: response = self._router.embedding(model=self.model_name, input=input_texts, **kwargs) logger.debug( @@ -120,7 +123,7 @@ def 
generate_text_embeddings( except Exception as e: raise e finally: - if not skip_usage_tracking: + if not skip_usage_tracking and response is not None: self._track_usage_from_embedding(response) @catch_llm_exceptions diff --git a/tests/engine/models/test_facade.py b/tests/engine/models/test_facade.py index d240eeaa..afe27730 100644 --- a/tests/engine/models/test_facade.py +++ b/tests/engine/models/test_facade.py @@ -4,7 +4,7 @@ from collections import namedtuple from unittest.mock import patch -from litellm.types.utils import Choices, Message, ModelResponse +from litellm.types.utils import Choices, EmbeddingResponse, Message, ModelResponse import pytest from data_designer.engine.models.errors import ModelGenerationValidationFailureError @@ -30,10 +30,20 @@ def stub_model_facade(stub_model_configs, stub_secrets_resolver, stub_model_prov @pytest.fixture -def stub_expected_response(): +def stub_completion_messages(): + return [{"role": "user", "content": "test"}] + + +@pytest.fixture +def stub_expected_completion_response(): return ModelResponse(choices=Choices(message=Message(content="Test response"))) +@pytest.fixture +def stub_expected_embedding_response(): + return EmbeddingResponse(data=[{"embedding": [0.1, 0.2, 0.3]}] * 2) + + @pytest.mark.parametrize( "max_correction_steps,max_conversation_restarts,total_calls", [ @@ -105,6 +115,24 @@ def test_usage_stats_property(stub_model_facade): assert hasattr(stub_model_facade.usage_stats, "model_dump") +def test_consolidate_kwargs(stub_model_configs, stub_model_facade): + # Model config generate kwargs are used as base + result = stub_model_facade.consolidate_kwargs() + assert result == stub_model_configs[0].inference_parameters.generate_kwargs + + # kwargs overrides model config generate kwargs + result = stub_model_facade.consolidate_kwargs(temperature=0.01) + assert result == {**stub_model_configs[0].inference_parameters.generate_kwargs, "temperature": 0.01} + + # Provider extra_body overrides all other kwargs + 
stub_model_facade.model_provider.extra_body = {"foo_provider": "bar_provider"} + result = stub_model_facade.consolidate_kwargs(extra_body={"foo": "bar"}) + assert result == { + **stub_model_configs[0].inference_parameters.generate_kwargs, + "extra_body": {"foo_provider": "bar_provider", "foo": "bar"}, + } + + @pytest.mark.parametrize( "skip_usage_tracking", [ @@ -112,79 +140,85 @@ def test_usage_stats_property(stub_model_facade): True, ], ) -def test_completion_success(stub_model_facade, stub_expected_response, skip_usage_tracking): - stub_model_facade._router.completion = lambda model_name, messages, **kwargs: stub_expected_response - - messages = [{"role": "user", "content": "test"}] - result = stub_model_facade.completion(messages, skip_usage_tracking=skip_usage_tracking) - - assert result == stub_expected_response - - -def test_completion_with_exception(stub_model_facade): - def raise_exception(*args, **kwargs): - raise Exception("Router error") +@patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True) +def test_completion_success( + mock_router_completion, + stub_completion_messages, + stub_model_configs, + stub_model_facade, + stub_expected_completion_response, + skip_usage_tracking, +): + mock_router_completion.side_effect = lambda self, model, messages, **kwargs: stub_expected_completion_response + result = stub_model_facade.completion(stub_completion_messages, skip_usage_tracking=skip_usage_tracking) + assert result == stub_expected_completion_response + assert mock_router_completion.call_count == 1 + assert mock_router_completion.call_args[1] == { + "model": "stub-model-text", + "messages": stub_completion_messages, + **stub_model_configs[0].inference_parameters.generate_kwargs, + } - stub_model_facade._router.completion = raise_exception - messages = [{"role": "user", "content": "test"}] +@patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True) +def 
test_completion_with_exception(mock_router_completion, stub_completion_messages, stub_model_facade): + mock_router_completion.side_effect = Exception("Router error") with pytest.raises(Exception, match="Router error"): - stub_model_facade.completion(messages) + stub_model_facade.completion(stub_completion_messages) -def test_completion_kwargs_overrides_model_config_generate_kwargs( - stub_model_configs, stub_model_facade, stub_expected_response +@patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True) +def test_completion_with_kwargs( + mock_router_completion, + stub_completion_messages, + stub_model_configs, + stub_model_facade, + stub_expected_completion_response, ): captured_kwargs = {} - def mock_completion(model_name, messages, **kwargs): + def mock_completion(self, model, messages, **kwargs): captured_kwargs.update(kwargs) - return stub_expected_response + return stub_expected_completion_response - stub_model_facade._router.completion = mock_completion + mock_router_completion.side_effect = mock_completion - messages = [{"role": "user", "content": "test"}] kwargs = {"temperature": 0.7, "max_tokens": 100} - result = stub_model_facade.completion(messages, **kwargs) + result = stub_model_facade.completion(stub_completion_messages, **kwargs) - assert result == stub_expected_response + assert result == stub_expected_completion_response # completion kwargs overrides model config generate kwargs assert captured_kwargs == {**stub_model_configs[0].inference_parameters.generate_kwargs, **kwargs} -@patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True) -def test_provider_extra_body_overrides_completion_kwargs(mock_router_completion, stub_model_configs, stub_model_facade): - messages = [{"role": "user", "content": "test"}] - stub_provider_extra_body = {"foo": "bar"} - - # model config has generate kwargs, completion call has no kwargs, and provider has no extra body - _ = stub_model_facade.completion(messages) - 
assert len(mock_router_completion.call_args) == 2 - assert mock_router_completion.call_args[0][1] == "stub-model-text" - assert mock_router_completion.call_args[0][2] == messages - assert mock_router_completion.call_args[1] == stub_model_configs[0].inference_parameters.generate_kwargs - - # model config has generate kwargs, completion call has kwargs, and provider has no extra body - # completion kwargs overrides model config generate kwargs - _ = stub_model_facade.completion(messages, temperature=0.1) - assert len(mock_router_completion.call_args) == 2 - assert mock_router_completion.call_args[0][1] == "stub-model-text" - assert mock_router_completion.call_args[0][2] == messages - assert mock_router_completion.call_args[1] == { - **stub_model_configs[0].inference_parameters.generate_kwargs, - "temperature": 0.1, - } +@patch("data_designer.engine.models.facade.CustomRouter.embedding", autospec=True) +def test_generate_text_embeddings_success(mock_router_embedding, stub_model_facade, stub_expected_embedding_response): + mock_router_embedding.side_effect = lambda self, model, input, **kwargs: stub_expected_embedding_response + input_texts = ["test1", "test2"] + result = stub_model_facade.generate_text_embeddings(input_texts) + assert result == [data["embedding"] for data in stub_expected_embedding_response.data] - # model config has generate kwargs, completion call has kwargs, and provider has extra body - # provider extra body overrides completion kwargs - stub_model_facade.model_provider.extra_body = stub_provider_extra_body - _ = stub_model_facade.completion(messages, temperature=0.15, extra_body={"foo": "bat"}) - assert len(mock_router_completion.call_args) == 2 - assert mock_router_completion.call_args[0][1] == "stub-model-text" - assert mock_router_completion.call_args[0][2] == messages - assert mock_router_completion.call_args[1] == { - **stub_model_configs[0].inference_parameters.generate_kwargs, - "temperature": 0.15, - "extra_body": 
stub_provider_extra_body, - } + +@patch("data_designer.engine.models.facade.CustomRouter.embedding", autospec=True) +def test_generate_text_embeddings_with_exception(mock_router_embedding, stub_model_facade): + mock_router_embedding.side_effect = Exception("Router error") + + with pytest.raises(Exception, match="Router error"): + stub_model_facade.generate_text_embeddings(["test1", "test2"]) + + +@patch("data_designer.engine.models.facade.CustomRouter.embedding", autospec=True) +def test_generate_text_embeddings_with_kwargs( + mock_router_embedding, stub_model_configs, stub_model_facade, stub_expected_embedding_response +): + captured_kwargs = {} + + def mock_embedding(self, model, input, **kwargs): + captured_kwargs.update(kwargs) + return stub_expected_embedding_response + + mock_router_embedding.side_effect = mock_embedding + kwargs = {"temperature": 0.7, "max_tokens": 100, "input_type": "query"} + _ = stub_model_facade.generate_text_embeddings(["test1", "test2"], **kwargs) + assert captured_kwargs == {**stub_model_configs[0].inference_parameters.generate_kwargs, **kwargs} From 744bc8fd4c9d5662966ba09282b69daf326be9b8 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 16:46:42 -0700 Subject: [PATCH 05/64] remove sensitive=True old artifact no longer needed --- src/data_designer/engine/models/facade.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/data_designer/engine/models/facade.py b/src/data_designer/engine/models/facade.py index 4e3f36ef..ea72d4c3 100644 --- a/src/data_designer/engine/models/facade.py +++ b/src/data_designer/engine/models/facade.py @@ -64,7 +64,7 @@ def usage_stats(self) -> ModelUsageStats: def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs) -> ModelResponse: logger.debug( f"Prompting model {self.model_name!r}...", - extra={"model": self.model_name, "messages": messages, "sensitive": True}, + extra={"model": self.model_name, "messages": messages}, ) 
response = None kwargs = self.consolidate_kwargs(**kwargs) @@ -101,7 +101,6 @@ def generate_text_embeddings( extra={ "model": self.model_name, "input_count": len(input_texts), - "sensitive": True, }, ) kwargs = self.consolidate_kwargs(**kwargs) From b913f8d6dfc0d3717badc30a3ae4176287cbf9b8 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 17:11:23 -0700 Subject: [PATCH 06/64] Slight refactor --- .../utils/column_statistics_calculations.py | 2 +- ...llm_generators.py => generation_mixins.py} | 94 ++++--------------- .../generators/llm_completion_generators.py | 71 ++++++++++++++ .../engine/column_generators/registry.py | 2 +- .../dataset_builders/column_wise_builder.py | 6 +- ...s.py => test_llm_completion_generators.py} | 6 +- .../engine/column_generators/test_registry.py | 2 +- 7 files changed, 97 insertions(+), 86 deletions(-) rename src/data_designer/engine/column_generators/generators/{llm_generators.py => generation_mixins.py} (64%) create mode 100644 src/data_designer/engine/column_generators/generators/llm_completion_generators.py rename tests/engine/column_generators/generators/{test_llm_generators.py => test_llm_completion_generators.py} (97%) diff --git a/src/data_designer/engine/analysis/utils/column_statistics_calculations.py b/src/data_designer/engine/analysis/utils/column_statistics_calculations.py index 120caef4..1b23c0ea 100644 --- a/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +++ b/src/data_designer/engine/analysis/utils/column_statistics_calculations.py @@ -23,7 +23,7 @@ SingleColumnConfig, ValidationColumnConfig, ) -from data_designer.engine.column_generators.generators.llm_generators import ( +from data_designer.engine.column_generators.utils.prompt_renderer import ( PromptType, RecordBasedPromptRenderer, create_response_recipe, diff --git a/src/data_designer/engine/column_generators/generators/llm_generators.py b/src/data_designer/engine/column_generators/generators/generation_mixins.py 
similarity index 64% rename from src/data_designer/engine/column_generators/generators/llm_generators.py rename to src/data_designer/engine/column_generators/generators/generation_mixins.py index 8f4cfc90..4e29a37a 100644 --- a/src/data_designer/engine/column_generators/generators/llm_generators.py +++ b/src/data_designer/engine/column_generators/generators/generation_mixins.py @@ -4,20 +4,9 @@ import functools import logging -from data_designer.config.column_configs import ( - LLMCodeColumnConfig, - LLMJudgeColumnConfig, - LLMStructuredColumnConfig, - LLMTextColumnConfig, -) from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP from data_designer.config.models import InferenceParameters, ModelConfig from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX -from data_designer.engine.column_generators.generators.base import ( - ColumnGenerator, - GenerationStrategy, - GeneratorMetadata, -) from data_designer.engine.column_generators.utils.prompt_renderer import ( PromptType, RecordBasedPromptRenderer, @@ -26,7 +15,6 @@ from data_designer.engine.models.facade import ModelFacade from data_designer.engine.models.recipes.base import ResponseRecipe from data_designer.engine.processing.utils import deserialize_json_values -from data_designer.engine.resources.resource_provider import ResourceType DEFAULT_MAX_CONVERSATION_RESTARTS = 5 DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS = 0 @@ -35,7 +23,7 @@ logger = logging.getLogger(__name__) -class WithLLMGeneration: +class WithModelGeneration: @functools.cached_property def model(self) -> ModelFacade: return self.resource_provider.model_registry.get_model(model_alias=self.config.model_alias) @@ -59,6 +47,21 @@ def prompt_renderer(self) -> RecordBasedPromptRenderer: }, ) + def log_pre_generation(self) -> None: + emoji = COLUMN_TYPE_EMOJI_MAP[self.config.column_type] + logger.info(f"{emoji} Preparing {self.config.column_type} column generation") + logger.info(f" |-- column name: 
{self.config.name!r}") + logger.info(f" |-- model config:\n{self.model_config.model_dump_json(indent=4)}") + if self.model_config.provider is None: + logger.info(f" |-- default model provider: {self._get_provider_name()!r}") + + def _get_provider_name(self) -> str: + model_alias = self.model_config.alias + provider = self.resource_provider.model_registry.get_model_provider(model_alias=model_alias) + return provider.name + + +class WithCompletionGeneration(WithModelGeneration): @functools.cached_property def response_recipe(self) -> ResponseRecipe: return create_response_recipe(self.config, self.model_config) @@ -104,68 +107,3 @@ def generate(self, data: dict) -> dict: data[self.config.name + REASONING_TRACE_COLUMN_POSTFIX] = reasoning_trace return data - - def log_pre_generation(self) -> None: - emoji = COLUMN_TYPE_EMOJI_MAP[self.config.column_type] - logger.info(f"{emoji} Preparing {self.config.column_type} column generation") - logger.info(f" |-- column name: {self.config.name!r}") - logger.info(f" |-- model config:\n{self.model_config.model_dump_json(indent=4)}") - if self.model_config.provider is None: - logger.info(f" |-- default model provider: {self._get_provider_name()!r}") - - def _get_provider_name(self) -> str: - model_alias = self.model_config.alias - provider = self.resource_provider.model_registry.get_model_provider(model_alias=model_alias) - return provider.name - - -class LLMTextCellGenerator(WithLLMGeneration, ColumnGenerator[LLMTextColumnConfig]): - @staticmethod - def metadata() -> GeneratorMetadata: - return GeneratorMetadata( - name="llm_text_generator", - description="Generate a new dataset cell from a prompt template", - generation_strategy=GenerationStrategy.CELL_BY_CELL, - required_resources=[ResourceType.MODEL_REGISTRY], - ) - - -class LLMCodeCellGenerator(WithLLMGeneration, ColumnGenerator[LLMCodeColumnConfig]): - @staticmethod - def metadata() -> GeneratorMetadata: - return GeneratorMetadata( - name="llm_code_generator", - 
description="Generate a new dataset cell from a prompt template", - generation_strategy=GenerationStrategy.CELL_BY_CELL, - required_resources=[ResourceType.MODEL_REGISTRY], - ) - - -class LLMStructuredCellGenerator(WithLLMGeneration, ColumnGenerator[LLMStructuredColumnConfig]): - @staticmethod - def metadata() -> GeneratorMetadata: - return GeneratorMetadata( - name="llm_structured_generator", - description="Generate a new dataset cell from a prompt template", - generation_strategy=GenerationStrategy.CELL_BY_CELL, - required_resources=[ResourceType.MODEL_REGISTRY], - ) - - -class LLMJudgeCellGenerator(WithLLMGeneration, ColumnGenerator[LLMJudgeColumnConfig]): - @staticmethod - def metadata() -> GeneratorMetadata: - return GeneratorMetadata( - name="llm_judge_generator", - description="Judge a new dataset cell based on a set of rubrics", - generation_strategy=GenerationStrategy.CELL_BY_CELL, - required_resources=[ResourceType.MODEL_REGISTRY], - ) - - @property - def max_conversation_correction_steps(self) -> int: - return DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS - - @property - def max_conversation_restarts(self) -> int: - return 2 * DEFAULT_MAX_CONVERSATION_RESTARTS diff --git a/src/data_designer/engine/column_generators/generators/llm_completion_generators.py b/src/data_designer/engine/column_generators/generators/llm_completion_generators.py new file mode 100644 index 00000000..cc61c619 --- /dev/null +++ b/src/data_designer/engine/column_generators/generators/llm_completion_generators.py @@ -0,0 +1,71 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import logging + +from data_designer.config.column_configs import ( + LLMCodeColumnConfig, + LLMJudgeColumnConfig, + LLMStructuredColumnConfig, + LLMTextColumnConfig, +) +from data_designer.engine.column_generators.generators.base import ( + ColumnGenerator, + GenerationStrategy, + GeneratorMetadata, +) +from data_designer.engine.column_generators.generators.generation_mixins import ( + DEFAULT_MAX_CONVERSATION_RESTARTS, + WithCompletionGeneration, +) +from data_designer.engine.resources.resource_provider import ResourceType + +logger = logging.getLogger(__name__) + + +class LLMTextCellGenerator(WithCompletionGeneration, ColumnGenerator[LLMTextColumnConfig]): + @staticmethod + def metadata() -> GeneratorMetadata: + return GeneratorMetadata( + name="llm_text_generator", + description="Generate a new dataset cell from a prompt template", + generation_strategy=GenerationStrategy.CELL_BY_CELL, + required_resources=[ResourceType.MODEL_REGISTRY], + ) + + +class LLMCodeCellGenerator(WithCompletionGeneration, ColumnGenerator[LLMCodeColumnConfig]): + @staticmethod + def metadata() -> GeneratorMetadata: + return GeneratorMetadata( + name="llm_code_generator", + description="Generate a new dataset cell from a prompt template", + generation_strategy=GenerationStrategy.CELL_BY_CELL, + required_resources=[ResourceType.MODEL_REGISTRY], + ) + + +class LLMStructuredCellGenerator(WithCompletionGeneration, ColumnGenerator[LLMStructuredColumnConfig]): + @staticmethod + def metadata() -> GeneratorMetadata: + return GeneratorMetadata( + name="llm_structured_generator", + description="Generate a new dataset cell from a prompt template", + generation_strategy=GenerationStrategy.CELL_BY_CELL, + required_resources=[ResourceType.MODEL_REGISTRY], + ) + + +class LLMJudgeCellGenerator(WithCompletionGeneration, ColumnGenerator[LLMJudgeColumnConfig]): + @staticmethod + def metadata() -> GeneratorMetadata: + return GeneratorMetadata( + 
name="llm_judge_generator", + description="Judge a new dataset cell based on a set of rubrics", + generation_strategy=GenerationStrategy.CELL_BY_CELL, + required_resources=[ResourceType.MODEL_REGISTRY], + ) + + @property + def max_conversation_restarts(self) -> int: + return DEFAULT_MAX_CONVERSATION_RESTARTS * 2 diff --git a/src/data_designer/engine/column_generators/registry.py b/src/data_designer/engine/column_generators/registry.py index 61b43753..56a176ae 100644 --- a/src/data_designer/engine/column_generators/registry.py +++ b/src/data_designer/engine/column_generators/registry.py @@ -13,7 +13,7 @@ from data_designer.config.column_types import DataDesignerColumnType from data_designer.engine.column_generators.generators.base import ColumnGenerator from data_designer.engine.column_generators.generators.expression import ExpressionColumnGenerator -from data_designer.engine.column_generators.generators.llm_generators import ( +from data_designer.engine.column_generators.generators.llm_completion_generators import ( LLMCodeCellGenerator, LLMJudgeCellGenerator, LLMStructuredCellGenerator, diff --git a/src/data_designer/engine/dataset_builders/column_wise_builder.py b/src/data_designer/engine/dataset_builders/column_wise_builder.py index e7060f82..ae6c54cc 100644 --- a/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -18,7 +18,7 @@ ProcessorType, ) from data_designer.engine.column_generators.generators.base import ColumnGenerator, GenerationStrategy -from data_designer.engine.column_generators.generators.llm_generators import WithLLMGeneration +from data_designer.engine.column_generators.generators.generation_mixins import WithCompletionGeneration from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError from 
data_designer.engine.dataset_builders.multi_column_configs import ( @@ -169,7 +169,7 @@ def _run_from_scratch_column_generator(self, generator: ColumnGenerator) -> None def _run_cell_by_cell_generator(self, generator: ColumnGenerator) -> None: max_workers = MAX_CONCURRENCY_PER_NON_LLM_GENERATOR - if isinstance(generator, WithLLMGeneration): + if isinstance(generator, WithCompletionGeneration): max_workers = generator.inference_parameters.max_parallel_requests self._fan_out_with_threads(generator, max_workers=max_workers) @@ -183,7 +183,7 @@ def _run_model_health_check_if_needed(self) -> bool: set(config.model_alias for config in self.llm_generated_column_configs) ) - def _fan_out_with_threads(self, generator: WithLLMGeneration, max_workers: int) -> None: + def _fan_out_with_threads(self, generator: WithCompletionGeneration, max_workers: int) -> None: if generator.generation_strategy != GenerationStrategy.CELL_BY_CELL: raise DatasetGenerationError( f"Generator {generator.metadata().name} is not a {GenerationStrategy.CELL_BY_CELL} " diff --git a/tests/engine/column_generators/generators/test_llm_generators.py b/tests/engine/column_generators/generators/test_llm_completion_generators.py similarity index 97% rename from tests/engine/column_generators/generators/test_llm_generators.py rename to tests/engine/column_generators/generators/test_llm_completion_generators.py index acaa2c6f..ab398aed 100644 --- a/tests/engine/column_generators/generators/test_llm_generators.py +++ b/tests/engine/column_generators/generators/test_llm_completion_generators.py @@ -11,10 +11,12 @@ LLMStructuredColumnConfig, LLMTextColumnConfig, ) -from data_designer.engine.column_generators.generators.llm_generators import ( +from data_designer.engine.column_generators.generators.generation_mixins import ( DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS, DEFAULT_MAX_CONVERSATION_RESTARTS, REASONING_TRACE_COLUMN_POSTFIX, +) +from data_designer.engine.column_generators.generators.llm_completion_generators 
import ( LLMCodeCellGenerator, LLMJudgeCellGenerator, LLMStructuredCellGenerator, @@ -94,7 +96,7 @@ def test_generate_method(): assert call_args[1]["multi_modal_context"] is None -@patch("data_designer.engine.column_generators.generators.llm_generators.logger", autospec=True) +@patch("data_designer.engine.column_generators.generators.generation_mixins.logger", autospec=True) def test_log_pre_generation(mock_logger): generator, mock_resource_provider, _, mock_model_config, _, _, _ = _create_generator_with_mocks() mock_model_config.model_dump_json.return_value = '{"test": "config"}' diff --git a/tests/engine/column_generators/test_registry.py b/tests/engine/column_generators/test_registry.py index f70b0d90..57457b94 100644 --- a/tests/engine/column_generators/test_registry.py +++ b/tests/engine/column_generators/test_registry.py @@ -3,7 +3,7 @@ from data_designer.config.column_types import DataDesignerColumnType from data_designer.engine.column_generators.generators.expression import ExpressionColumnGenerator -from data_designer.engine.column_generators.generators.llm_generators import ( +from data_designer.engine.column_generators.generators.llm_completion_generators import ( LLMCodeCellGenerator, LLMJudgeCellGenerator, LLMStructuredCellGenerator, From 052db7a41f142c8aa2b28f1701fb0fd3bfaa652f Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 17:21:20 -0700 Subject: [PATCH 07/64] slight refactor --- .../column_generators/generators/base.py | 48 ++++++++ .../generators/generation_mixins.py | 109 ------------------ .../generators/llm_completion_generators.py | 63 +++++++++- .../dataset_builders/column_wise_builder.py | 2 +- .../test_llm_completion_generators.py | 6 +- 5 files changed, 111 insertions(+), 117 deletions(-) delete mode 100644 src/data_designer/engine/column_generators/generators/generation_mixins.py diff --git a/src/data_designer/engine/column_generators/generators/base.py b/src/data_designer/engine/column_generators/generators/base.py 
index f4ddb60c..8977a63b 100644 --- a/src/data_designer/engine/column_generators/generators/base.py +++ b/src/data_designer/engine/column_generators/generators/base.py @@ -2,12 +2,22 @@ # SPDX-License-Identifier: Apache-2.0 from abc import ABC, abstractmethod +import functools +import logging from typing import overload import pandas as pd +from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP +from data_designer.config.models import InferenceParameters, ModelConfig from data_designer.config.utils.type_helpers import StrEnum +from data_designer.engine.column_generators.utils.prompt_renderer import ( + RecordBasedPromptRenderer, +) from data_designer.engine.configurable_task import ConfigurableTask, ConfigurableTaskMetadata, DataT, TaskConfigT +from data_designer.engine.models.facade import ModelFacade + +logger = logging.getLogger(__name__) class GenerationStrategy(StrEnum): @@ -59,3 +69,41 @@ def can_generate_from_scratch(self) -> bool: @abstractmethod def generate_from_scratch(self, num_records: int) -> pd.DataFrame: ... 
+ + +class WithModelGeneration: + @functools.cached_property + def model(self) -> ModelFacade: + return self.resource_provider.model_registry.get_model(model_alias=self.config.model_alias) + + @functools.cached_property + def model_config(self) -> ModelConfig: + return self.resource_provider.model_registry.get_model_config(model_alias=self.config.model_alias) + + @functools.cached_property + def inference_parameters(self) -> InferenceParameters: + return self.model_config.inference_parameters + + @functools.cached_property + def prompt_renderer(self) -> RecordBasedPromptRenderer: + return RecordBasedPromptRenderer( + response_recipe=self.response_recipe, + error_message_context={ + "column_name": self.config.name, + "column_type": self.config.column_type, + "model_alias": self.config.model_alias, + }, + ) + + def log_pre_generation(self) -> None: + emoji = COLUMN_TYPE_EMOJI_MAP[self.config.column_type] + logger.info(f"{emoji} Preparing {self.config.column_type} column generation") + logger.info(f" |-- column name: {self.config.name!r}") + logger.info(f" |-- model config:\n{self.model_config.model_dump_json(indent=4)}") + if self.model_config.provider is None: + logger.info(f" |-- default model provider: {self._get_provider_name()!r}") + + def _get_provider_name(self) -> str: + model_alias = self.model_config.alias + provider = self.resource_provider.model_registry.get_model_provider(model_alias=model_alias) + return provider.name diff --git a/src/data_designer/engine/column_generators/generators/generation_mixins.py b/src/data_designer/engine/column_generators/generators/generation_mixins.py deleted file mode 100644 index 4e29a37a..00000000 --- a/src/data_designer/engine/column_generators/generators/generation_mixins.py +++ /dev/null @@ -1,109 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 - -import functools -import logging - -from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP -from data_designer.config.models import InferenceParameters, ModelConfig -from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX -from data_designer.engine.column_generators.utils.prompt_renderer import ( - PromptType, - RecordBasedPromptRenderer, - create_response_recipe, -) -from data_designer.engine.models.facade import ModelFacade -from data_designer.engine.models.recipes.base import ResponseRecipe -from data_designer.engine.processing.utils import deserialize_json_values - -DEFAULT_MAX_CONVERSATION_RESTARTS = 5 -DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS = 0 - - -logger = logging.getLogger(__name__) - - -class WithModelGeneration: - @functools.cached_property - def model(self) -> ModelFacade: - return self.resource_provider.model_registry.get_model(model_alias=self.config.model_alias) - - @functools.cached_property - def model_config(self) -> ModelConfig: - return self.resource_provider.model_registry.get_model_config(model_alias=self.config.model_alias) - - @functools.cached_property - def inference_parameters(self) -> InferenceParameters: - return self.model_config.inference_parameters - - @functools.cached_property - def prompt_renderer(self) -> RecordBasedPromptRenderer: - return RecordBasedPromptRenderer( - response_recipe=self.response_recipe, - error_message_context={ - "column_name": self.config.name, - "column_type": self.config.column_type, - "model_alias": self.config.model_alias, - }, - ) - - def log_pre_generation(self) -> None: - emoji = COLUMN_TYPE_EMOJI_MAP[self.config.column_type] - logger.info(f"{emoji} Preparing {self.config.column_type} column generation") - logger.info(f" |-- column name: {self.config.name!r}") - logger.info(f" |-- model config:\n{self.model_config.model_dump_json(indent=4)}") - if self.model_config.provider is None: - logger.info(f" |-- default model 
provider: {self._get_provider_name()!r}") - - def _get_provider_name(self) -> str: - model_alias = self.model_config.alias - provider = self.resource_provider.model_registry.get_model_provider(model_alias=model_alias) - return provider.name - - -class WithCompletionGeneration(WithModelGeneration): - @functools.cached_property - def response_recipe(self) -> ResponseRecipe: - return create_response_recipe(self.config, self.model_config) - - @property - def max_conversation_correction_steps(self) -> int: - return DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS - - @property - def max_conversation_restarts(self) -> int: - return DEFAULT_MAX_CONVERSATION_RESTARTS - - def generate(self, data: dict) -> dict: - deserialized_record = deserialize_json_values(data) - - multi_modal_context = None - if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0: - multi_modal_context = [ - context.get_context(deserialized_record) for context in self.config.multi_modal_context - ] - - response, reasoning_trace = self.model.generate( - prompt=self.prompt_renderer.render( - record=deserialized_record, - prompt_template=self.config.prompt, - prompt_type=PromptType.USER_PROMPT, - ), - system_prompt=self.prompt_renderer.render( - record=deserialized_record, - prompt_template=self.config.system_prompt, - prompt_type=PromptType.SYSTEM_PROMPT, - ), - parser=self.response_recipe.parse, - multi_modal_context=multi_modal_context, - max_correction_steps=self.max_conversation_correction_steps, - max_conversation_restarts=self.max_conversation_restarts, - purpose=f"running generation for column '{self.config.name}'", - ) - - data[self.config.name] = deserialize_json_values(self.response_recipe.serialize_output(response)) - - if reasoning_trace: - data[self.config.name + REASONING_TRACE_COLUMN_POSTFIX] = reasoning_trace - - return data diff --git a/src/data_designer/engine/column_generators/generators/llm_completion_generators.py 
b/src/data_designer/engine/column_generators/generators/llm_completion_generators.py index cc61c619..5665ba85 100644 --- a/src/data_designer/engine/column_generators/generators/llm_completion_generators.py +++ b/src/data_designer/engine/column_generators/generators/llm_completion_generators.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import functools import logging from data_designer.config.column_configs import ( @@ -9,20 +10,76 @@ LLMStructuredColumnConfig, LLMTextColumnConfig, ) +from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX from data_designer.engine.column_generators.generators.base import ( ColumnGenerator, GenerationStrategy, GeneratorMetadata, + WithModelGeneration, ) -from data_designer.engine.column_generators.generators.generation_mixins import ( - DEFAULT_MAX_CONVERSATION_RESTARTS, - WithCompletionGeneration, +from data_designer.engine.column_generators.utils.prompt_renderer import ( + PromptType, + create_response_recipe, ) +from data_designer.engine.models.recipes.base import ResponseRecipe +from data_designer.engine.processing.utils import deserialize_json_values from data_designer.engine.resources.resource_provider import ResourceType logger = logging.getLogger(__name__) +DEFAULT_MAX_CONVERSATION_RESTARTS = 5 +DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS = 0 + + +class WithCompletionGeneration(WithModelGeneration): + @functools.cached_property + def response_recipe(self) -> ResponseRecipe: + return create_response_recipe(self.config, self.model_config) + + @property + def max_conversation_correction_steps(self) -> int: + return DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS + + @property + def max_conversation_restarts(self) -> int: + return DEFAULT_MAX_CONVERSATION_RESTARTS + + def generate(self, data: dict) -> dict: + deserialized_record = deserialize_json_values(data) + + multi_modal_context = None + if 
self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0: + multi_modal_context = [ + context.get_context(deserialized_record) for context in self.config.multi_modal_context + ] + + response, reasoning_trace = self.model.generate( + prompt=self.prompt_renderer.render( + record=deserialized_record, + prompt_template=self.config.prompt, + prompt_type=PromptType.USER_PROMPT, + ), + system_prompt=self.prompt_renderer.render( + record=deserialized_record, + prompt_template=self.config.system_prompt, + prompt_type=PromptType.SYSTEM_PROMPT, + ), + parser=self.response_recipe.parse, + multi_modal_context=multi_modal_context, + max_correction_steps=self.max_conversation_correction_steps, + max_conversation_restarts=self.max_conversation_restarts, + purpose=f"running generation for column '{self.config.name}'", + ) + + data[self.config.name] = deserialize_json_values(self.response_recipe.serialize_output(response)) + + if reasoning_trace: + data[self.config.name + REASONING_TRACE_COLUMN_POSTFIX] = reasoning_trace + + return data + + class LLMTextCellGenerator(WithCompletionGeneration, ColumnGenerator[LLMTextColumnConfig]): @staticmethod def metadata() -> GeneratorMetadata: diff --git a/src/data_designer/engine/dataset_builders/column_wise_builder.py b/src/data_designer/engine/dataset_builders/column_wise_builder.py index ae6c54cc..78a5e9fa 100644 --- a/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -18,7 +18,7 @@ ProcessorType, ) from data_designer.engine.column_generators.generators.base import ColumnGenerator, GenerationStrategy -from data_designer.engine.column_generators.generators.generation_mixins import WithCompletionGeneration +from data_designer.engine.column_generators.generators.llm_completion_generators import WithCompletionGeneration from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from 
data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError from data_designer.engine.dataset_builders.multi_column_configs import ( diff --git a/tests/engine/column_generators/generators/test_llm_completion_generators.py b/tests/engine/column_generators/generators/test_llm_completion_generators.py index ab398aed..3a411fc9 100644 --- a/tests/engine/column_generators/generators/test_llm_completion_generators.py +++ b/tests/engine/column_generators/generators/test_llm_completion_generators.py @@ -11,12 +11,10 @@ LLMStructuredColumnConfig, LLMTextColumnConfig, ) -from data_designer.engine.column_generators.generators.generation_mixins import ( +from data_designer.engine.column_generators.generators.llm_completion_generators import ( DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS, DEFAULT_MAX_CONVERSATION_RESTARTS, REASONING_TRACE_COLUMN_POSTFIX, -) -from data_designer.engine.column_generators.generators.llm_completion_generators import ( LLMCodeCellGenerator, LLMJudgeCellGenerator, LLMStructuredCellGenerator, @@ -96,7 +94,7 @@ def test_generate_method(): assert call_args[1]["multi_modal_context"] is None -@patch("data_designer.engine.column_generators.generators.generation_mixins.logger", autospec=True) +@patch("data_designer.engine.column_generators.generators.base.logger", autospec=True) def test_log_pre_generation(mock_logger): generator, mock_resource_provider, _, mock_model_config, _, _, _ = _create_generator_with_mocks() mock_model_config.model_dump_json.return_value = '{"test": "config"}' From 5504c8dd1b4745e27c6590e2424cc4cb26a7944d Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 18:17:07 -0700 Subject: [PATCH 08/64] Added embedding generator --- src/data_designer/config/column_configs.py | 23 +++++++++++++ src/data_designer/config/column_types.py | 8 +++++ src/data_designer/config/models.py | 2 +- .../config/utils/visualization.py | 15 +++++++- .../generators/embedding_generators.py | 34 
+++++++++++++++++++ .../engine/column_generators/registry.py | 3 ++ src/data_designer/engine/models/facade.py | 6 +++- src/data_designer/engine/models/registry.py | 29 ++++++++++------ src/data_designer/essentials/__init__.py | 4 +++ 9 files changed, 111 insertions(+), 13 deletions(-) create mode 100644 src/data_designer/engine/column_generators/generators/embedding_generators.py diff --git a/src/data_designer/config/column_configs.py b/src/data_designer/config/column_configs.py index d19b6a9e..c5468f19 100644 --- a/src/data_designer/config/column_configs.py +++ b/src/data_designer/config/column_configs.py @@ -377,3 +377,26 @@ class SeedDatasetColumnConfig(SingleColumnConfig): """ column_type: Literal["seed-dataset"] = "seed-dataset" + + +class EmbeddingColumnConfig(SingleColumnConfig): + """Configuration for embedding generation columns. + + Embedding columns generate embeddings for text input using a specified model. + + Attributes: + column_type: Discriminator field, always "embedding" for this configuration type. + target_column: The column to generate embeddings for. + model_alias: The model to use for embedding generation. + chunk_separator: Optional separator to split the text in the target column into chunks. For example, if chunk_separator + is '\n', the text will be split into chunks of text separated by newlines and embeddings generated for each chunk. 
+ """ + + column_type: Literal["embedding"] = "embedding" + target_column: str + model_alias: str + chunk_separator: Optional[str] = None + + @property + def required_columns(self) -> list[str]: + return [self.target_column] diff --git a/src/data_designer/config/column_types.py b/src/data_designer/config/column_types.py index 50ba498d..aab55c4d 100644 --- a/src/data_designer/config/column_types.py +++ b/src/data_designer/config/column_types.py @@ -7,6 +7,7 @@ from ..plugin_manager import PluginManager from .column_configs import ( + EmbeddingColumnConfig, ExpressionColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, @@ -31,6 +32,7 @@ SamplerColumnConfig, SeedDatasetColumnConfig, ValidationColumnConfig, + EmbeddingColumnConfig, ] ColumnConfigT = plugin_manager.inject_into_column_config_type_union(ColumnConfigT) @@ -50,6 +52,7 @@ DataDesignerColumnType.SEED_DATASET: "🌱", DataDesignerColumnType.SAMPLER: "🎲", DataDesignerColumnType.VALIDATION: "πŸ”", + DataDesignerColumnType.EMBEDDING: "🧬", } COLUMN_TYPE_EMOJI_MAP.update( {DataDesignerColumnType(p.name): p.emoji for p in plugin_manager.get_column_generator_plugins()} @@ -66,6 +69,7 @@ def column_type_used_in_execution_dag(column_type: Union[str, DataDesignerColumn DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_TEXT, DataDesignerColumnType.VALIDATION, + DataDesignerColumnType.EMBEDDING, } dag_column_types.update(plugin_manager.get_plugin_column_types(DataDesignerColumnType)) return column_type in dag_column_types @@ -79,6 +83,7 @@ def column_type_is_llm_generated(column_type: Union[str, DataDesignerColumnType] DataDesignerColumnType.LLM_CODE, DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, + DataDesignerColumnType.EMBEDDING, } llm_generated_column_types.update( plugin_manager.get_plugin_column_types( @@ -117,6 +122,8 @@ def get_column_config_from_kwargs(name: str, column_type: DataDesignerColumnType return SamplerColumnConfig(name=name, **_resolve_sampler_kwargs(name, 
kwargs)) if column_type == DataDesignerColumnType.SEED_DATASET: return SeedDatasetColumnConfig(name=name, **kwargs) + if column_type == DataDesignerColumnType.EMBEDDING: + return EmbeddingColumnConfig(name=name, **kwargs) if plugin := plugin_manager.get_column_generator_plugin_if_exists(column_type.value): return plugin.config_cls(name=name, **kwargs) raise InvalidColumnTypeError(f"πŸ›‘ {column_type} is not a valid column type.") # pragma: no cover @@ -131,6 +138,7 @@ def get_column_display_order() -> list[DataDesignerColumnType]: DataDesignerColumnType.LLM_CODE, DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, + DataDesignerColumnType.EMBEDDING, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EXPRESSION, ] diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 17698346..481633ac 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -207,7 +207,7 @@ def _is_value_in_range(self, value: float, min_value: float, max_value: float) - class GenerationType(str, Enum): CHAT_COMPLETION = "chat-completion" - TEXT_EMBEDDING = "text-embedding" + EMBEDDING = "embedding" IMAGE_GENERATION = "image-generation" diff --git a/src/data_designer/config/utils/visualization.py b/src/data_designer/config/utils/visualization.py index 26ab4ad3..0972daf7 100644 --- a/src/data_designer/config/utils/visualization.py +++ b/src/data_designer/config/utils/visualization.py @@ -8,7 +8,7 @@ from functools import cached_property import json import os -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union import numpy as np import pandas as pd @@ -171,6 +171,7 @@ def display_sample_record( + config_builder.get_columns_of_type(DataDesignerColumnType.EXPRESSION) + config_builder.get_columns_of_type(DataDesignerColumnType.LLM_TEXT) + config_builder.get_columns_of_type(DataDesignerColumnType.LLM_STRUCTURED) + + 
config_builder.get_columns_of_type(DataDesignerColumnType.EMBEDDING) ) if len(non_code_columns) > 0: table = Table(title="Generated Columns", **table_kws) @@ -178,6 +179,10 @@ def display_sample_record( table.add_column("Value") for col in non_code_columns: if not col.drop: + if col.column_type == DataDesignerColumnType.EMBEDDING: + record[col.name]["embeddings"] = [ + get_truncated_list_as_string(embd) for embd in record[col.name].get("embeddings") + ] table.add_row(col.name, convert_to_row_element(record[col.name])) render_list.append(pad_console_element(table)) @@ -237,6 +242,14 @@ def display_sample_record( console.print(Group(*render_list), markup=False) +def get_truncated_list_as_string(long_list: list[Any], max_items: int = 2) -> str: + if len(long_list) > max_items: + truncated_part = long_list[:max_items] + return f"[{', '.join(str(x) for x in truncated_part)} ...]" + else: + return str(long_list) + + def display_sampler_table( sampler_params: dict[SamplerType, ConfigBase], title: Optional[str] = None, diff --git a/src/data_designer/engine/column_generators/generators/embedding_generators.py b/src/data_designer/engine/column_generators/generators/embedding_generators.py new file mode 100644 index 00000000..ec827805 --- /dev/null +++ b/src/data_designer/engine/column_generators/generators/embedding_generators.py @@ -0,0 +1,34 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from data_designer.config.column_configs import EmbeddingColumnConfig +from data_designer.engine.column_generators.generators.base import ( + ColumnGenerator, + GenerationStrategy, + GeneratorMetadata, + WithModelGeneration, +) +from data_designer.engine.processing.utils import deserialize_json_values + + +class EmbeddingCellGenerator(WithModelGeneration, ColumnGenerator[EmbeddingColumnConfig]): + @staticmethod + def metadata() -> GeneratorMetadata: + return GeneratorMetadata( + name="embedding_cell_generator", + description="Generate embeddings for a text column.", + generation_strategy=GenerationStrategy.CELL_BY_CELL, + required_resources=None, + ) + + def generate(self, data: dict) -> dict: + deserialized_record = deserialize_json_values(data) + input_text = deserialized_record[self.config.target_column] + input_chunks = input_text.split(self.config.chunk_separator) if self.config.chunk_separator else [input_text] + embeddings = self.model.generate_text_embeddings(input_texts=input_chunks) + data[self.config.name] = { + "embeddings": embeddings, + "num_embeddings": len(embeddings), + "dimension": len(embeddings[0]) if len(embeddings) > 0 else 0, + } + return data diff --git a/src/data_designer/engine/column_generators/registry.py b/src/data_designer/engine/column_generators/registry.py index 56a176ae..961eac1a 100644 --- a/src/data_designer/engine/column_generators/registry.py +++ b/src/data_designer/engine/column_generators/registry.py @@ -3,6 +3,7 @@ from data_designer.config.base import ConfigBase from data_designer.config.column_configs import ( + EmbeddingColumnConfig, ExpressionColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, @@ -12,6 +13,7 @@ ) from data_designer.config.column_types import DataDesignerColumnType from data_designer.engine.column_generators.generators.base import ColumnGenerator +from data_designer.engine.column_generators.generators.embedding_generators import EmbeddingCellGenerator from 
data_designer.engine.column_generators.generators.expression import ExpressionColumnGenerator from data_designer.engine.column_generators.generators.llm_completion_generators import ( LLMCodeCellGenerator, @@ -40,6 +42,7 @@ def create_default_column_generator_registry(with_plugins: bool = True) -> Colum registry.register(DataDesignerColumnType.LLM_CODE, LLMCodeCellGenerator, LLMCodeColumnConfig) registry.register(DataDesignerColumnType.LLM_JUDGE, LLMJudgeCellGenerator, LLMJudgeColumnConfig) registry.register(DataDesignerColumnType.EXPRESSION, ExpressionColumnGenerator, ExpressionColumnConfig) + registry.register(DataDesignerColumnType.EMBEDDING, EmbeddingCellGenerator, EmbeddingColumnConfig) registry.register(DataDesignerColumnType.SAMPLER, SamplerColumnGenerator, SamplerMultiColumnConfig) registry.register(DataDesignerColumnType.SEED_DATASET, SeedDatasetColumnGenerator, SeedDatasetMultiColumnConfig) registry.register(DataDesignerColumnType.VALIDATION, ValidationColumnGenerator, ValidationColumnConfig) diff --git a/src/data_designer/engine/models/facade.py b/src/data_designer/engine/models/facade.py index ea72d4c3..c205a4ca 100644 --- a/src/data_designer/engine/models/facade.py +++ b/src/data_designer/engine/models/facade.py @@ -11,7 +11,7 @@ from litellm.types.router import DeploymentTypedDict, LiteLLM_Params from litellm.types.utils import EmbeddingResponse, ModelResponse -from data_designer.config.models import ModelConfig, ModelProvider +from data_designer.config.models import GenerationType, ModelConfig, ModelProvider from data_designer.engine.model_provider import ModelProviderRegistry from data_designer.engine.models.errors import ( GenerationValidationFailureError, @@ -49,6 +49,10 @@ def model_name(self) -> str: def model_provider(self) -> ModelProvider: return self._model_provider_registry.get_provider(self._model_config.provider) + @property + def model_generation_type(self) -> GenerationType: + return self._model_config.generation_type + @property def 
model_provider_name(self) -> str: return self.model_provider.name diff --git a/src/data_designer/engine/models/registry.py b/src/data_designer/engine/models/registry.py index aafd8c80..4330ea18 100644 --- a/src/data_designer/engine/models/registry.py +++ b/src/data_designer/engine/models/registry.py @@ -5,7 +5,7 @@ import logging -from data_designer.config.models import ModelConfig +from data_designer.config.models import GenerationType, ModelConfig from data_designer.engine.model_provider import ModelProvider, ModelProviderRegistry from data_designer.engine.models.facade import ModelFacade from data_designer.engine.models.litellm_overrides import apply_litellm_patches @@ -81,15 +81,24 @@ def run_health_check(self, model_aliases: set[str]) -> None: f" |-- πŸ‘€ Checking {model.model_name!r} in provider named {model.model_provider_name!r} for model alias {model.model_alias!r}..." ) try: - model.generate( - prompt="Hello!", - parser=lambda x: x, - system_prompt="You are a helpful assistant.", - max_correction_steps=0, - max_conversation_restarts=0, - skip_usage_tracking=True, - purpose="running health checks", - ) + if model.model_generation_type == GenerationType.EMBEDDING: + model.generate_text_embeddings( + input_texts=["Hello!"], + skip_usage_tracking=True, + purpose="running health checks", + ) + elif model.model_generation_type == GenerationType.CHAT_COMPLETION: + model.generate( + prompt="Hello!", + parser=lambda x: x, + system_prompt="You are a helpful assistant.", + max_correction_steps=0, + max_conversation_restarts=0, + skip_usage_tracking=True, + purpose="running health checks", + ) + else: + raise ValueError(f"Unsupported generation type: {model.model_generation_type}") logger.info(" |-- βœ… Passed!") except Exception as e: logger.error(" |-- ❌ Failed!") diff --git a/src/data_designer/essentials/__init__.py b/src/data_designer/essentials/__init__.py index 8cd8eb92..ee43519c 100644 --- a/src/data_designer/essentials/__init__.py +++ 
b/src/data_designer/essentials/__init__.py @@ -6,6 +6,7 @@ from ..config.analysis.column_profilers import JudgeScoreProfilerConfig from ..config.column_configs import ( + EmbeddingColumnConfig, ExpressionColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, @@ -22,6 +23,7 @@ from ..config.dataset_builders import BuildStage from ..config.datastore import DatastoreSettings from ..config.models import ( + GenerationType, ImageContext, ImageFormat, InferenceParameters, @@ -91,8 +93,10 @@ "DatastoreSettings", "DatetimeSamplerParams", "DropColumnsProcessorConfig", + "EmbeddingColumnConfig", "ExpressionColumnConfig", "GaussianSamplerParams", + "GenerationType", "IndexRange", "InfoType", "ImageContext", From 4b6f877875fa93f718d31211323f9a34207630b7 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 18:20:57 -0700 Subject: [PATCH 09/64] chunk_separator -> chunk_pattern --- src/data_designer/config/column_configs.py | 7 ++++--- .../column_generators/generators/embedding_generators.py | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/data_designer/config/column_configs.py b/src/data_designer/config/column_configs.py index c5468f19..339be35d 100644 --- a/src/data_designer/config/column_configs.py +++ b/src/data_designer/config/column_configs.py @@ -388,14 +388,15 @@ class EmbeddingColumnConfig(SingleColumnConfig): column_type: Discriminator field, always "embedding" for this configuration type. target_column: The column to generate embeddings for. model_alias: The model to use for embedding generation. - chunk_separator: Optional separator to split the text in the target column into chunks. For example, if chunk_separator - is '\n', the text will be split into chunks of text separated by newlines and embeddings generated for each chunk. + chunk_pattern: Optional regex pattern to split the text in the target column into chunks. 
For example, if chunk_pattern + is r'\n+', the text will be split into chunks using one or more newlines as separators and embeddings generated for each chunk. + If not provided, the entire text will be embedded as a single chunk. """ column_type: Literal["embedding"] = "embedding" target_column: str model_alias: str - chunk_separator: Optional[str] = None + chunk_pattern: Optional[str] = None @property def required_columns(self) -> list[str]: diff --git a/src/data_designer/engine/column_generators/generators/embedding_generators.py b/src/data_designer/engine/column_generators/generators/embedding_generators.py index ec827805..ac791d4f 100644 --- a/src/data_designer/engine/column_generators/generators/embedding_generators.py +++ b/src/data_designer/engine/column_generators/generators/embedding_generators.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import re + from data_designer.config.column_configs import EmbeddingColumnConfig from data_designer.engine.column_generators.generators.base import ( ColumnGenerator, @@ -24,7 +26,7 @@ def metadata() -> GeneratorMetadata: def generate(self, data: dict) -> dict: deserialized_record = deserialize_json_values(data) input_text = deserialized_record[self.config.target_column] - input_chunks = input_text.split(self.config.chunk_separator) if self.config.chunk_separator else [input_text] + input_chunks = re.split(self.config.chunk_pattern, input_text) if self.config.chunk_pattern else [input_text] embeddings = self.model.generate_text_embeddings(input_texts=input_chunks) data[self.config.name] = { "embeddings": embeddings, From 04fc0f3645062f15b392b70cc64feea2e1d11cab Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 25 Nov 2025 18:22:49 -0700 Subject: [PATCH 10/64] update tests --- tests/config/test_columns.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/config/test_columns.py 
b/tests/config/test_columns.py index f0f5c51a..f7763b07 100644 --- a/tests/config/test_columns.py +++ b/tests/config/test_columns.py @@ -49,6 +49,7 @@ def test_data_designer_column_type_get_display_order(): DataDesignerColumnType.LLM_CODE, DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, + DataDesignerColumnType.EMBEDDING, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EXPRESSION, ] @@ -59,6 +60,7 @@ def test_data_designer_column_type_is_llm_generated(): assert column_type_is_llm_generated(DataDesignerColumnType.LLM_CODE) assert column_type_is_llm_generated(DataDesignerColumnType.LLM_STRUCTURED) assert column_type_is_llm_generated(DataDesignerColumnType.LLM_JUDGE) + assert column_type_is_llm_generated(DataDesignerColumnType.EMBEDDING) assert not column_type_is_llm_generated(DataDesignerColumnType.SAMPLER) assert not column_type_is_llm_generated(DataDesignerColumnType.VALIDATION) assert not column_type_is_llm_generated(DataDesignerColumnType.EXPRESSION) @@ -72,6 +74,7 @@ def test_data_designer_column_type_is_in_dag(): assert column_type_used_in_execution_dag(DataDesignerColumnType.LLM_STRUCTURED) assert column_type_used_in_execution_dag(DataDesignerColumnType.LLM_TEXT) assert column_type_used_in_execution_dag(DataDesignerColumnType.VALIDATION) + assert column_type_used_in_execution_dag(DataDesignerColumnType.EMBEDDING) assert not column_type_used_in_execution_dag(DataDesignerColumnType.SAMPLER) assert not column_type_used_in_execution_dag(DataDesignerColumnType.SEED_DATASET) From 26d6da1917326fbb57a6e88cf3392145a4f69362 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 26 Nov 2025 09:44:05 -0700 Subject: [PATCH 11/64] rename for consistency --- .../generators/{embedding_generators.py => embedding.py} | 3 ++- .../{llm_completion_generators.py => llm_completion.py} | 0 src/data_designer/engine/column_generators/registry.py | 4 ++-- .../engine/dataset_builders/column_wise_builder.py | 2 +- 
.../generators/test_llm_completion_generators.py | 2 +- tests/engine/column_generators/test_registry.py | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) rename src/data_designer/engine/column_generators/generators/{embedding_generators.py => embedding.py} (91%) rename src/data_designer/engine/column_generators/generators/{llm_completion_generators.py => llm_completion.py} (100%) diff --git a/src/data_designer/engine/column_generators/generators/embedding_generators.py b/src/data_designer/engine/column_generators/generators/embedding.py similarity index 91% rename from src/data_designer/engine/column_generators/generators/embedding_generators.py rename to src/data_designer/engine/column_generators/generators/embedding.py index ac791d4f..d9981ccd 100644 --- a/src/data_designer/engine/column_generators/generators/embedding_generators.py +++ b/src/data_designer/engine/column_generators/generators/embedding.py @@ -11,6 +11,7 @@ WithModelGeneration, ) from data_designer.engine.processing.utils import deserialize_json_values +from data_designer.engine.resources.resource_provider import ResourceType class EmbeddingCellGenerator(WithModelGeneration, ColumnGenerator[EmbeddingColumnConfig]): @@ -20,7 +21,7 @@ def metadata() -> GeneratorMetadata: name="embedding_cell_generator", description="Generate embeddings for a text column.", generation_strategy=GenerationStrategy.CELL_BY_CELL, - required_resources=None, + required_resources=[ResourceType.MODEL_REGISTRY], ) def generate(self, data: dict) -> dict: diff --git a/src/data_designer/engine/column_generators/generators/llm_completion_generators.py b/src/data_designer/engine/column_generators/generators/llm_completion.py similarity index 100% rename from src/data_designer/engine/column_generators/generators/llm_completion_generators.py rename to src/data_designer/engine/column_generators/generators/llm_completion.py diff --git a/src/data_designer/engine/column_generators/registry.py 
b/src/data_designer/engine/column_generators/registry.py index 961eac1a..7171e561 100644 --- a/src/data_designer/engine/column_generators/registry.py +++ b/src/data_designer/engine/column_generators/registry.py @@ -13,9 +13,9 @@ ) from data_designer.config.column_types import DataDesignerColumnType from data_designer.engine.column_generators.generators.base import ColumnGenerator -from data_designer.engine.column_generators.generators.embedding_generators import EmbeddingCellGenerator +from data_designer.engine.column_generators.generators.embedding import EmbeddingCellGenerator from data_designer.engine.column_generators.generators.expression import ExpressionColumnGenerator -from data_designer.engine.column_generators.generators.llm_completion_generators import ( +from data_designer.engine.column_generators.generators.llm_completion import ( LLMCodeCellGenerator, LLMJudgeCellGenerator, LLMStructuredCellGenerator, diff --git a/src/data_designer/engine/dataset_builders/column_wise_builder.py b/src/data_designer/engine/dataset_builders/column_wise_builder.py index 78a5e9fa..ff9289ee 100644 --- a/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -18,7 +18,7 @@ ProcessorType, ) from data_designer.engine.column_generators.generators.base import ColumnGenerator, GenerationStrategy -from data_designer.engine.column_generators.generators.llm_completion_generators import WithCompletionGeneration +from data_designer.engine.column_generators.generators.llm_completion import WithCompletionGeneration from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError from data_designer.engine.dataset_builders.multi_column_configs import ( diff --git a/tests/engine/column_generators/generators/test_llm_completion_generators.py 
b/tests/engine/column_generators/generators/test_llm_completion_generators.py index 3a411fc9..0b787b7e 100644 --- a/tests/engine/column_generators/generators/test_llm_completion_generators.py +++ b/tests/engine/column_generators/generators/test_llm_completion_generators.py @@ -11,7 +11,7 @@ LLMStructuredColumnConfig, LLMTextColumnConfig, ) -from data_designer.engine.column_generators.generators.llm_completion_generators import ( +from data_designer.engine.column_generators.generators.llm_completion import ( DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS, DEFAULT_MAX_CONVERSATION_RESTARTS, REASONING_TRACE_COLUMN_POSTFIX, diff --git a/tests/engine/column_generators/test_registry.py b/tests/engine/column_generators/test_registry.py index 57457b94..0d325937 100644 --- a/tests/engine/column_generators/test_registry.py +++ b/tests/engine/column_generators/test_registry.py @@ -3,7 +3,7 @@ from data_designer.config.column_types import DataDesignerColumnType from data_designer.engine.column_generators.generators.expression import ExpressionColumnGenerator -from data_designer.engine.column_generators.generators.llm_completion_generators import ( +from data_designer.engine.column_generators.generators.llm_completion import ( LLMCodeCellGenerator, LLMJudgeCellGenerator, LLMStructuredCellGenerator, From 6facbd2c8a710052fc76c3c33c3c451dca04c697 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 26 Nov 2025 11:04:00 -0700 Subject: [PATCH 12/64] Restructure InferenceParameters -> CompletionInferenceParameters, BaseInferenceParameters, EmbeddingInferenceParameters --- .../config/default_model_settings.py | 18 +++-- src/data_designer/config/models.py | 79 ++++++++++++++++--- src/data_designer/essentials/__init__.py | 6 +- tests/cli/conftest.py | 10 +-- .../cli/controllers/test_model_controller.py | 4 +- tests/cli/services/test_model_service.py | 10 ++- tests/config/test_config_builder.py | 12 +-- tests/config/test_default_model_settings.py | 8 +- tests/config/test_models.py | 64 
++++++++------- tests/conftest.py | 4 +- tests/engine/models/conftest.py | 6 +- tests/engine/models/test_model_registry.py | 6 +- tests/essentials/test_init.py | 12 +++ 13 files changed, 162 insertions(+), 77 deletions(-) diff --git a/src/data_designer/config/default_model_settings.py b/src/data_designer/config/default_model_settings.py index 33d6dad4..cb565178 100644 --- a/src/data_designer/config/default_model_settings.py +++ b/src/data_designer/config/default_model_settings.py @@ -8,7 +8,7 @@ from pathlib import Path from typing import Any, Literal, Optional -from .models import InferenceParameters, ModelConfig, ModelProvider +from .models import CompletionInferenceParameters, ModelConfig, ModelProvider from .utils.constants import ( MANAGED_ASSETS_PATH, MODEL_CONFIGS_FILE_PATH, @@ -21,28 +21,30 @@ logger = logging.getLogger(__name__) -def get_default_text_alias_inference_parameters() -> InferenceParameters: - return InferenceParameters( +def get_default_text_alias_inference_parameters() -> CompletionInferenceParameters: + return CompletionInferenceParameters( temperature=0.85, top_p=0.95, ) -def get_default_reasoning_alias_inference_parameters() -> InferenceParameters: - return InferenceParameters( +def get_default_reasoning_alias_inference_parameters() -> CompletionInferenceParameters: + return CompletionInferenceParameters( temperature=0.35, top_p=0.95, ) -def get_default_vision_alias_inference_parameters() -> InferenceParameters: - return InferenceParameters( +def get_default_vision_alias_inference_parameters() -> CompletionInferenceParameters: + return CompletionInferenceParameters( temperature=0.85, top_p=0.95, ) -def get_default_inference_parameters(model_alias: Literal["text", "reasoning", "vision"]) -> InferenceParameters: +def get_default_inference_parameters( + model_alias: Literal["text", "reasoning", "vision"], +) -> CompletionInferenceParameters: if model_alias == "reasoning": return get_default_reasoning_alias_inference_parameters() elif 
model_alias == "vision": diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 481633ac..1df7055e 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -5,7 +5,7 @@ from enum import Enum import logging from pathlib import Path -from typing import Any, Generic, List, Optional, TypeVar, Union +from typing import Any, Generic, List, Literal, Optional, TypeVar, Union import numpy as np from pydantic import BaseModel, Field, model_validator @@ -136,10 +136,7 @@ def sample(self) -> float: DistributionT: TypeAlias = Union[UniformDistribution, ManualDistribution] -class InferenceParameters(ConfigBase): - temperature: Optional[Union[float, DistributionT]] = None - top_p: Optional[Union[float, DistributionT]] = None - max_tokens: Optional[int] = Field(default=None, ge=1) +class BaseInferenceParameters(ConfigBase, ABC): max_parallel_requests: int = Field(default=4, ge=1) timeout: Optional[int] = Field(default=None, ge=1) extra_body: Optional[dict[str, Any]] = None @@ -147,6 +144,21 @@ class InferenceParameters(ConfigBase): @property def generate_kwargs(self) -> dict[str, Union[float, int]]: result = {} + if self.timeout is not None: + result["timeout"] = self.timeout + if self.extra_body is not None and self.extra_body != {}: + result["extra_body"] = self.extra_body + return result + + +class CompletionInferenceParameters(BaseInferenceParameters): + temperature: Optional[Union[float, DistributionT]] = None + top_p: Optional[Union[float, DistributionT]] = None + max_tokens: Optional[int] = Field(default=None, ge=1) + + @property + def generate_kwargs(self) -> dict[str, Union[float, int]]: + result = super().generate_kwargs if self.temperature is not None: result["temperature"] = ( self.temperature.sample() if hasattr(self.temperature, "sample") else self.temperature @@ -155,10 +167,6 @@ def generate_kwargs(self) -> dict[str, Union[float, int]]: result["top_p"] = self.top_p.sample() if 
hasattr(self.top_p, "sample") else self.top_p if self.max_tokens is not None: result["max_tokens"] = self.max_tokens - if self.timeout is not None: - result["timeout"] = self.timeout - if self.extra_body is not None and self.extra_body != {}: - result["extra_body"] = self.extra_body return result @model_validator(mode="after") @@ -205,6 +213,40 @@ def _is_value_in_range(self, value: float, min_value: float, max_value: float) - return min_value <= value <= max_value +# Maintain backwards compatibility with a deprecation warning +class InferenceParameters(CompletionInferenceParameters): + """ + Deprecated: Use CompletionInferenceParameters instead. + This alias will be removed in a future version. + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + logger.warning( + "InferenceParameters is deprecated and will be removed in a future version. " + "Use CompletionInferenceParameters instead." + ) + super().__init__(*args, **kwargs) + + +class EmbeddingInferenceParameters(BaseInferenceParameters): + encoding_format: Optional[Literal["float", "base64"]] = "float" + dimensions: Optional[int] = None + + @property + def generate_kwargs(self) -> dict[str, Union[float, int]]: + result = super().generate_kwargs + if self.encoding_format is not None: + result["encoding_format"] = self.encoding_format + if self.dimensions is not None: + result["dimensions"] = self.dimensions + return result + + +InferenceParametersT: TypeAlias = Union[ + InferenceParameters, CompletionInferenceParameters, EmbeddingInferenceParameters +] + + class GenerationType(str, Enum): CHAT_COMPLETION = "chat-completion" EMBEDDING = "embedding" @@ -214,10 +256,25 @@ class GenerationType(str, Enum): class ModelConfig(ConfigBase): alias: str model: str - inference_parameters: InferenceParameters = Field(default_factory=InferenceParameters) - generation_type: GenerationType = GenerationType.CHAT_COMPLETION + inference_parameters: InferenceParametersT = 
Field(default_factory=CompletionInferenceParameters) provider: Optional[str] = None + @model_validator(mode="after") + def _normalize_deprecated_inference_parameters(self) -> Self: + """Normalize deprecated InferenceParameters to CompletionInferenceParameters.""" + if isinstance(self.inference_parameters, InferenceParameters): + self.inference_parameters = CompletionInferenceParameters(**self.inference_parameters.model_dump()) + return self + + @property + def generation_type(self) -> GenerationType: + if isinstance(self.inference_parameters, CompletionInferenceParameters): + return GenerationType.CHAT_COMPLETION + elif isinstance(self.inference_parameters, EmbeddingInferenceParameters): + return GenerationType.EMBEDDING + else: + raise ValueError(f"Unsupported inference parameters type: {type(self.inference_parameters)}") + class ModelProvider(ConfigBase): name: str diff --git a/src/data_designer/essentials/__init__.py b/src/data_designer/essentials/__init__.py index ee43519c..cd1dd6ba 100644 --- a/src/data_designer/essentials/__init__.py +++ b/src/data_designer/essentials/__init__.py @@ -23,6 +23,8 @@ from ..config.dataset_builders import BuildStage from ..config.datastore import DatastoreSettings from ..config.models import ( + CompletionInferenceParameters, + EmbeddingInferenceParameters, GenerationType, ImageContext, ImageFormat, @@ -80,20 +82,22 @@ "BernoulliMixtureSamplerParams", "BernoulliSamplerParams", "BinomialSamplerParams", + "BuildStage", "CategorySamplerParams", "CodeLang", "CodeValidatorParams", "ColumnInequalityConstraint", + "CompletionInferenceParameters", "configure_logging", "DataDesignerColumnType", "DataDesignerConfig", "DataDesignerConfigBuilder", - "BuildStage", "DatastoreSeedDatasetReference", "DatastoreSettings", "DatetimeSamplerParams", "DropColumnsProcessorConfig", "EmbeddingColumnConfig", + "EmbeddingInferenceParameters", "ExpressionColumnConfig", "GaussianSamplerParams", "GenerationType", diff --git a/tests/cli/conftest.py 
b/tests/cli/conftest.py index 758e837e..66a06347 100644 --- a/tests/cli/conftest.py +++ b/tests/cli/conftest.py @@ -9,16 +9,16 @@ from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository from data_designer.cli.services.model_service import ModelService from data_designer.cli.services.provider_service import ProviderService -from data_designer.config.models import InferenceParameters, ModelConfig, ModelProvider +from data_designer.config.models import CompletionInferenceParameters, ModelConfig, ModelProvider @pytest.fixture -def stub_inference_parameters() -> InferenceParameters: - return InferenceParameters(temperature=0.7, top_p=0.9, max_tokens=2048, max_parallel_requests=4) +def stub_inference_parameters() -> CompletionInferenceParameters: + return CompletionInferenceParameters(temperature=0.7, top_p=0.9, max_tokens=2048, max_parallel_requests=4) @pytest.fixture -def stub_model_configs(stub_inference_parameters: InferenceParameters) -> list[ModelConfig]: +def stub_model_configs(stub_inference_parameters: CompletionInferenceParameters) -> list[ModelConfig]: return [ ModelConfig( alias="test-alias-1", @@ -41,7 +41,7 @@ def stub_new_model_config() -> ModelConfig: alias="test-alias-3", model="test-model-3", provider="test-provider-1", - inference_parameters=InferenceParameters( + inference_parameters=CompletionInferenceParameters( temperature=0.7, top_p=0.9, max_tokens=2048, diff --git a/tests/cli/controllers/test_model_controller.py b/tests/cli/controllers/test_model_controller.py index b630b04a..4f718ca4 100644 --- a/tests/cli/controllers/test_model_controller.py +++ b/tests/cli/controllers/test_model_controller.py @@ -9,7 +9,7 @@ from data_designer.cli.controllers.model_controller import ModelController from data_designer.cli.repositories.model_repository import ModelConfigRegistry from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository -from 
data_designer.config.models import InferenceParameters, ModelConfig +from data_designer.config.models import CompletionInferenceParameters, ModelConfig @pytest.fixture @@ -141,7 +141,7 @@ def test_run_updates_model( alias="test-alias-1-updated", model="test-model-1-updated", provider="test-provider-1", - inference_parameters=InferenceParameters(temperature=0.8, top_p=0.95, max_tokens=1024), + inference_parameters=CompletionInferenceParameters(temperature=0.8, top_p=0.95, max_tokens=1024), ) mock_builder = MagicMock() diff --git a/tests/cli/services/test_model_service.py b/tests/cli/services/test_model_service.py index 1d9bf5aa..4287eee8 100644 --- a/tests/cli/services/test_model_service.py +++ b/tests/cli/services/test_model_service.py @@ -7,7 +7,7 @@ from data_designer.cli.repositories.model_repository import ModelRepository from data_designer.cli.services.model_service import ModelService -from data_designer.config.models import InferenceParameters, ModelConfig +from data_designer.config.models import CompletionInferenceParameters, ModelConfig def test_list_all(stub_model_service: ModelService, stub_model_configs: list[ModelConfig]): @@ -30,7 +30,9 @@ def test_add( assert stub_model_service.list_all() == stub_model_configs + [stub_new_model_config] -def test_add_duplicate_alias(stub_model_service: ModelService, stub_inference_parameters: InferenceParameters): +def test_add_duplicate_alias( + stub_model_service: ModelService, stub_inference_parameters: CompletionInferenceParameters +): """Test adding a model with an alias that already exists.""" duplicate_model = ModelConfig( alias="test-alias-1", @@ -61,7 +63,9 @@ def test_update_nonexistent_model(stub_model_service: ModelService, stub_new_mod stub_model_service.update("nonexistent", stub_new_model_config) -def test_update_to_existing_alias(stub_model_service: ModelService, stub_inference_parameters: InferenceParameters): +def test_update_to_existing_alias( + stub_model_service: ModelService, 
stub_inference_parameters: CompletionInferenceParameters +): """Test updating a model to an alias that already exists.""" updated_model = ModelConfig( alias="test-alias-2", # Already exists diff --git a/tests/config/test_config_builder.py b/tests/config/test_config_builder.py index aab8112a..57741e59 100644 --- a/tests/config/test_config_builder.py +++ b/tests/config/test_config_builder.py @@ -26,7 +26,7 @@ from data_designer.config.data_designer_config import DataDesignerConfig from data_designer.config.datastore import DatastoreSettings from data_designer.config.errors import BuilderConfigurationError, InvalidColumnTypeError, InvalidConfigError -from data_designer.config.models import InferenceParameters, ModelConfig +from data_designer.config.models import CompletionInferenceParameters, ModelConfig from data_designer.config.sampler_constraints import ColumnInequalityConstraint, ScalarInequalityConstraint from data_designer.config.sampler_params import SamplerType, UUIDSamplerParams from data_designer.config.seed import DatastoreSeedDatasetReference, SamplingStrategy @@ -670,7 +670,7 @@ def test_add_model_config(stub_empty_builder): new_model_config = ModelConfig( alias="new-model", model="openai/gpt-4", - inference_parameters=InferenceParameters( + inference_parameters=CompletionInferenceParameters( temperature=0.7, top_p=0.95, max_tokens=1024, @@ -691,7 +691,7 @@ def test_add_model_config(stub_empty_builder): alias="provider-model", model="anthropic/claude-3", provider="anthropic", - inference_parameters=InferenceParameters(temperature=0.8), + inference_parameters=CompletionInferenceParameters(temperature=0.8), ) stub_empty_builder.add_model_config(provider_model_config) @@ -717,7 +717,7 @@ def test_add_model_config_duplicate_alias(stub_empty_builder): duplicate_model_config = ModelConfig( alias="stub-model", model="different/model", - inference_parameters=InferenceParameters(temperature=0.5), + 
inference_parameters=CompletionInferenceParameters(temperature=0.5), ) with pytest.raises( @@ -733,12 +733,12 @@ def test_delete_model_config(stub_empty_builder): model_config_1 = ModelConfig( alias="model-to-delete", model="model/delete", - inference_parameters=InferenceParameters(temperature=0.5), + inference_parameters=CompletionInferenceParameters(temperature=0.5), ) model_config_2 = ModelConfig( alias="model-to-keep", model="model/keep", - inference_parameters=InferenceParameters(temperature=0.6), + inference_parameters=CompletionInferenceParameters(temperature=0.6), ) stub_empty_builder.add_model_config(model_config_1) stub_empty_builder.add_model_config(model_config_2) diff --git a/tests/config/test_default_model_settings.py b/tests/config/test_default_model_settings.py index 222bb410..8f389a69 100644 --- a/tests/config/test_default_model_settings.py +++ b/tests/config/test_default_model_settings.py @@ -18,20 +18,20 @@ get_default_providers, resolve_seed_default_model_settings, ) -from data_designer.config.models import InferenceParameters +from data_designer.config.models import CompletionInferenceParameters from data_designer.config.utils.visualization import get_nvidia_api_key, get_openai_api_key def test_get_default_inference_parameters(): - assert get_default_inference_parameters("text") == InferenceParameters( + assert get_default_inference_parameters("text") == CompletionInferenceParameters( temperature=0.85, top_p=0.95, ) - assert get_default_inference_parameters("reasoning") == InferenceParameters( + assert get_default_inference_parameters("reasoning") == CompletionInferenceParameters( temperature=0.35, top_p=0.95, ) - assert get_default_inference_parameters("vision") == InferenceParameters( + assert get_default_inference_parameters("vision") == CompletionInferenceParameters( temperature=0.85, top_p=0.95, ) diff --git a/tests/config/test_models.py b/tests/config/test_models.py index 6a3d7b25..f1f65401 100644 --- a/tests/config/test_models.py +++ 
b/tests/config/test_models.py @@ -11,9 +11,9 @@ from data_designer.config.errors import InvalidConfigError from data_designer.config.models import ( + CompletionInferenceParameters, ImageContext, ImageFormat, - InferenceParameters, ManualDistribution, ManualDistributionParams, ModalityDataType, @@ -46,13 +46,13 @@ def test_image_context_validate_image_format(): def test_inference_parameters_default_construction(): - empty_inference_parameters = InferenceParameters() + empty_inference_parameters = CompletionInferenceParameters() assert empty_inference_parameters.generate_kwargs == {} assert empty_inference_parameters.max_parallel_requests == 4 def test_inference_parameters_generate_kwargs(): - assert InferenceParameters( + assert CompletionInferenceParameters( temperature=0.95, top_p=0.95, max_tokens=100, @@ -67,9 +67,9 @@ def test_inference_parameters_generate_kwargs(): "extra_body": {"reasoning_effort": "high"}, } - assert InferenceParameters().generate_kwargs == {} + assert CompletionInferenceParameters().generate_kwargs == {} - inference_parameters_kwargs = InferenceParameters( + inference_parameters_kwargs = CompletionInferenceParameters( temperature=UniformDistribution(params=UniformDistributionParams(low=0.0, high=1.0)), top_p=ManualDistribution(params=ManualDistributionParams(values=[0.0, 1.0], weights=[0.5, 0.5])), ).generate_kwargs @@ -131,32 +131,38 @@ def test_inference_parameters_temperature_validation(): # All temp values provide in a manual destribution should be valid with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters( + CompletionInferenceParameters( temperature=ManualDistribution(params=ManualDistributionParams(values=[0.5, 2.5], weights=[0.5, 0.5])) ) # High and low values of uniform distribution should be valid with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters(temperature=UniformDistribution(params=UniformDistributionParams(low=0.5, high=2.5))) + CompletionInferenceParameters( + 
temperature=UniformDistribution(params=UniformDistributionParams(low=0.5, high=2.5)) + ) with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters(temperature=UniformDistribution(params=UniformDistributionParams(low=-0.5, high=2.0))) + CompletionInferenceParameters( + temperature=UniformDistribution(params=UniformDistributionParams(low=-0.5, high=2.0)) + ) # Static values should be valid with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters(temperature=3.0) + CompletionInferenceParameters(temperature=3.0) with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters(temperature=-1.0) + CompletionInferenceParameters(temperature=-1.0) # Valid temperature values shouldn't raise validation errors try: - InferenceParameters(temperature=0.1) - InferenceParameters(temperature=UniformDistribution(params=UniformDistributionParams(low=0.5, high=2.0))) - InferenceParameters( + CompletionInferenceParameters(temperature=0.1) + CompletionInferenceParameters( + temperature=UniformDistribution(params=UniformDistributionParams(low=0.5, high=2.0)) + ) + CompletionInferenceParameters( temperature=ManualDistribution(params=ManualDistributionParams(values=[0.5, 2.0], weights=[0.5, 0.5])) ) except Exception: - pytest.fail("Unexpected exception raised during InferenceParameters temperature validation") + pytest.fail("Unexpected exception raised during CompletionInferenceParameters temperature validation") def test_generation_parameters_top_p_validation(): @@ -164,31 +170,31 @@ def test_generation_parameters_top_p_validation(): # All top_p values provide in a manual destribution should be valid with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters( + CompletionInferenceParameters( top_p=ManualDistribution(params=ManualDistributionParams(values=[0.5, 1.5], weights=[0.5, 0.5])) ) # High and low values of uniform distribution should be valid with pytest.raises(ValidationError, 
match=expected_error_msg): - InferenceParameters(top_p=UniformDistribution(params=UniformDistributionParams(low=0.5, high=1.5))) + CompletionInferenceParameters(top_p=UniformDistribution(params=UniformDistributionParams(low=0.5, high=1.5))) with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters(top_p=UniformDistribution(params=UniformDistributionParams(low=-0.5, high=1.0))) + CompletionInferenceParameters(top_p=UniformDistribution(params=UniformDistributionParams(low=-0.5, high=1.0))) # Static values should be valid with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters(top_p=1.5) + CompletionInferenceParameters(top_p=1.5) with pytest.raises(ValidationError, match=expected_error_msg): - InferenceParameters(top_p=-0.1) + CompletionInferenceParameters(top_p=-0.1) # Valid top_p values shouldn't raise validation errors try: - InferenceParameters(top_p=0.1) - InferenceParameters(top_p=UniformDistribution(params=UniformDistributionParams(low=0.5, high=1.0))) - InferenceParameters( + CompletionInferenceParameters(top_p=0.1) + CompletionInferenceParameters(top_p=UniformDistribution(params=UniformDistributionParams(low=0.5, high=1.0))) + CompletionInferenceParameters( top_p=ManualDistribution(params=ManualDistributionParams(values=[0.5, 1.0], weights=[0.5, 0.5])) ) except Exception: - pytest.fail("Unexpected exception raised during InferenceParameters top_p validation") + pytest.fail("Unexpected exception raised during CompletionInferenceParameters top_p validation") def test_generation_parameters_max_tokens_validation(): @@ -196,15 +202,15 @@ def test_generation_parameters_max_tokens_validation(): ValidationError, match="Input should be greater than or equal to 1", ): - InferenceParameters(max_tokens=0) + CompletionInferenceParameters(max_tokens=0) # Valid max_tokens values shouldn't raise validation errors try: - InferenceParameters(max_tokens=128_000) - InferenceParameters(max_tokens=4096) - 
InferenceParameters(max_tokens=1) + CompletionInferenceParameters(max_tokens=128_000) + CompletionInferenceParameters(max_tokens=4096) + CompletionInferenceParameters(max_tokens=1) except Exception: - pytest.fail("Unexpected exception raised during InferenceParameters max_tokens validation") + pytest.fail("Unexpected exception raised during CompletionInferenceParameters max_tokens validation") def test_load_model_configs(): @@ -250,4 +256,4 @@ def test_load_model_configs(): def test_model_config_default_construction(): model_config = ModelConfig(alias="test", model="test") - assert model_config.inference_parameters == InferenceParameters() + assert model_config.inference_parameters == CompletionInferenceParameters() diff --git a/tests/conftest.py b/tests/conftest.py index 31dc0057..46b5d318 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,7 +17,7 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.data_designer_config import DataDesignerConfig from data_designer.config.datastore import DatastoreSettings -from data_designer.config.models import InferenceParameters, ModelConfig, ModelProvider +from data_designer.config.models import CompletionInferenceParameters, ModelConfig, ModelProvider @pytest.fixture @@ -135,7 +135,7 @@ def stub_model_configs() -> list[ModelConfig]: ModelConfig( alias="stub-model", model="stub-model", - inference_parameters=InferenceParameters( + inference_parameters=CompletionInferenceParameters( temperature=0.9, top_p=0.9, max_tokens=2048, diff --git a/tests/engine/models/conftest.py b/tests/engine/models/conftest.py index 95e6941f..7edcd073 100644 --- a/tests/engine/models/conftest.py +++ b/tests/engine/models/conftest.py @@ -5,7 +5,7 @@ import pytest -from data_designer.config.models import InferenceParameters, ModelConfig +from data_designer.config.models import CompletionInferenceParameters, ModelConfig from data_designer.engine.model_provider import ModelProvider, 
ModelProviderRegistry from data_designer.engine.models.registry import ModelRegistry, create_model_registry from data_designer.engine.secret_resolver import SecretsFileResolver @@ -38,7 +38,7 @@ def stub_model_configs() -> list[ModelConfig]: alias="stub-text", model="stub-model-text", provider="stub-model-provider", - inference_parameters=InferenceParameters( + inference_parameters=CompletionInferenceParameters( temperature=0.80, top_p=0.95, max_tokens=100, max_parallel_requests=10, timeout=100 ), ), @@ -46,7 +46,7 @@ def stub_model_configs() -> list[ModelConfig]: alias="stub-reasoning", model="stub-model-reasoning", provider="stub-model-provider", - inference_parameters=InferenceParameters( + inference_parameters=CompletionInferenceParameters( temperature=0.80, top_p=0.95, max_tokens=100, max_parallel_requests=10, timeout=100 ), ), diff --git a/tests/engine/models/test_model_registry.py b/tests/engine/models/test_model_registry.py index 571b9605..83e3b650 100644 --- a/tests/engine/models/test_model_registry.py +++ b/tests/engine/models/test_model_registry.py @@ -6,7 +6,7 @@ from litellm import AuthenticationError import pytest -from data_designer.config.models import InferenceParameters, ModelConfig +from data_designer.config.models import CompletionInferenceParameters, ModelConfig from data_designer.engine.models.errors import ModelAuthenticationError from data_designer.engine.models.facade import ModelFacade from data_designer.engine.models.registry import ModelRegistry, create_model_registry @@ -24,7 +24,7 @@ def stub_new_model_config(): alias="stub-vision", model="stub-model-vision", provider="stub-model-provider", - inference_parameters=InferenceParameters( + inference_parameters=CompletionInferenceParameters( temperature=0.80, top_p=0.95, max_tokens=100, max_parallel_requests=10, timeout=100 ), ) @@ -36,7 +36,7 @@ def stub_no_usage_config(): alias="no-usage", model="no-usage-model", provider="stub-model-provider", - 
inference_parameters=InferenceParameters(), + inference_parameters=CompletionInferenceParameters(), ) diff --git a/tests/essentials/test_init.py b/tests/essentials/test_init.py index 89f8388a..d810bba3 100644 --- a/tests/essentials/test_init.py +++ b/tests/essentials/test_init.py @@ -17,14 +17,17 @@ CodeLang, CodeValidatorParams, ColumnInequalityConstraint, + CompletionInferenceParameters, DataDesignerColumnType, DataDesignerConfig, DataDesignerConfigBuilder, DatastoreSeedDatasetReference, DatastoreSettings, DatetimeSamplerParams, + EmbeddingInferenceParameters, ExpressionColumnConfig, GaussianSamplerParams, + GenerationType, ImageContext, ImageFormat, InferenceParameters, @@ -109,6 +112,9 @@ def test_model_config_imports(): assert ImageContext is not None assert ImageFormat is not None assert InferenceParameters is not None + assert CompletionInferenceParameters is not None + assert EmbeddingInferenceParameters is not None + assert GenerationType is not None assert ManualDistribution is not None assert ManualDistributionParams is not None assert Modality is not None @@ -232,6 +238,7 @@ def test_all_contains_column_configs(): assert "Score" in __all__ assert "SeedDatasetColumnConfig" in __all__ assert "ValidationColumnConfig" in __all__ + assert "EmbeddingColumnConfig" in __all__ def test_all_contains_sampler_params(): @@ -250,6 +257,8 @@ def test_all_contains_sampler_params(): assert "TimeDeltaSamplerParams" in __all__ assert "UniformSamplerParams" in __all__ assert "UUIDSamplerParams" in __all__ + assert "PersonFromFakerSamplerParams" in __all__ + assert "ProcessorType" in __all__ def test_all_contains_constraints(): @@ -263,6 +272,9 @@ def test_all_contains_model_configs(): assert "ImageContext" in __all__ assert "ImageFormat" in __all__ assert "InferenceParameters" in __all__ + assert "CompletionInferenceParameters" in __all__ + assert "EmbeddingInferenceParameters" in __all__ + assert "GenerationType" in __all__ assert "ManualDistribution" in __all__ assert 
"ManualDistributionParams" in __all__ assert "Modality" in __all__ From 2c1b2676fe0234016a7e13fe57171da2295eaf7c Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 26 Nov 2025 13:04:18 -0700 Subject: [PATCH 13/64] Remove purpose from consolidated kwargs --- src/data_designer/config/models.py | 2 +- .../engine/column_generators/generators/embedding.py | 1 + src/data_designer/engine/models/facade.py | 2 ++ tests/engine/models/test_facade.py | 8 ++++---- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 1df7055e..7b129556 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -229,7 +229,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class EmbeddingInferenceParameters(BaseInferenceParameters): - encoding_format: Optional[Literal["float", "base64"]] = "float" + encoding_format: Optional[Literal["float", "base64"]] = None dimensions: Optional[int] = None @property diff --git a/src/data_designer/engine/column_generators/generators/embedding.py b/src/data_designer/engine/column_generators/generators/embedding.py index d9981ccd..48fc309f 100644 --- a/src/data_designer/engine/column_generators/generators/embedding.py +++ b/src/data_designer/engine/column_generators/generators/embedding.py @@ -28,6 +28,7 @@ def generate(self, data: dict) -> dict: deserialized_record = deserialize_json_values(data) input_text = deserialized_record[self.config.target_column] input_chunks = re.split(self.config.chunk_pattern, input_text) if self.config.chunk_pattern else [input_text] + input_chunks = [chunk.strip() for chunk in input_chunks if chunk.strip()] embeddings = self.model.generate_text_embeddings(input_texts=input_chunks) data[self.config.name] = { "embeddings": embeddings, diff --git a/src/data_designer/engine/models/facade.py b/src/data_designer/engine/models/facade.py index c205a4ca..6b98c0a7 100644 --- 
a/src/data_designer/engine/models/facade.py +++ b/src/data_designer/engine/models/facade.py @@ -91,6 +91,8 @@ def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = self._track_usage(response) def consolidate_kwargs(self, **kwargs) -> dict[str, Any]: + # Remove purpose from kwargs to avoid passing it to the model + kwargs.pop("purpose", None) kwargs = {**self._model_config.inference_parameters.generate_kwargs, **kwargs} if self.model_provider.extra_body: kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body} diff --git a/tests/engine/models/test_facade.py b/tests/engine/models/test_facade.py index afe27730..8765d0ab 100644 --- a/tests/engine/models/test_facade.py +++ b/tests/engine/models/test_facade.py @@ -116,17 +116,17 @@ def test_usage_stats_property(stub_model_facade): def test_consolidate_kwargs(stub_model_configs, stub_model_facade): - # Model config generate kwargs are used as base - result = stub_model_facade.consolidate_kwargs() + # Model config generate kwargs are used as base, and purpose is removed + result = stub_model_facade.consolidate_kwargs(purpose="test") assert result == stub_model_configs[0].inference_parameters.generate_kwargs # kwargs overrides model config generate kwargs - result = stub_model_facade.consolidate_kwargs(temperature=0.01) + result = stub_model_facade.consolidate_kwargs(temperature=0.01, purpose="test") assert result == {**stub_model_configs[0].inference_parameters.generate_kwargs, "temperature": 0.01} # Provider extra_body overrides all other kwargs stub_model_facade.model_provider.extra_body = {"foo_provider": "bar_provider"} - result = stub_model_facade.consolidate_kwargs(extra_body={"foo": "bar"}) + result = stub_model_facade.consolidate_kwargs(extra_body={"foo": "bar"}, purpose="test") assert result == { **stub_model_configs[0].inference_parameters.generate_kwargs, "extra_body": {"foo_provider": "bar_provider", "foo": "bar"}, From 
4b1492baf805adc0719d73857b1f19a219f49375 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 2 Dec 2025 11:38:33 -0700 Subject: [PATCH 14/64] WithModelConfiguration.inference_parameters should be typed with BaseInferenceParameters --- src/data_designer/engine/column_generators/generators/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_designer/engine/column_generators/generators/base.py b/src/data_designer/engine/column_generators/generators/base.py index 8977a63b..580c96a6 100644 --- a/src/data_designer/engine/column_generators/generators/base.py +++ b/src/data_designer/engine/column_generators/generators/base.py @@ -9,7 +9,7 @@ import pandas as pd from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP -from data_designer.config.models import InferenceParameters, ModelConfig +from data_designer.config.models import BaseInferenceParameters, ModelConfig from data_designer.config.utils.type_helpers import StrEnum from data_designer.engine.column_generators.utils.prompt_renderer import ( RecordBasedPromptRenderer, @@ -81,7 +81,7 @@ def model_config(self) -> ModelConfig: return self.resource_provider.model_registry.get_model_config(model_alias=self.config.model_alias) @functools.cached_property - def inference_parameters(self) -> InferenceParameters: + def inference_parameters(self) -> BaseInferenceParameters: return self.model_config.inference_parameters @functools.cached_property From c445caf53f213a54b80b3df71a0c00334ccf519b Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 2 Dec 2025 14:37:07 -0700 Subject: [PATCH 15/64] Type as WithModelGeneration --- .../engine/dataset_builders/column_wise_builder.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/data_designer/engine/dataset_builders/column_wise_builder.py b/src/data_designer/engine/dataset_builders/column_wise_builder.py index ff9289ee..2e30407c 100644 ---
a/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -17,8 +17,11 @@ ProcessorConfig, ProcessorType, ) -from data_designer.engine.column_generators.generators.base import ColumnGenerator, GenerationStrategy -from data_designer.engine.column_generators.generators.llm_completion import WithCompletionGeneration +from data_designer.engine.column_generators.generators.base import ( + ColumnGenerator, + GenerationStrategy, + WithModelGeneration, +) from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError from data_designer.engine.dataset_builders.multi_column_configs import ( @@ -169,7 +172,7 @@ def _run_from_scratch_column_generator(self, generator: ColumnGenerator) -> None def _run_cell_by_cell_generator(self, generator: ColumnGenerator) -> None: max_workers = MAX_CONCURRENCY_PER_NON_LLM_GENERATOR - if isinstance(generator, WithCompletionGeneration): + if isinstance(generator, WithModelGeneration): max_workers = generator.inference_parameters.max_parallel_requests self._fan_out_with_threads(generator, max_workers=max_workers) @@ -183,7 +186,7 @@ def _run_model_health_check_if_needed(self) -> bool: set(config.model_alias for config in self.llm_generated_column_configs) ) - def _fan_out_with_threads(self, generator: WithCompletionGeneration, max_workers: int) -> None: + def _fan_out_with_threads(self, generator: WithModelGeneration, max_workers: int) -> None: if generator.generation_strategy != GenerationStrategy.CELL_BY_CELL: raise DatasetGenerationError( f"Generator {generator.metadata().name} is not a {GenerationStrategy.CELL_BY_CELL} " From 4b8aa2bf9258c1a1fc3be10ff1d817ae797ed2d7 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 2 Dec 2025 16:06:52 -0700 Subject: [PATCH 16/64] Add image generation modality --- 
src/data_designer/config/column_configs.py | 40 +++++++++++++++ src/data_designer/config/column_types.py | 8 +++ src/data_designer/config/models.py | 20 +++++++- .../column_generators/generators/base.py | 14 ------ .../column_generators/generators/image.py | 49 +++++++++++++++++++ .../generators/llm_completion.py | 12 +++++ .../engine/column_generators/registry.py | 4 +- src/data_designer/engine/models/facade.py | 36 +++++++++++++- src/data_designer/engine/models/registry.py | 6 +++ src/data_designer/essentials/__init__.py | 4 ++ 10 files changed, 176 insertions(+), 17 deletions(-) create mode 100644 src/data_designer/engine/column_generators/generators/image.py diff --git a/src/data_designer/config/column_configs.py b/src/data_designer/config/column_configs.py index 339be35d..eb93f9f0 100644 --- a/src/data_designer/config/column_configs.py +++ b/src/data_designer/config/column_configs.py @@ -401,3 +401,43 @@ class EmbeddingColumnConfig(SingleColumnConfig): @property def required_columns(self) -> list[str]: return [self.target_column] + + +class ImageGenerationColumnConfig(SingleColumnConfig): + """Configuration for image generation columns. + + Image columns generate images using a specified model. + + Attributes: + column_type: Discriminator field, always "image-generation" for this configuration type. + prompt: Prompt template for image generation. Supports Jinja2 templating to + reference other columns (e.g., "Generate an image of a {{ character_name }}"). + Must be a valid Jinja2 template. + model_alias: The model to use for image generation. + """ + + column_type: Literal["image-generation"] = "image-generation" + prompt: str + model_alias: str + + @property + def required_columns(self) -> list[str]: + """Get columns referenced in the prompt template. + + Returns: + List of unique column names referenced in Jinja2 templates. 
+ """ + return list(get_prompt_template_keywords(self.prompt)) + + @model_validator(mode="after") + def assert_prompt_valid_jinja(self) -> Self: + """Validate that prompt is a valid Jinja2 template. + + Returns: + The validated instance. + + Raises: + InvalidConfigError: If prompt contains invalid Jinja2 syntax. + """ + assert_valid_jinja2_template(self.prompt) + return self diff --git a/src/data_designer/config/column_types.py b/src/data_designer/config/column_types.py index aab55c4d..efdeb094 100644 --- a/src/data_designer/config/column_types.py +++ b/src/data_designer/config/column_types.py @@ -9,6 +9,7 @@ from .column_configs import ( EmbeddingColumnConfig, ExpressionColumnConfig, + ImageGenerationColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, LLMStructuredColumnConfig, @@ -33,6 +34,7 @@ SeedDatasetColumnConfig, ValidationColumnConfig, EmbeddingColumnConfig, + ImageGenerationColumnConfig, ] ColumnConfigT = plugin_manager.inject_into_column_config_type_union(ColumnConfigT) @@ -53,6 +55,7 @@ DataDesignerColumnType.SAMPLER: "🎲", DataDesignerColumnType.VALIDATION: "🔍", DataDesignerColumnType.EMBEDDING: "🧬", + DataDesignerColumnType.IMAGE_GENERATION: "🖼️", } COLUMN_TYPE_EMOJI_MAP.update( {DataDesignerColumnType(p.name): p.emoji for p in plugin_manager.get_column_generator_plugins()} @@ -70,6 +73,7 @@ def column_type_used_in_execution_dag(column_type: Union[str, DataDesignerColumn DataDesignerColumnType.LLM_TEXT, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EMBEDDING, + DataDesignerColumnType.IMAGE_GENERATION, } dag_column_types.update(plugin_manager.get_plugin_column_types(DataDesignerColumnType)) return column_type in dag_column_types @@ -84,6 +88,7 @@ def column_type_is_llm_generated(column_type: Union[str, DataDesignerColumnType] DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, DataDesignerColumnType.EMBEDDING, + DataDesignerColumnType.IMAGE_GENERATION, } llm_generated_column_types.update(
plugin_manager.get_plugin_column_types( @@ -124,6 +129,8 @@ def get_column_config_from_kwargs(name: str, column_type: DataDesignerColumnType return SeedDatasetColumnConfig(name=name, **kwargs) if column_type == DataDesignerColumnType.EMBEDDING: return EmbeddingColumnConfig(name=name, **kwargs) + if column_type == DataDesignerColumnType.IMAGE_GENERATION: + return ImageGenerationColumnConfig(name=name, **kwargs) if plugin := plugin_manager.get_column_generator_plugin_if_exists(column_type.value): return plugin.config_cls(name=name, **kwargs) raise InvalidColumnTypeError(f"🛑 {column_type} is not a valid column type.") # pragma: no cover @@ -139,6 +146,7 @@ def get_column_display_order() -> list[DataDesignerColumnType]: DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, DataDesignerColumnType.EMBEDDING, + DataDesignerColumnType.IMAGE_GENERATION, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EXPRESSION, ] diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 7b129556..6e535038 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -242,8 +242,24 @@ def generate_kwargs(self) -> dict[str, Union[float, int]]: return result +class ImageGenerationInferenceParameters(BaseInferenceParameters): + quality: str + size: str + output_format: Optional[ModalityDataType] = ModalityDataType.BASE64 + + @property + def generate_kwargs(self) -> dict[str, Union[float, int]]: + result = super().generate_kwargs + result["size"] = self.size + result["quality"] = self.quality + result["response_format"] = ( + self.output_format.value if self.output_format == ModalityDataType.URL else "b64_json" + ) + return result + + InferenceParametersT: TypeAlias = Union[ - InferenceParameters, CompletionInferenceParameters, EmbeddingInferenceParameters + InferenceParameters, CompletionInferenceParameters, EmbeddingInferenceParameters, ImageGenerationInferenceParameters ] @@ -272,6 +288,8 @@ def
generation_type(self) -> GenerationType: return GenerationType.CHAT_COMPLETION elif isinstance(self.inference_parameters, EmbeddingInferenceParameters): return GenerationType.EMBEDDING + elif isinstance(self.inference_parameters, ImageGenerationInferenceParameters): + return GenerationType.IMAGE_GENERATION else: raise ValueError(f"Unsupported inference parameters type: {type(self.inference_parameters)}") diff --git a/src/data_designer/engine/column_generators/generators/base.py b/src/data_designer/engine/column_generators/generators/base.py index 580c96a6..a98038b3 100644 --- a/src/data_designer/engine/column_generators/generators/base.py +++ b/src/data_designer/engine/column_generators/generators/base.py @@ -11,9 +11,6 @@ from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP from data_designer.config.models import BaseInferenceParameters, ModelConfig from data_designer.config.utils.type_helpers import StrEnum -from data_designer.engine.column_generators.utils.prompt_renderer import ( - RecordBasedPromptRenderer, -) from data_designer.engine.configurable_task import ConfigurableTask, ConfigurableTaskMetadata, DataT, TaskConfigT from data_designer.engine.models.facade import ModelFacade @@ -84,17 +81,6 @@ def model_config(self) -> ModelConfig: def inference_parameters(self) -> BaseInferenceParameters: return self.model_config.inference_parameters - @functools.cached_property - def prompt_renderer(self) -> RecordBasedPromptRenderer: - return RecordBasedPromptRenderer( - response_recipe=self.response_recipe, - error_message_context={ - "column_name": self.config.name, - "column_type": self.config.column_type, - "model_alias": self.config.model_alias, - }, - ) - def log_pre_generation(self) -> None: emoji = COLUMN_TYPE_EMOJI_MAP[self.config.column_type] logger.info(f"{emoji} Preparing {self.config.column_type} column generation") diff --git a/src/data_designer/engine/column_generators/generators/image.py 
b/src/data_designer/engine/column_generators/generators/image.py new file mode 100644 index 00000000..f7cfba89 --- /dev/null +++ b/src/data_designer/engine/column_generators/generators/image.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + + +from litellm.types.utils import ImageResponse + +from data_designer.config.column_configs import ImageGenerationColumnConfig +from data_designer.config.models import ModalityDataType +from data_designer.engine.column_generators.generators.base import ( + ColumnGenerator, + GenerationStrategy, + GeneratorMetadata, + WithModelGeneration, +) +from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering +from data_designer.engine.processing.utils import deserialize_json_values +from data_designer.engine.resources.resource_provider import ResourceType + + +class ImageCellGenerator( + WithModelGeneration, WithJinja2UserTemplateRendering, ColumnGenerator[ImageGenerationColumnConfig] +): + @staticmethod + def metadata() -> GeneratorMetadata: + return GeneratorMetadata( + name="image_cell_generator", + description="Generate images using a specified model.", + generation_strategy=GenerationStrategy.CELL_BY_CELL, + required_resources=[ResourceType.MODEL_REGISTRY], + ) + + def generate(self, data: dict) -> dict: + deserialized_record = deserialize_json_values(data) + missing_columns = list(set(self.config.required_columns) - set(data.keys())) + if len(missing_columns) > 0: + error_msg = ( + f"There was an error preparing the Jinja2 expression template. " + f"The following columns {missing_columns} are missing!" 
+ ) + raise ValueError(error_msg) + + self.prepare_jinja2_template_renderer(self.config.prompt, list(deserialized_record.keys())) + prompt = self.render_template(deserialized_record) + image_response: ImageResponse = self.model.generate_image(prompt=prompt) + if self.model_config.inference_parameters.output_format == ModalityDataType.URL: + data[self.config.name] = image_response.data[0].url + else: + data[self.config.name] = image_response.data[0].b64_json + return data diff --git a/src/data_designer/engine/column_generators/generators/llm_completion.py b/src/data_designer/engine/column_generators/generators/llm_completion.py index 5665ba85..8fae174b 100644 --- a/src/data_designer/engine/column_generators/generators/llm_completion.py +++ b/src/data_designer/engine/column_generators/generators/llm_completion.py @@ -19,6 +19,7 @@ ) from data_designer.engine.column_generators.utils.prompt_renderer import ( PromptType, + RecordBasedPromptRenderer, create_response_recipe, ) from data_designer.engine.models.recipes.base import ResponseRecipe @@ -45,6 +46,17 @@ def max_conversation_correction_steps(self) -> int: def max_conversation_restarts(self) -> int: return DEFAULT_MAX_CONVERSATION_RESTARTS + @functools.cached_property + def prompt_renderer(self) -> RecordBasedPromptRenderer: + return RecordBasedPromptRenderer( + response_recipe=self.response_recipe, + error_message_context={ + "column_name": self.config.name, + "column_type": self.config.column_type, + "model_alias": self.config.model_alias, + }, + ) + def generate(self, data: dict) -> dict: deserialized_record = deserialize_json_values(data) diff --git a/src/data_designer/engine/column_generators/registry.py b/src/data_designer/engine/column_generators/registry.py index 7171e561..3d000729 100644 --- a/src/data_designer/engine/column_generators/registry.py +++ b/src/data_designer/engine/column_generators/registry.py @@ -5,6 +5,7 @@ from data_designer.config.column_configs import ( EmbeddingColumnConfig, 
ExpressionColumnConfig, + ImageGenerationColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, LLMStructuredColumnConfig, @@ -15,6 +16,7 @@ from data_designer.engine.column_generators.generators.base import ColumnGenerator from data_designer.engine.column_generators.generators.embedding import EmbeddingCellGenerator from data_designer.engine.column_generators.generators.expression import ExpressionColumnGenerator +from data_designer.engine.column_generators.generators.image import ImageCellGenerator from data_designer.engine.column_generators.generators.llm_completion import ( LLMCodeCellGenerator, LLMJudgeCellGenerator, @@ -47,7 +49,7 @@ def create_default_column_generator_registry(with_plugins: bool = True) -> Colum registry.register(DataDesignerColumnType.SEED_DATASET, SeedDatasetColumnGenerator, SeedDatasetMultiColumnConfig) registry.register(DataDesignerColumnType.VALIDATION, ValidationColumnGenerator, ValidationColumnConfig) registry.register(DataDesignerColumnType.LLM_STRUCTURED, LLMStructuredCellGenerator, LLMStructuredColumnConfig) - + registry.register(DataDesignerColumnType.IMAGE_GENERATION, ImageCellGenerator, ImageGenerationColumnConfig) if with_plugins: for plugin in PluginRegistry().get_plugins(PluginType.COLUMN_GENERATOR): registry.register( diff --git a/src/data_designer/engine/models/facade.py b/src/data_designer/engine/models/facade.py index 6b98c0a7..33c79797 100644 --- a/src/data_designer/engine/models/facade.py +++ b/src/data_designer/engine/models/facade.py @@ -9,7 +9,7 @@ from typing import Any from litellm.types.router import DeploymentTypedDict, LiteLLM_Params -from litellm.types.utils import EmbeddingResponse, ModelResponse +from litellm.types.utils import EmbeddingResponse, ImageResponse, ImageUsage, ModelResponse from data_designer.config.models import GenerationType, ModelConfig, ModelProvider from data_designer.engine.model_provider import ModelProviderRegistry @@ -131,6 +131,27 @@ def generate_text_embeddings( if not 
skip_usage_tracking and response is not None: self._track_usage_from_embedding(response) + @catch_llm_exceptions + def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> ImageResponse: + logger.debug( + f"Generating image with model {self.model_name!r}...", + extra={"model": self.model_name, "prompt": prompt}, + ) + kwargs = self.consolidate_kwargs(**kwargs) + response = None + try: + response = self._router.image_generation(prompt=prompt, model=self.model_name, **kwargs) + logger.debug( + f"Received image from model {self.model_name!r}", + extra={"model": self.model_name, "response": response}, + ) + return response + except Exception as e: + raise e + finally: + if not skip_usage_tracking and response is not None: + self._track_usage_from_image(response) + @catch_llm_exceptions def generate( self, @@ -280,3 +301,16 @@ def _track_usage_from_embedding(self, response: EmbeddingResponse | None) -> Non ), request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) + + def _track_usage_from_image(self, response: ImageResponse | None) -> None: + if response is None: + self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) + return + if response.usage is not None and isinstance(response.usage, ImageUsage): + self._usage_stats.extend( + token_usage=TokenUsageStats( + prompt_tokens=response.usage.input_tokens, + completion_tokens=response.usage.output_tokens, + ), + request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), + ) diff --git a/src/data_designer/engine/models/registry.py b/src/data_designer/engine/models/registry.py index 4330ea18..91025684 100644 --- a/src/data_designer/engine/models/registry.py +++ b/src/data_designer/engine/models/registry.py @@ -97,6 +97,12 @@ def run_health_check(self, model_aliases: set[str]) -> None: skip_usage_tracking=True, purpose="running health checks", ) + elif model.model_generation_type == GenerationType.IMAGE_GENERATION: 
+ model.generate_image( + prompt="Generate a simple pixel", + skip_usage_tracking=True, + purpose="running health checks", + ) else: raise ValueError(f"Unsupported generation type: {model.model_generation_type}") logger.info(" |-- ✅ Passed!") diff --git a/src/data_designer/essentials/__init__.py b/src/data_designer/essentials/__init__.py index cd1dd6ba..e8c6091c 100644 --- a/src/data_designer/essentials/__init__.py +++ b/src/data_designer/essentials/__init__.py @@ -8,6 +8,7 @@ from ..config.column_configs import ( EmbeddingColumnConfig, ExpressionColumnConfig, + ImageGenerationColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, LLMStructuredColumnConfig, @@ -28,6 +29,7 @@ GenerationType, ImageContext, ImageFormat, + ImageGenerationInferenceParameters, InferenceParameters, ManualDistribution, ManualDistributionParams, @@ -105,6 +107,8 @@ "InfoType", "ImageContext", "ImageFormat", + "ImageGenerationColumnConfig", + "ImageGenerationInferenceParameters", "InferenceParameters", "JudgeScoreProfilerConfig", "LLMCodeColumnConfig", From 2c5933f789b8e1dc47d40be56f3ff76741850d10 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 2 Dec 2025 17:49:32 -0700 Subject: [PATCH 17/64] update return type for generate_kwargs --- src/data_designer/config/models.py | 10 ++++------ tests/config/test_columns.py | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 6e535038..9d0ee6a6 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -142,7 +142,7 @@ class BaseInferenceParameters(ConfigBase, ABC): extra_body: Optional[dict[str, Any]] = None @property - def generate_kwargs(self) -> dict[str, Union[float, int]]: + def generate_kwargs(self) -> dict[str, Any]: result = {} if self.timeout is not None: result["timeout"] = self.timeout @@ -157,7 +157,7 @@ class CompletionInferenceParameters(BaseInferenceParameters): max_tokens: Optional[int] =
Field(default=None, ge=1) @property - def generate_kwargs(self) -> dict[str, Union[float, int]]: + def generate_kwargs(self) -> dict[str, Any]: result = super().generate_kwargs if self.temperature is not None: result["temperature"] = ( @@ -248,13 +248,11 @@ class ImageGenerationInferenceParameters(BaseInferenceParameters): output_format: Optional[ModalityDataType] = ModalityDataType.BASE64 @property - def generate_kwargs(self) -> dict[str, Union[float, int]]: + def generate_kwargs(self) -> dict[str, Any]: result = super().generate_kwargs result["size"] = self.size result["quality"] = self.quality - result["response_format"] = ( - self.output_format.value if self.output_format == ModalityDataType.URL else "b64_json" - ) + result["response_format"] = "b64_json" if self.output_format == ModalityDataType.BASE64 else self.output_format return result diff --git a/tests/config/test_columns.py b/tests/config/test_columns.py index f7763b07..2e74695f 100644 --- a/tests/config/test_columns.py +++ b/tests/config/test_columns.py @@ -50,6 +50,7 @@ def test_data_designer_column_type_get_display_order(): DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, DataDesignerColumnType.EMBEDDING, + DataDesignerColumnType.IMAGE_GENERATION, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EXPRESSION, ] @@ -61,6 +62,7 @@ def test_data_designer_column_type_is_llm_generated(): assert column_type_is_llm_generated(DataDesignerColumnType.LLM_STRUCTURED) assert column_type_is_llm_generated(DataDesignerColumnType.LLM_JUDGE) assert column_type_is_llm_generated(DataDesignerColumnType.EMBEDDING) + assert column_type_is_llm_generated(DataDesignerColumnType.IMAGE_GENERATION) assert not column_type_is_llm_generated(DataDesignerColumnType.SAMPLER) assert not column_type_is_llm_generated(DataDesignerColumnType.VALIDATION) assert not column_type_is_llm_generated(DataDesignerColumnType.EXPRESSION) @@ -75,6 +77,7 @@ def test_data_designer_column_type_is_in_dag(): assert 
column_type_used_in_execution_dag(DataDesignerColumnType.LLM_TEXT) assert column_type_used_in_execution_dag(DataDesignerColumnType.VALIDATION) assert column_type_used_in_execution_dag(DataDesignerColumnType.EMBEDDING) + assert column_type_used_in_execution_dag(DataDesignerColumnType.IMAGE_GENERATION) assert not column_type_used_in_execution_dag(DataDesignerColumnType.SAMPLER) assert not column_type_used_in_execution_dag(DataDesignerColumnType.SEED_DATASET) From c6c29d4fdca3a292d06abdbfaee11c2f66269cfb Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 3 Dec 2025 10:25:17 -0700 Subject: [PATCH 18/64] make generation_type a field of ModelConfig as opposed to a prop resolved based on the type of InferenceParameters --- src/data_designer/config/models.py | 25 +++++++++------- tests/config/test_models.py | 47 +++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 11 deletions(-) diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index 9d0ee6a6..b10deca0 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -271,6 +271,7 @@ class ModelConfig(ConfigBase): alias: str model: str inference_parameters: InferenceParametersT = Field(default_factory=CompletionInferenceParameters) + generation_type: GenerationType = Field(default=GenerationType.CHAT_COMPLETION) provider: Optional[str] = None @model_validator(mode="after") @@ -280,16 +281,20 @@ def _normalize_deprecated_inference_parameters(self) -> Self: self.inference_parameters = CompletionInferenceParameters(**self.inference_parameters.model_dump()) return self - @property - def generation_type(self) -> GenerationType: - if isinstance(self.inference_parameters, CompletionInferenceParameters): - return GenerationType.CHAT_COMPLETION - elif isinstance(self.inference_parameters, EmbeddingInferenceParameters): - return GenerationType.EMBEDDING - elif isinstance(self.inference_parameters, ImageGenerationInferenceParameters): - return 
GenerationType.IMAGE_GENERATION - else: - raise ValueError(f"Unsupported inference parameters type: {type(self.inference_parameters)}") + @model_validator(mode="after") + def _validate_generation_type(self) -> Self: + generation_type_instance_map = { + GenerationType.CHAT_COMPLETION: CompletionInferenceParameters, + GenerationType.EMBEDDING: EmbeddingInferenceParameters, + GenerationType.IMAGE_GENERATION: ImageGenerationInferenceParameters, + } + if self.generation_type not in generation_type_instance_map: + raise ValueError(f"Invalid generation type: {self.generation_type}") + if not isinstance(self.inference_parameters, generation_type_instance_map[self.generation_type]): + raise ValueError( + f"Inference parameters must be an instance of {generation_type_instance_map[self.generation_type].__name__!r} when generation_type is {self.generation_type!r}" + ) + return self class ModelProvider(ConfigBase): diff --git a/tests/config/test_models.py b/tests/config/test_models.py index f1f65401..40f6afe9 100644 --- a/tests/config/test_models.py +++ b/tests/config/test_models.py @@ -12,8 +12,11 @@ from data_designer.config.errors import InvalidConfigError from data_designer.config.models import ( CompletionInferenceParameters, + EmbeddingInferenceParameters, + GenerationType, ImageContext, ImageFormat, + ImageGenerationInferenceParameters, ManualDistribution, ManualDistributionParams, ModalityDataType, @@ -254,6 +257,48 @@ def test_load_model_configs(): load_model_configs(tmp_file.name) -def test_model_config_default_construction(): +def test_model_config_construction(): + # test default construction model_config = ModelConfig(alias="test", model="test") assert model_config.inference_parameters == CompletionInferenceParameters() + assert model_config.generation_type == GenerationType.CHAT_COMPLETION + + # test construction with completion inference parameters + completion_params = CompletionInferenceParameters(temperature=0.5, top_p=0.5, max_tokens=100) + model_config = 
ModelConfig(alias="test", model="test", inference_parameters=completion_params) + assert model_config.inference_parameters == completion_params + assert model_config.generation_type == GenerationType.CHAT_COMPLETION + + # test construction with embedding inference parameters + embedding_params = EmbeddingInferenceParameters(dimensions=100) + model_config = ModelConfig( + alias="test", model="test", generation_type=GenerationType.EMBEDDING, inference_parameters=embedding_params + ) + assert model_config.inference_parameters == embedding_params + assert model_config.generation_type == GenerationType.EMBEDDING + + # test construction with image generation inference parameters + image_generation_params = ImageGenerationInferenceParameters(size="1024x1024", quality="standard") + model_config = ModelConfig( + alias="test", + model="test", + generation_type=GenerationType.IMAGE_GENERATION, + inference_parameters=image_generation_params, + ) + assert model_config.inference_parameters == image_generation_params + assert model_config.generation_type == GenerationType.IMAGE_GENERATION + + +def test_model_config_invalid_generation_type(): + with pytest.raises(ValidationError, match="Input should be"): + ModelConfig(alias="test", model="test", generation_type="invalid_generation_type") + with pytest.raises( + ValidationError, + match="Inference parameters must be an instance of 'EmbeddingInferenceParameters' when generation_type is 'embedding'", + ): + ModelConfig( + alias="test", + model="test", + generation_type=GenerationType.EMBEDDING, + inference_parameters=CompletionInferenceParameters(), + ) From 06a724b4090df4f150d348dd7c5e9b67b562daa4 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 3 Dec 2025 11:07:57 -0700 Subject: [PATCH 19/64] remove regex based chunking from embedding generator --- src/data_designer/config/column_configs.py | 7 +--- src/data_designer/config/models.py | 2 +- .../column_generators/generators/embedding.py | 10 ++--- 
src/data_designer/engine/processing/utils.py | 38 +++++++++++++++++++ tests/engine/processing/test_utils.py | 17 +++++++++ 5 files changed, 62 insertions(+), 12 deletions(-) diff --git a/src/data_designer/config/column_configs.py b/src/data_designer/config/column_configs.py index eb93f9f0..a3bef936 100644 --- a/src/data_designer/config/column_configs.py +++ b/src/data_designer/config/column_configs.py @@ -386,17 +386,14 @@ class EmbeddingColumnConfig(SingleColumnConfig): Attributes: column_type: Discriminator field, always "embedding" for this configuration type. - target_column: The column to generate embeddings for. + target_column: The column to generate embeddings for. The column could be a single text string or a list of text strings in stringified JSON format. + If it is a list of text strings in stringified JSON format, the embeddings will be generated for each text string. model_alias: The model to use for embedding generation. - chunk_pattern: Optional regex pattern to split the text in the target column into chunks. For example, if chunk_pattern - is r'\n+', the text will be split into chunks using one or more newlines as separators and embeddings generated for each chunk. - If not provided, the entire text will be embedded as a single chunk. 
""" column_type: Literal["embedding"] = "embedding" target_column: str model_alias: str - chunk_pattern: Optional[str] = None @property def required_columns(self) -> list[str]: diff --git a/src/data_designer/config/models.py b/src/data_designer/config/models.py index b10deca0..4b3ae12c 100644 --- a/src/data_designer/config/models.py +++ b/src/data_designer/config/models.py @@ -271,7 +271,7 @@ class ModelConfig(ConfigBase): alias: str model: str inference_parameters: InferenceParametersT = Field(default_factory=CompletionInferenceParameters) - generation_type: GenerationType = Field(default=GenerationType.CHAT_COMPLETION) + generation_type: Optional[GenerationType] = Field(default=GenerationType.CHAT_COMPLETION) provider: Optional[str] = None @model_validator(mode="after") diff --git a/src/data_designer/engine/column_generators/generators/embedding.py b/src/data_designer/engine/column_generators/generators/embedding.py index 48fc309f..ed738e8f 100644 --- a/src/data_designer/engine/column_generators/generators/embedding.py +++ b/src/data_designer/engine/column_generators/generators/embedding.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -import re from data_designer.config.column_configs import EmbeddingColumnConfig from data_designer.engine.column_generators.generators.base import ( @@ -10,7 +9,7 @@ GeneratorMetadata, WithModelGeneration, ) -from data_designer.engine.processing.utils import deserialize_json_values +from data_designer.engine.processing.utils import deserialize_json_values, parse_list_string from data_designer.engine.resources.resource_provider import ResourceType @@ -26,10 +25,9 @@ def metadata() -> GeneratorMetadata: def generate(self, data: dict) -> dict: deserialized_record = deserialize_json_values(data) - input_text = deserialized_record[self.config.target_column] - input_chunks = re.split(self.config.chunk_pattern, input_text) if self.config.chunk_pattern else [input_text] - input_chunks = [chunk.strip() for chunk in input_chunks if chunk.strip()] - embeddings = self.model.generate_text_embeddings(input_texts=input_chunks) + input_texts = parse_list_string(deserialized_record[self.config.target_column]) + embeddings = self.model.generate_text_embeddings(input_texts=input_texts) + data[self.config.name] = { "embeddings": embeddings, "num_embeddings": len(embeddings), diff --git a/src/data_designer/engine/processing/utils.py b/src/data_designer/engine/processing/utils.py index 3579b3bd..5d42c40e 100644 --- a/src/data_designer/engine/processing/utils.py +++ b/src/data_designer/engine/processing/utils.py @@ -1,8 +1,10 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +import ast import json import logging +import re from typing import Any, TypeVar, Union, overload import pandas as pd @@ -100,6 +102,42 @@ def deserialize_json_values(data): return data +def parse_list_string(text: str) -> list[str]: + """Parse a list from a string, handling JSON arrays, Python lists, and trailing commas.""" + text = text.strip() + + # Try JSON first + try: + list_obj = json.loads(text) + if isinstance(list_obj, list): + return _clean_whitespace(list_obj) + except json.JSONDecodeError: + pass + + # Remove trailing commas before closing brackets (common in JSON-like strings) + text_cleaned = re.sub(r",\s*]", "]", text) + text_cleaned = re.sub(r",\s*}", "}", text_cleaned) + + # Try JSON again with cleaned text + try: + return _clean_whitespace(json.loads(text_cleaned)) + except json.JSONDecodeError: + pass + + # Try Python literal eval (handles single quotes) + try: + return _clean_whitespace(ast.literal_eval(text_cleaned)) + except (ValueError, SyntaxError): + pass + + # If all else fails, return the original text + return [text.strip()] + + +def _clean_whitespace(texts: list[str]) -> list[str]: + return [text.strip() for text in texts] + + def _verify_columns_are_unique(datasets: list[pd.DataFrame]) -> None: joined_columns = set() for df in datasets: diff --git a/tests/engine/processing/test_utils.py b/tests/engine/processing/test_utils.py index a41e0ec2..dec0fe6a 100644 --- a/tests/engine/processing/test_utils.py +++ b/tests/engine/processing/test_utils.py @@ -9,6 +9,7 @@ from data_designer.engine.processing.utils import ( concat_datasets, deserialize_json_values, + parse_list_string, ) @@ -116,3 +117,19 @@ def test_concat_datasets_logging(mock_logger, stub_sample_dataframes): def test_deserialize_json_values_scenarios(test_case, input_data, expected_result): result = deserialize_json_values(input_data) assert result == expected_result + + +@pytest.mark.parametrize( + "input_string,expected_result", + [ + 
('["a", "b", "c"]', ["a", "b", "c"]), # valid stringified json array + ('[" a ", " b", "c "]', ["a", "b", "c"]), # valid stringified json array with whitespace + ('["a", "b", "c",]', ["a", "b", "c"]), # valid stringified json array with trailing comma + ("['a', 'b', 'c']", ["a", "b", "c"]), # valid python-style list with single quotes + ("['a', 'b', 'c', ]", ["a", "b", "c"]), # valid python-style list with trailing comma + ("simple string ", ["simple string"]), # simple string with whitespace + ], +) +def test_parse_list_string_scenarios(input_string, expected_result): + result = parse_list_string(input_string) + assert result == expected_result From f291033e6e1e0debdf31f10f732931c724370afe Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 4 Feb 2026 09:59:52 -0700 Subject: [PATCH 20/64] save progress --- .../src/data_designer/config/__init__.py | 6 + .../data_designer/config/column_configs.py | 11 +- .../src/data_designer/config/models.py | 65 ++- .../config/utils/visualization.py | 128 ++++++ .../column_generators/generators/image.py | 20 +- .../src/data_designer/engine/models/facade.py | 116 ++++- .../integrations/huggingface/client.py | 419 ++++++++++++++++++ pyproject.toml | 3 + uv.lock | 412 ++++++++++++++++- 9 files changed, 1148 insertions(+), 32 deletions(-) create mode 100644 packages/data-designer/src/data_designer/integrations/huggingface/client.py diff --git a/packages/data-designer-config/src/data_designer/config/__init__.py b/packages/data-designer-config/src/data_designer/config/__init__.py index 0ebf06be..46122609 100644 --- a/packages/data-designer-config/src/data_designer/config/__init__.py +++ b/packages/data-designer-config/src/data_designer/config/__init__.py @@ -15,6 +15,7 @@ from data_designer.config.column_configs import ( # noqa: F401 EmbeddingColumnConfig, ExpressionColumnConfig, + ImageGenerationColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, LLMStructuredColumnConfig, @@ -34,7 +35,9 @@ ToolConfig, ) from 
data_designer.config.models import ( # noqa: F401 + ChatCompletionImageInferenceParams, ChatCompletionInferenceParams, + DiffusionImageInferenceParams, EmbeddingInferenceParams, GenerationType, ImageContext, @@ -117,6 +120,7 @@ # column_configs "EmbeddingColumnConfig": (_MOD_COLUMN_CONFIGS, "EmbeddingColumnConfig"), "ExpressionColumnConfig": (_MOD_COLUMN_CONFIGS, "ExpressionColumnConfig"), + "ImageGenerationColumnConfig": (_MOD_COLUMN_CONFIGS, "ImageGenerationColumnConfig"), "LLMCodeColumnConfig": (_MOD_COLUMN_CONFIGS, "LLMCodeColumnConfig"), "LLMJudgeColumnConfig": (_MOD_COLUMN_CONFIGS, "LLMJudgeColumnConfig"), "LLMStructuredColumnConfig": (_MOD_COLUMN_CONFIGS, "LLMStructuredColumnConfig"), @@ -138,7 +142,9 @@ "MCPProvider": (_MOD_MCP, "MCPProvider"), "ToolConfig": (_MOD_MCP, "ToolConfig"), # models + "ChatCompletionImageInferenceParams": (_MOD_MODELS, "ChatCompletionImageInferenceParams"), "ChatCompletionInferenceParams": (_MOD_MODELS, "ChatCompletionInferenceParams"), + "DiffusionImageInferenceParams": (_MOD_MODELS, "DiffusionImageInferenceParams"), "EmbeddingInferenceParams": (_MOD_MODELS, "EmbeddingInferenceParams"), "GenerationType": (_MOD_MODELS, "GenerationType"), "ImageContext": (_MOD_MODELS, "ImageContext"), diff --git a/packages/data-designer-config/src/data_designer/config/column_configs.py b/packages/data-designer-config/src/data_designer/config/column_configs.py index ee5efa80..9e1f5737 100644 --- a/packages/data-designer-config/src/data_designer/config/column_configs.py +++ b/packages/data-designer-config/src/data_designer/config/column_configs.py @@ -480,7 +480,14 @@ def side_effect_columns(self) -> list[str]: class ImageGenerationColumnConfig(SingleColumnConfig): """Configuration for image generation columns. - Image columns generate images using a specified model. + Image columns generate images using either autoregressive or diffusion models. 
+ The API used is automatically determined by the model's inference parameters: + + - **Autoregressive models** (ChatCompletionImageInferenceParams): + GPT-5, gpt-image-*, Gemini image generation models via chat completions API + + - **Diffusion models** (DiffusionImageInferenceParams): + DALL-E, Imagen, Stable Diffusion via image_generation API Attributes: column_type: Discriminator field, always "image-generation" for this configuration type. @@ -505,7 +512,7 @@ def required_columns(self) -> list[str]: Returns: List of unique column names referenced in Jinja2 templates. """ - return list(extract_keywords_from_jinja2_template(self.expr)) + return list(extract_keywords_from_jinja2_template(self.prompt)) @model_validator(mode="after") def assert_prompt_valid_jinja(self) -> Self: diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index 5e9b3518..203ddbdb 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -242,7 +242,8 @@ def sample(self) -> float: class GenerationType(str, Enum): CHAT_COMPLETION = "chat-completion" EMBEDDING = "embedding" - IMAGE_GENERATION = "image-generation" + CHAT_COMPLETION_IMAGE = "chat-completion-image" + DIFFUSION_IMAGE = "diffusion-image" class BaseInferenceParams(ConfigBase, ABC): @@ -415,23 +416,64 @@ def generate_kwargs(self) -> dict[str, float | int]: return result -class ImageGenerationInferenceParams(BaseInferenceParams): - generation_type: Literal[GenerationType.IMAGE_GENERATION] = GenerationType.IMAGE_GENERATION +class ChatCompletionImageInferenceParams(BaseInferenceParams): + """Configuration for image generation using autoregressive models via chat completions API. + + Uses the standard chat completions API for autoregressive multimodal models + that can generate images (GPT-5, gpt-image-*, Gemini image generation, etc.). 
+ + Attributes: + generation_type: Type of generation, always "chat-completion-image" for this class. + quality: Optional quality setting for image generation (e.g., "standard", "hd"). + size: Optional size specification for generated images (e.g., "1024x1024", "1792x1024"). + """ + + generation_type: Literal[GenerationType.CHAT_COMPLETION_IMAGE] = GenerationType.CHAT_COMPLETION_IMAGE + quality: str | None = None + size: str | None = None + + @property + def generate_kwargs(self) -> dict[str, Any]: + result = super().generate_kwargs + if self.quality is not None: + result["quality"] = self.quality + if self.size is not None: + result["size"] = self.size + return result + + +class DiffusionImageInferenceParams(BaseInferenceParams): + """Configuration for image generation using diffusion models via image_generation API. + + Uses the legacy image_generation API for diffusion models like DALL-E, Imagen, + and Stable Diffusion. + + Attributes: + generation_type: Type of generation, always "diffusion-image" for this class. + quality: Quality setting for image generation (e.g., "standard", "hd"). + size: Size specification for generated images (e.g., "1024x1024", "1792x1024"). + output_format: Format of the output ("url" or "base64"). Default: "base64". 
+ """ + + generation_type: Literal[GenerationType.DIFFUSION_IMAGE] = GenerationType.DIFFUSION_IMAGE quality: str size: str - output_format: ModalityDataType | None = ModalityDataType.BASE64 + output_format: ModalityDataType = ModalityDataType.BASE64 @property def generate_kwargs(self) -> dict[str, Any]: result = super().generate_kwargs result["size"] = self.size result["quality"] = self.quality - result["response_format"] = "b64_json" if self.output_format == ModalityDataType.BASE64 else self.output_format + result["response_format"] = "b64_json" if self.output_format == ModalityDataType.BASE64 else "url" return result InferenceParamsT: TypeAlias = Annotated[ - ChatCompletionInferenceParams | EmbeddingInferenceParams | ImageGenerationInferenceParams, + ChatCompletionInferenceParams + | EmbeddingInferenceParams + | ChatCompletionImageInferenceParams + | DiffusionImageInferenceParams, Field(discriminator="generation_type"), ] @@ -464,8 +506,15 @@ def generation_type(self) -> GenerationType: def _convert_inference_parameters(cls, value: Any) -> Any: """Convert raw dict to appropriate inference parameters type based on field presence.""" if isinstance(value, dict): - # Infer type from presence of embedding-specific fields - if "encoding_format" in value or "dimensions" in value: + # Check for explicit generation_type first + gen_type = value.get("generation_type") + + # Infer type from generation_type or field presence + if gen_type == "chat-completion-image": + return ChatCompletionImageInferenceParams(**value) + elif gen_type == "diffusion-image": + return DiffusionImageInferenceParams(**value) + elif gen_type == "embedding" or "encoding_format" in value or "dimensions" in value: return EmbeddingInferenceParams(**value) else: return ChatCompletionInferenceParams(**value) diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index 7e5c79a9..38189068 
100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -3,6 +3,8 @@ from __future__ import annotations +import base64 +import io import json import os from collections import OrderedDict @@ -39,6 +41,93 @@ console = Console() +def _is_base64_image(value: str) -> bool: + """Check if a string is base64-encoded image data.""" + if not isinstance(value, str): + return False + # Check if it starts with data URI scheme + if value.startswith("data:image/"): + return True + # Check if it looks like base64 (at least 100 chars, contains only base64 chars) + if len(value) > 100 and all( + c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in value[:100] + ): + try: + # Try to decode a small portion to verify it's valid base64 + base64.b64decode(value[:100]) + return True + except Exception: + return False + return False + + +def _is_image_url(value: str) -> bool: + """Check if a string is an image URL.""" + if not isinstance(value, str): + return False + return value.startswith(("http://", "https://")) and any( + ext in value.lower() for ext in [".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"] + ) + + +def _display_image_if_in_notebook(image_data: str, col_name: str, max_width: int = 512) -> bool: + """Display image with caption in Jupyter notebook if available. + + Args: + image_data: Base64-encoded image data or data URI. + col_name: Name of the column (used for caption). + max_width: Maximum width for the displayed image in pixels. + + Returns: + True if image was displayed, False otherwise. 
+ """ + try: + # Check if we're in a Jupyter environment + from IPython.display import HTML, display + from PIL import Image as PILImage + + get_ipython() # This will raise NameError if not in IPython/Jupyter + + # Decode the image + if image_data.startswith("data:image/"): + # Extract base64 from data URI + base64_data = image_data.split(",", 1)[1] if "," in image_data else image_data + else: + base64_data = image_data + + image_bytes = base64.b64decode(base64_data) + + # Open image with PIL and resize if needed + img = PILImage.open(io.BytesIO(image_bytes)) + + # Resize if image is too large + if img.width > max_width: + ratio = max_width / img.width + new_height = int(img.height * ratio) + img = img.resize((max_width, new_height), PILImage.Resampling.LANCZOS) + + # Convert back to base64 for HTML display + buffered = io.BytesIO() + img.save(buffered, format=img.format or "PNG") + img_base64 = base64.b64encode(buffered.getvalue()).decode() + + # Create HTML with caption and image in left-aligned container + html = f""" +
+
🖼️ {col_name}
+ +
+ """ + display(HTML(html)) + return True + except (ImportError, NameError): + # Not in a notebook environment + return False + except Exception as e: + console.print(f"[yellow]⚠️ Could not display image for column '{col_name}': {e}[/yellow]") + return False + + def get_nvidia_api_key() -> str | None: return os.getenv(NVIDIA_API_KEY_ENV_VAR_NAME) @@ -217,6 +306,40 @@ def display_sample_record( table.add_row(col.name, convert_to_row_element(record[col.name])) render_list.append(pad_console_element(table)) + # Collect image generation columns (will be displayed at the end) + image_columns = config_builder.get_columns_of_type(DataDesignerColumnType.IMAGE_GENERATION) + images_to_display_later = [] + if len(image_columns) > 0: + # Check if we're in a notebook to decide display style + try: + get_ipython() + in_notebook = True + except NameError: + in_notebook = False + + # Create table for image columns + table = Table(title="Images", **table_kws) + table.add_column("Name") + table.add_column("Preview") + + for col in image_columns: + if col.drop: + continue + image_data = record[col.name] + if _is_base64_image(image_data): + preview = f"" + if in_notebook: + images_to_display_later.append((col.name, image_data)) + elif _is_image_url(image_data): + preview = f"" + if in_notebook: + images_to_display_later.append((col.name, image_data)) + else: + preview = str(image_data)[:100] + "..." 
if len(str(image_data)) > 100 else str(image_data) + table.add_row(col.name, preview) + + render_list.append(pad_console_element(table)) + for col in config_builder.get_columns_of_type(DataDesignerColumnType.LLM_CODE): panel = Panel( Syntax( @@ -281,6 +404,11 @@ def display_sample_record( console.print(Group(*render_list), markup=False) + # Display images at the bottom with captions (only in notebook) + if len(images_to_display_later) > 0: + for col_name, image_data in images_to_display_later: + _display_image_if_in_notebook(image_data, col_name) + def get_truncated_list_as_string(long_list: list[Any], max_items: int = 2) -> str: if max_items <= 0: diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index 3279de85..f59573c6 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -2,16 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 -from litellm.types.utils import ImageResponse - from data_designer.config.column_configs import ImageGenerationColumnConfig -from data_designer.config.models import ModalityDataType from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering from data_designer.engine.processing.utils import deserialize_json_values class ImageCellGenerator(WithJinja2UserTemplateRendering, ColumnGeneratorWithModel[ImageGenerationColumnConfig]): + """Generator for image columns using either autoregressive or diffusion models. 
+ + Automatically detects the appropriate API based on the model's inference parameters: + - ChatCompletionImageGenerationInferenceParams β†’ Responses API (GPT-5, gpt-image-*, Gemini) + - DiffusionImageGenerationInferenceParams β†’ image_generation API (DALL-E, Imagen, Stable Diffusion) + """ + @staticmethod def get_generation_strategy() -> GenerationStrategy: return GenerationStrategy.CELL_BY_CELL @@ -28,9 +32,9 @@ def generate(self, data: dict) -> dict: self.prepare_jinja2_template_renderer(self.config.prompt, list(deserialized_record.keys())) prompt = self.render_template(deserialized_record) - image_response: ImageResponse = self.model.generate_image(prompt=prompt) - if self.model_config.inference_parameters.output_format == ModalityDataType.URL: - data[self.config.name] = image_response.data[0].url - else: - data[self.config.name] = image_response.data[0].b64_json + + # Generate image (automatically routes to appropriate API based on inference params) + # Returns base64-encoded image data or URL depending on configuration + image_data = self.model.generate_image(prompt=prompt) + data[self.config.name] = image_data return data diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index dd5a1a71..ed7b7715 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -156,27 +156,113 @@ def generate_text_embeddings( self._track_usage_from_embedding(response) @catch_llm_exceptions - def generate_image( - self, prompt: str, skip_usage_tracking: bool = False, **kwargs - ) -> litellm.types.utils.ImageResponse: + def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: + """Generate image using either autoregressive or diffusion models. 
+ + Automatically detects the appropriate API based on the model's generation_type: + - CHAT_COMPLETION_IMAGE β†’ chat/completions API (GPT-5, gpt-image-*, Gemini) + - DIFFUSION_IMAGE β†’ image_generation API (DALL-E, Imagen, Stable Diffusion) + + Args: + prompt: The prompt for image generation. + skip_usage_tracking: Whether to skip usage tracking. Default: False. + **kwargs: Additional arguments to pass to the model. + + Returns: + Base64-encoded image data (without data URI prefix for autoregressive models). + For diffusion models: URL string or base64 data depending on output_format. + """ + from data_designer.config.models import GenerationType + logger.debug( f"Generating image with model {self.model_name!r}...", extra={"model": self.model_name, "prompt": prompt}, ) + + # Determine which API to use based on generation_type + gen_type = self.model_generation_type + + if gen_type == GenerationType.DIFFUSION_IMAGE: + return self._generate_image_diffusion(prompt, skip_usage_tracking, **kwargs) + else: + # Default to chat-completion (CHAT_COMPLETION_IMAGE or backward compatibility) + return self._generate_image_chat_completion(prompt, skip_usage_tracking, **kwargs) + + def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: + """Generate image using autoregressive model via chat completions API.""" + kwargs = self.consolidate_kwargs(**kwargs) + + # Build messages for image generation + messages = [ChatMessage.as_user(content=prompt)] + + response = None + try: + response = self.completion( + messages=messages, + skip_usage_tracking=skip_usage_tracking, + **kwargs, + ) + logger.debug( + f"Received image from autoregressive model {self.model_name!r}", + extra={"model": self.model_name, "response": response}, + ) + + # Check if response has images attribute (some models return images here) + if hasattr(response.choices[0].message, "images") and response.choices[0].message.images: + # Extract base64 from first image 
+ first_image = response.choices[0].message.images[0] + if isinstance(first_image, dict) and "image_url" in first_image: + image_url = first_image["image_url"] + if isinstance(image_url, dict) and "url" in image_url: + # Extract base64 data from data URL + url = image_url["url"] + if url.startswith("data:image/"): + # Remove data URI prefix to get pure base64 + return url.split(",", 1)[1] if "," in url else url + return url + elif isinstance(image_url, str): + if image_url.startswith("data:image/"): + return image_url.split(",", 1)[1] if "," in image_url else image_url + return image_url + return str(first_image) + + # If no images attribute, check content for base64 or image data + content = response.choices[0].message.content or "" + if content.startswith("data:image/"): + # Remove data URI prefix + return content.split(",", 1)[1] if "," in content else content + + # Return content as-is (might be base64 or other format) + return content + + except Exception as e: + raise e + + def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: + """Generate image using diffusion model via image_generation API.""" + from data_designer.config.models import ModalityDataType + kwargs = self.consolidate_kwargs(**kwargs) response = None try: response = self._router.image_generation(prompt=prompt, model=self.model_name, **kwargs) logger.debug( - f"Received image from model {self.model_name!r}", + f"Received image from diffusion model {self.model_name!r}", extra={"model": self.model_name, "response": response}, ) - return response + + # Return URL or base64 based on output_format + output_format = getattr(self._model_config.inference_parameters, "output_format", ModalityDataType.BASE64) + if output_format == ModalityDataType.URL: + return response.data[0].url + else: + return response.data[0].b64_json + except Exception as e: raise e finally: if not skip_usage_tracking and response is not None: - self._track_usage_from_image(response) + 
self._track_usage_from_image_diffusion(response) @catch_llm_exceptions def generate( @@ -365,28 +451,32 @@ def _track_usage_from_embedding(self, response: litellm.types.utils.EmbeddingRes request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) - def _track_usage_from_embedding(self, response: litellm.types.utils.EmbeddingResponse | None) -> None: + def _track_usage_from_response(self, response: litellm.types.utils.ResponseResponse | None) -> None: + """Track usage from Responses API response.""" if response is None: self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) return - if response.usage is not None and response.usage.prompt_tokens is not None: + if response.usage is not None: + input_tokens = getattr(response.usage, "input_tokens", 0) or 0 + output_tokens = getattr(response.usage, "output_tokens", 0) or 0 self._usage_stats.extend( token_usage=TokenUsageStats( - prompt_tokens=response.usage.prompt_tokens, - completion_tokens=0, + input_tokens=input_tokens, + output_tokens=output_tokens, ), request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) - def _track_usage_from_image(self, response: litellm.types.utils.ImageResponse | None) -> None: + def _track_usage_from_image_diffusion(self, response: litellm.types.utils.ImageResponse | None) -> None: + """Track usage from image_generation API response.""" if response is None: self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) return if response.usage is not None and isinstance(response.usage, litellm.types.utils.ImageUsage): self._usage_stats.extend( token_usage=TokenUsageStats( - prompt_tokens=response.usage.input_tokens, - completion_tokens=response.usage.output_tokens, + input_tokens=response.usage.input_tokens, + output_tokens=response.usage.output_tokens, ), request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) diff --git 
a/packages/data-designer/src/data_designer/integrations/huggingface/client.py b/packages/data-designer/src/data_designer/integrations/huggingface/client.py new file mode 100644 index 00000000..fe789785 --- /dev/null +++ b/packages/data-designer/src/data_designer/integrations/huggingface/client.py @@ -0,0 +1,419 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import json +import logging +import tempfile +from pathlib import Path + +from huggingface_hub import HfApi +from huggingface_hub.errors import HFValidationError +from huggingface_hub.utils import HfHubHTTPError, validate_repo_id + +from data_designer.config.utils.constants import HUGGINGFACE_HUB_DATASET_URL_PREFIX +from data_designer.engine.dataset_builders.artifact_storage import ( + FINAL_DATASET_FOLDER_NAME, + METADATA_FILENAME, + PROCESSORS_OUTPUTS_FOLDER_NAME, + SDG_CONFIG_FILENAME, +) +from data_designer.errors import DataDesignerError +from data_designer.integrations.huggingface.dataset_card import DataDesignerDatasetCard +from data_designer.logging import RandomEmoji + +logger = logging.getLogger(__name__) + + +class HuggingFaceHubClientUploadError(DataDesignerError): + """Error during Hugging Face dataset upload.""" + + +class HuggingFaceHubClient: + """Client for interacting with Hugging Face Hub to upload datasets.""" + + def __init__(self, token: str | None = None): + """Initialize Hugging Face Hub client. + + Args: + token: Hugging Face API token. If None, the token is automatically + resolved from HF_TOKEN environment variable or cached credentials + from `huggingface-cli login`. + """ + self._token = token + self._api = HfApi(token=token) + + @property + def has_token(self) -> bool: + """Check if a token was explicitly provided. + + Returns: + True if a token was provided during initialization, False otherwise. 
+ """ + return self._token is not None + + def upload_dataset( + self, + repo_id: str, + base_dataset_path: Path, + description: str, + *, + private: bool = False, + ) -> str: + """Upload dataset to Hugging Face Hub. + + Uploads the complete dataset including: + - Main parquet batch files from parquet-files/ β†’ data/ + - Processor output batch files from processors-files/{name}/ β†’ {name}/ + - Existing sdg.json and metadata.json files + - Auto-generated README.md (dataset card) + + Args: + repo_id: Hugging Face dataset repo ID (e.g., "username/dataset-name") + base_dataset_path: Path to base_dataset_path (contains parquet-files/, sdg.json, etc.) + description: Custom description text for dataset card + private: Whether to create private repo + + Returns: + URL to the uploaded dataset + + Raises: + HuggingFaceUploadError: If validation fails or upload encounters errors + """ + logger.info(f"πŸ€— Uploading dataset to Hugging Face Hub: {repo_id}") + + self._validate_repo_id(repo_id=repo_id) + self._validate_dataset_path(base_dataset_path=base_dataset_path) + self._create_or_get_repo(repo_id=repo_id, private=private) + + logger.info(f" |-- {RandomEmoji.data()} Uploading dataset card...") + try: + self._upload_dataset_card( + repo_id=repo_id, + metadata_path=base_dataset_path / METADATA_FILENAME, + sdg_path=base_dataset_path / SDG_CONFIG_FILENAME, + description=description, + ) + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to upload dataset card: {e}") from e + + self._upload_main_dataset_files(repo_id=repo_id, parquet_folder=base_dataset_path / FINAL_DATASET_FOLDER_NAME) + self._upload_processor_files( + repo_id=repo_id, processors_folder=base_dataset_path / PROCESSORS_OUTPUTS_FOLDER_NAME + ) + self._upload_config_files( + repo_id=repo_id, + metadata_path=base_dataset_path / METADATA_FILENAME, + sdg_path=base_dataset_path / SDG_CONFIG_FILENAME, + ) + + url = f"{HUGGINGFACE_HUB_DATASET_URL_PREFIX}{repo_id}" + logger.info(f" |-- 
{RandomEmoji.success()} Dataset uploaded successfully! View at: {url}") + return url + + def _create_or_get_repo(self, repo_id: str, *, private: bool = False) -> None: + """Create or get existing repository on Hugging Face Hub. + + Args: + repo_id: Hugging Face dataset repo ID + private: Whether to create private repo + + Raises: + HuggingFaceUploadError: If repository creation fails + """ + logger.info(f" |-- {RandomEmoji.working()} Checking if repository exists...") + try: + repo_exists = self._api.repo_exists(repo_id=repo_id, repo_type="dataset") + if repo_exists: + logger.info(f" |-- {RandomEmoji.success()} Repository already exists, updating content...") + else: + logger.info(f" |-- {RandomEmoji.working()} Creating new repository...") + + self._api.create_repo( + repo_id=repo_id, + repo_type="dataset", + exist_ok=True, + private=private, + ) + except HfHubHTTPError as e: + if e.response.status_code == 401: + raise HuggingFaceHubClientUploadError( + "Authentication failed. Please provide a valid Hugging Face token. " + "You can set it via the token parameter or HF_TOKEN environment variable, " + "or run 'huggingface-cli login'." + ) from e + elif e.response.status_code == 403: + raise HuggingFaceHubClientUploadError( + f"Permission denied. You don't have access to create repository '{repo_id}'. " + "Check your token permissions or repository ownership." + ) from e + else: + raise HuggingFaceHubClientUploadError(f"Failed to create repository '{repo_id}': {e}") from e + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Unexpected error creating repository '{repo_id}': {e}") from e + + def _upload_main_dataset_files(self, repo_id: str, parquet_folder: Path) -> None: + """Upload main parquet dataset files. 
+ + Args: + repo_id: Hugging Face dataset repo ID + parquet_folder: Path to folder containing parquet files + + Raises: + HuggingFaceUploadError: If upload fails + """ + logger.info(f" |-- {RandomEmoji.loading()} Uploading main dataset files...") + try: + self._api.upload_folder( + repo_id=repo_id, + folder_path=str(parquet_folder), + path_in_repo="data", + repo_type="dataset", + commit_message="Upload main dataset files", + ) + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to upload parquet files: {e}") from e + + def _upload_processor_files(self, repo_id: str, processors_folder: Path) -> None: + """Upload processor output files. + + Args: + repo_id: Hugging Face dataset repo ID + processors_folder: Path to folder containing processor output directories + + Raises: + HuggingFaceUploadError: If upload fails + """ + if not processors_folder.exists(): + return + + processor_dirs = [d for d in processors_folder.iterdir() if d.is_dir()] + if not processor_dirs: + return + + logger.info(f" |-- {RandomEmoji.loading()} Uploading processor outputs ({len(processor_dirs)} processors)...") + for processor_dir in processor_dirs: + try: + self._api.upload_folder( + repo_id=repo_id, + folder_path=str(processor_dir), + path_in_repo=processor_dir.name, + repo_type="dataset", + commit_message=f"Upload {processor_dir.name} processor outputs", + ) + except Exception as e: + raise HuggingFaceHubClientUploadError( + f"Failed to upload processor outputs for '{processor_dir.name}': {e}" + ) from e + + def _upload_config_files(self, repo_id: str, metadata_path: Path, sdg_path: Path) -> None: + """Upload configuration files (sdg.json and metadata.json). 
+ + Args: + repo_id: Hugging Face dataset repo ID + metadata_path: Path to metadata.json file + sdg_path: Path to sdg.json file + + Raises: + HuggingFaceUploadError: If upload fails + """ + logger.info(f" |-- {RandomEmoji.loading()} Uploading configuration files...") + + if sdg_path.exists(): + try: + self._api.upload_file( + repo_id=repo_id, + path_or_fileobj=str(sdg_path), + path_in_repo=SDG_CONFIG_FILENAME, + repo_type="dataset", + commit_message="Upload sdg.json", + ) + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to upload sdg.json: {e}") from e + + if metadata_path.exists(): + tmp_path = None + try: + updated_metadata = self._update_metadata_paths(metadata_path) + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp_file: + json.dump(updated_metadata, tmp_file, indent=2) + tmp_path = tmp_file.name + + self._api.upload_file( + repo_id=repo_id, + path_or_fileobj=tmp_path, + path_in_repo=METADATA_FILENAME, + repo_type="dataset", + commit_message=f"Upload {METADATA_FILENAME}", + ) + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to upload {METADATA_FILENAME}: {e}") from e + finally: + if tmp_path and Path(tmp_path).exists(): + Path(tmp_path).unlink() + + def _upload_dataset_card(self, repo_id: str, metadata_path: Path, sdg_path: Path, description: str) -> None: + """Generate and upload dataset card from metadata.json. 
+ + Args: + repo_id: Hugging Face dataset repo ID + metadata_path: Path to metadata.json file + sdg_path: Path to sdg.json file + description: Custom description text for dataset card + + Raises: + HuggingFaceUploadError: If dataset card generation or upload fails + """ + try: + with open(metadata_path) as f: + metadata = json.load(f) + except json.JSONDecodeError as e: + raise HuggingFaceHubClientUploadError(f"Failed to parse {METADATA_FILENAME}: {e}") from e + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to read {METADATA_FILENAME}: {e}") from e + + sdg_config = None + if sdg_path.exists(): + try: + with open(sdg_path) as f: + sdg_config = json.load(f) + except json.JSONDecodeError as e: + raise HuggingFaceHubClientUploadError(f"Failed to parse sdg.json: {e}") from e + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to read sdg.json: {e}") from e + + try: + card = DataDesignerDatasetCard.from_metadata( + metadata=metadata, + sdg_config=sdg_config, + repo_id=repo_id, + description=description, + ) + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to generate dataset card: {e}") from e + + try: + card.push_to_hub(repo_id, repo_type="dataset") + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to push dataset card to hub: {e}") from e + + @staticmethod + def _validate_repo_id(repo_id: str) -> None: + """Validate Hugging Face dataset repository ID format. + + Args: + repo_id: Repository ID to validate + + Raises: + HuggingFaceHubClientUploadError: If repo_id format is invalid + """ + # Check if repo_id is empty + if not repo_id or not repo_id.strip(): + raise HuggingFaceHubClientUploadError("repo_id must be a non-empty string") + + # Check for exactly one slash (username/dataset-name format). This is not enforced by huggingface_hub's validator. + if repo_id.count("/") != 1: + raise HuggingFaceHubClientUploadError( + f"Invalid repo_id format: '{repo_id}'. 
Expected format: 'username/dataset-name'" + ) + + # Use huggingface_hub's validator for additional checks (characters, length, etc.) + try: + validate_repo_id(repo_id) + except HFValidationError as e: + raise HuggingFaceHubClientUploadError(f"Invalid repo_id format: '{repo_id}': {e}") from e + + @staticmethod + def _update_metadata_paths(metadata_path: Path) -> dict: + """Update file paths in metadata.json to match Hugging Face dataset repository structure. + + Local paths: + - parquet-files/batch_00000.parquet β†’ data/batch_00000.parquet + - processors-files/processor1/batch_00000.parquet β†’ processor1/batch_00000.parquet + + Args: + metadata_path: Path to metadata.json file + + Returns: + Updated metadata dictionary with corrected paths + """ + with open(metadata_path) as f: + metadata = json.load(f) + + if "file_paths" in metadata: + updated_file_paths = {} + + # Update parquet files path: parquet-files/ β†’ data/ + if FINAL_DATASET_FOLDER_NAME in metadata["file_paths"]: + updated_file_paths["data"] = [ + path.replace(f"{FINAL_DATASET_FOLDER_NAME}/", "data/") + for path in metadata["file_paths"][FINAL_DATASET_FOLDER_NAME] + ] + + # Update processor files paths: processors-files/{name}/ β†’ {name}/ + if "processor-files" in metadata["file_paths"]: + updated_file_paths["processor-files"] = {} + for processor_name, paths in metadata["file_paths"]["processor-files"].items(): + updated_file_paths["processor-files"][processor_name] = [ + path.replace(f"{PROCESSORS_OUTPUTS_FOLDER_NAME}/{processor_name}/", f"{processor_name}/") + for path in paths + ] + + metadata["file_paths"] = updated_file_paths + + return metadata + + @staticmethod + def _validate_dataset_path(base_dataset_path: Path) -> None: + """Validate dataset directory structure. 
+ + Args: + base_dataset_path: Path to dataset directory + + Raises: + HuggingFaceUploadError: If directory structure is invalid + """ + if not base_dataset_path.exists(): + raise HuggingFaceHubClientUploadError(f"Dataset path does not exist: {base_dataset_path}") + + if not base_dataset_path.is_dir(): + raise HuggingFaceHubClientUploadError(f"Dataset path is not a directory: {base_dataset_path}") + + metadata_path = base_dataset_path / METADATA_FILENAME + if not metadata_path.exists(): + raise HuggingFaceHubClientUploadError(f"Required file not found: {metadata_path}") + + if not metadata_path.is_file(): + raise HuggingFaceHubClientUploadError(f"{METADATA_FILENAME} is not a file: {metadata_path}") + + parquet_dir = base_dataset_path / FINAL_DATASET_FOLDER_NAME + if not parquet_dir.exists(): + raise HuggingFaceHubClientUploadError( + f"Required directory not found: {parquet_dir}. " + "Dataset must contain parquet-files directory with batch files." + ) + + if not parquet_dir.is_dir(): + raise HuggingFaceHubClientUploadError(f"parquet-files is not a directory: {parquet_dir}") + + if not any(parquet_dir.glob("*.parquet")): + raise HuggingFaceHubClientUploadError( + f"parquet-files directory is empty: {parquet_dir}. At least one .parquet file is required." 
+ ) + + try: + with open(metadata_path) as f: + json.load(f) + except json.JSONDecodeError as e: + raise HuggingFaceHubClientUploadError(f"Invalid JSON in {METADATA_FILENAME}: {e}") + + sdg_path = base_dataset_path / SDG_CONFIG_FILENAME + if sdg_path.exists(): + if not sdg_path.is_file(): + raise HuggingFaceHubClientUploadError(f"{SDG_CONFIG_FILENAME} is not a file: {sdg_path}") + try: + with open(sdg_path) as f: + json.load(f) + except json.JSONDecodeError as e: + raise HuggingFaceHubClientUploadError(f"Invalid JSON in {SDG_CONFIG_FILENAME}: {e}") diff --git a/pyproject.toml b/pyproject.toml index f6d019d2..8f0271dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,9 @@ name = "data-designer-workspace" version = "0.0.0" # Placeholder, never used since package = false description = "DataDesigner monorepo workspace" requires-python = ">=3.10" +dependencies = [ + "matplotlib>=3.10.8", +] [build-system] requires = ["hatchling"] diff --git a/uv.lock b/uv.lock index 6e0185dc..03f7b959 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14'", @@ -595,6 +595,163 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl", hash = "sha256:c615d91d75f7f04f095b30d1c1711babd43bdc6419c1be9886a85f2f4e489417", size = 7294, upload-time = "2025-07-25T14:02:02.896Z" }, ] +[[package]] +name = "contourpy" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = 
"sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/a3/da4153ec8fe25d263aa48c1a4cbde7f49b59af86f0b6f7862788c60da737/contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934", size = 268551, upload-time = "2025-04-15T17:34:46.581Z" }, + { url = "https://files.pythonhosted.org/packages/2f/6c/330de89ae1087eb622bfca0177d32a7ece50c3ef07b28002de4757d9d875/contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989", size = 253399, upload-time = "2025-04-15T17:34:51.427Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/20c6726b1b7f81a8bee5271bed5c165f0a8e1f572578a9d27e2ccb763cb2/contourpy-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9be002b31c558d1ddf1b9b415b162c603405414bacd6932d031c5b5a8b757f0d", size = 312061, upload-time = "2025-04-15T17:34:55.961Z" }, + { url = "https://files.pythonhosted.org/packages/22/fc/a9665c88f8a2473f823cf1ec601de9e5375050f1958cbb356cdf06ef1ab6/contourpy-1.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2e74acbcba3bfdb6d9d8384cdc4f9260cae86ed9beee8bd5f54fee49a430b9", size = 351956, upload-time = "2025-04-15T17:35:00.992Z" }, + { url = "https://files.pythonhosted.org/packages/25/eb/9f0a0238f305ad8fb7ef42481020d6e20cf15e46be99a1fcf939546a177e/contourpy-1.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e259bced5549ac64410162adc973c5e2fb77f04df4a439d00b478e57a0e65512", size = 320872, upload-time = "2025-04-15T17:35:06.177Z" }, + { url = "https://files.pythonhosted.org/packages/32/5c/1ee32d1c7956923202f00cf8d2a14a62ed7517bdc0ee1e55301227fc273c/contourpy-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ad687a04bc802cbe8b9c399c07162a3c35e227e2daccf1668eb1f278cb698631", size = 325027, upload-time = "2025-04-15T17:35:11.244Z" }, + { url = "https://files.pythonhosted.org/packages/83/bf/9baed89785ba743ef329c2b07fd0611d12bfecbedbdd3eeecf929d8d3b52/contourpy-1.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cdd22595308f53ef2f891040ab2b93d79192513ffccbd7fe19be7aa773a5e09f", size = 1306641, upload-time = "2025-04-15T17:35:26.701Z" }, + { url = "https://files.pythonhosted.org/packages/d4/cc/74e5e83d1e35de2d28bd97033426b450bc4fd96e092a1f7a63dc7369b55d/contourpy-1.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4f54d6a2defe9f257327b0f243612dd051cc43825587520b1bf74a31e2f6ef2", size = 1374075, upload-time = "2025-04-15T17:35:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/0c/42/17f3b798fd5e033b46a16f8d9fcb39f1aba051307f5ebf441bad1ecf78f8/contourpy-1.3.2-cp310-cp310-win32.whl", hash = "sha256:f939a054192ddc596e031e50bb13b657ce318cf13d264f095ce9db7dc6ae81c0", size = 177534, upload-time = "2025-04-15T17:35:46.554Z" }, + { url = "https://files.pythonhosted.org/packages/54/ec/5162b8582f2c994721018d0c9ece9dc6ff769d298a8ac6b6a652c307e7df/contourpy-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c440093bbc8fc21c637c03bafcbef95ccd963bc6e0514ad887932c18ca2a759a", size = 221188, upload-time = "2025-04-15T17:35:50.064Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b9/ede788a0b56fc5b071639d06c33cb893f68b1178938f3425debebe2dab78/contourpy-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a37a2fb93d4df3fc4c0e363ea4d16f83195fc09c891bc8ce072b9d084853445", size = 269636, upload-time = "2025-04-15T17:35:54.473Z" }, + { url = "https://files.pythonhosted.org/packages/e6/75/3469f011d64b8bbfa04f709bfc23e1dd71be54d05b1b083be9f5b22750d1/contourpy-1.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7cd50c38f500bbcc9b6a46643a40e0913673f869315d8e70de0438817cb7773", size = 254636, upload-time = "2025-04-15T17:35:58.283Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/2f/95adb8dae08ce0ebca4fd8e7ad653159565d9739128b2d5977806656fcd2/contourpy-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6658ccc7251a4433eebd89ed2672c2ed96fba367fd25ca9512aa92a4b46c4f1", size = 313053, upload-time = "2025-04-15T17:36:03.235Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a6/8ccf97a50f31adfa36917707fe39c9a0cbc24b3bbb58185577f119736cc9/contourpy-1.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:70771a461aaeb335df14deb6c97439973d253ae70660ca085eec25241137ef43", size = 352985, upload-time = "2025-04-15T17:36:08.275Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b6/7925ab9b77386143f39d9c3243fdd101621b4532eb126743201160ffa7e6/contourpy-1.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65a887a6e8c4cd0897507d814b14c54a8c2e2aa4ac9f7686292f9769fcf9a6ab", size = 323750, upload-time = "2025-04-15T17:36:13.29Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f3/20c5d1ef4f4748e52d60771b8560cf00b69d5c6368b5c2e9311bcfa2a08b/contourpy-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3859783aefa2b8355697f16642695a5b9792e7a46ab86da1118a4a23a51a33d7", size = 326246, upload-time = "2025-04-15T17:36:18.329Z" }, + { url = "https://files.pythonhosted.org/packages/8c/e5/9dae809e7e0b2d9d70c52b3d24cba134dd3dad979eb3e5e71f5df22ed1f5/contourpy-1.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eab0f6db315fa4d70f1d8ab514e527f0366ec021ff853d7ed6a2d33605cf4b83", size = 1308728, upload-time = "2025-04-15T17:36:33.878Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4a/0058ba34aeea35c0b442ae61a4f4d4ca84d6df8f91309bc2d43bb8dd248f/contourpy-1.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d91a3ccc7fea94ca0acab82ceb77f396d50a1f67412efe4c526f5d20264e6ecd", size = 1375762, upload-time = "2025-04-15T17:36:51.295Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/33/7174bdfc8b7767ef2c08ed81244762d93d5c579336fc0b51ca57b33d1b80/contourpy-1.3.2-cp311-cp311-win32.whl", hash = "sha256:1c48188778d4d2f3d48e4643fb15d8608b1d01e4b4d6b0548d9b336c28fc9b6f", size = 178196, upload-time = "2025-04-15T17:36:55.002Z" }, + { url = "https://files.pythonhosted.org/packages/5e/fe/4029038b4e1c4485cef18e480b0e2cd2d755448bb071eb9977caac80b77b/contourpy-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:5ebac872ba09cb8f2131c46b8739a7ff71de28a24c869bcad554477eb089a878", size = 222017, upload-time = "2025-04-15T17:36:58.576Z" }, + { url = "https://files.pythonhosted.org/packages/34/f7/44785876384eff370c251d58fd65f6ad7f39adce4a093c934d4a67a7c6b6/contourpy-1.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4caf2bcd2969402bf77edc4cb6034c7dd7c0803213b3523f111eb7460a51b8d2", size = 271580, upload-time = "2025-04-15T17:37:03.105Z" }, + { url = "https://files.pythonhosted.org/packages/93/3b/0004767622a9826ea3d95f0e9d98cd8729015768075d61f9fea8eeca42a8/contourpy-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82199cb78276249796419fe36b7386bd8d2cc3f28b3bc19fe2454fe2e26c4c15", size = 255530, upload-time = "2025-04-15T17:37:07.026Z" }, + { url = "https://files.pythonhosted.org/packages/e7/bb/7bd49e1f4fa805772d9fd130e0d375554ebc771ed7172f48dfcd4ca61549/contourpy-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:106fab697af11456fcba3e352ad50effe493a90f893fca6c2ca5c033820cea92", size = 307688, upload-time = "2025-04-15T17:37:11.481Z" }, + { url = "https://files.pythonhosted.org/packages/fc/97/e1d5dbbfa170725ef78357a9a0edc996b09ae4af170927ba8ce977e60a5f/contourpy-1.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d14f12932a8d620e307f715857107b1d1845cc44fdb5da2bc8e850f5ceba9f87", size = 347331, upload-time = "2025-04-15T17:37:18.212Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/66/e69e6e904f5ecf6901be3dd16e7e54d41b6ec6ae3405a535286d4418ffb4/contourpy-1.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:532fd26e715560721bb0d5fc7610fce279b3699b018600ab999d1be895b09415", size = 318963, upload-time = "2025-04-15T17:37:22.76Z" }, + { url = "https://files.pythonhosted.org/packages/a8/32/b8a1c8965e4f72482ff2d1ac2cd670ce0b542f203c8e1d34e7c3e6925da7/contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b383144cf2d2c29f01a1e8170f50dacf0eac02d64139dcd709a8ac4eb3cfe", size = 323681, upload-time = "2025-04-15T17:37:33.001Z" }, + { url = "https://files.pythonhosted.org/packages/30/c6/12a7e6811d08757c7162a541ca4c5c6a34c0f4e98ef2b338791093518e40/contourpy-1.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c49f73e61f1f774650a55d221803b101d966ca0c5a2d6d5e4320ec3997489441", size = 1308674, upload-time = "2025-04-15T17:37:48.64Z" }, + { url = "https://files.pythonhosted.org/packages/2a/8a/bebe5a3f68b484d3a2b8ffaf84704b3e343ef1addea528132ef148e22b3b/contourpy-1.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d80b2c0300583228ac98d0a927a1ba6a2ba6b8a742463c564f1d419ee5b211e", size = 1380480, upload-time = "2025-04-15T17:38:06.7Z" }, + { url = "https://files.pythonhosted.org/packages/34/db/fcd325f19b5978fb509a7d55e06d99f5f856294c1991097534360b307cf1/contourpy-1.3.2-cp312-cp312-win32.whl", hash = "sha256:90df94c89a91b7362e1142cbee7568f86514412ab8a2c0d0fca72d7e91b62912", size = 178489, upload-time = "2025-04-15T17:38:10.338Z" }, + { url = "https://files.pythonhosted.org/packages/01/c8/fadd0b92ffa7b5eb5949bf340a63a4a496a6930a6c37a7ba0f12acb076d6/contourpy-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c942a01d9163e2e5cfb05cb66110121b8d07ad438a17f9e766317bcb62abf73", size = 223042, upload-time = "2025-04-15T17:38:14.239Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/61/5673f7e364b31e4e7ef6f61a4b5121c5f170f941895912f773d95270f3a2/contourpy-1.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:de39db2604ae755316cb5967728f4bea92685884b1e767b7c24e983ef5f771cb", size = 271630, upload-time = "2025-04-15T17:38:19.142Z" }, + { url = "https://files.pythonhosted.org/packages/ff/66/a40badddd1223822c95798c55292844b7e871e50f6bfd9f158cb25e0bd39/contourpy-1.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f9e896f447c5c8618f1edb2bafa9a4030f22a575ec418ad70611450720b5b08", size = 255670, upload-time = "2025-04-15T17:38:23.688Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c7/cf9fdee8200805c9bc3b148f49cb9482a4e3ea2719e772602a425c9b09f8/contourpy-1.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71e2bd4a1c4188f5c2b8d274da78faab884b59df20df63c34f74aa1813c4427c", size = 306694, upload-time = "2025-04-15T17:38:28.238Z" }, + { url = "https://files.pythonhosted.org/packages/dd/e7/ccb9bec80e1ba121efbffad7f38021021cda5be87532ec16fd96533bb2e0/contourpy-1.3.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de425af81b6cea33101ae95ece1f696af39446db9682a0b56daaa48cfc29f38f", size = 345986, upload-time = "2025-04-15T17:38:33.502Z" }, + { url = "https://files.pythonhosted.org/packages/dc/49/ca13bb2da90391fa4219fdb23b078d6065ada886658ac7818e5441448b78/contourpy-1.3.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:977e98a0e0480d3fe292246417239d2d45435904afd6d7332d8455981c408b85", size = 318060, upload-time = "2025-04-15T17:38:38.672Z" }, + { url = "https://files.pythonhosted.org/packages/c8/65/5245ce8c548a8422236c13ffcdcdada6a2a812c361e9e0c70548bb40b661/contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:434f0adf84911c924519d2b08fc10491dd282b20bdd3fa8f60fd816ea0b48841", size = 322747, upload-time = "2025-04-15T17:38:43.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/30/669b8eb48e0a01c660ead3752a25b44fdb2e5ebc13a55782f639170772f9/contourpy-1.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c66c4906cdbc50e9cba65978823e6e00b45682eb09adbb78c9775b74eb222422", size = 1308895, upload-time = "2025-04-15T17:39:00.224Z" }, + { url = "https://files.pythonhosted.org/packages/05/5a/b569f4250decee6e8d54498be7bdf29021a4c256e77fe8138c8319ef8eb3/contourpy-1.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b7fc0cd78ba2f4695fd0a6ad81a19e7e3ab825c31b577f384aa9d7817dc3bef", size = 1379098, upload-time = "2025-04-15T17:43:29.649Z" }, + { url = "https://files.pythonhosted.org/packages/19/ba/b227c3886d120e60e41b28740ac3617b2f2b971b9f601c835661194579f1/contourpy-1.3.2-cp313-cp313-win32.whl", hash = "sha256:15ce6ab60957ca74cff444fe66d9045c1fd3e92c8936894ebd1f3eef2fff075f", size = 178535, upload-time = "2025-04-15T17:44:44.532Z" }, + { url = "https://files.pythonhosted.org/packages/12/6e/2fed56cd47ca739b43e892707ae9a13790a486a3173be063681ca67d2262/contourpy-1.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:e1578f7eafce927b168752ed7e22646dad6cd9bca673c60bff55889fa236ebf9", size = 223096, upload-time = "2025-04-15T17:44:48.194Z" }, + { url = "https://files.pythonhosted.org/packages/54/4c/e76fe2a03014a7c767d79ea35c86a747e9325537a8b7627e0e5b3ba266b4/contourpy-1.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0475b1f6604896bc7c53bb070e355e9321e1bc0d381735421a2d2068ec56531f", size = 285090, upload-time = "2025-04-15T17:43:34.084Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e2/5aba47debd55d668e00baf9651b721e7733975dc9fc27264a62b0dd26eb8/contourpy-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c85bb486e9be652314bb5b9e2e3b0d1b2e643d5eec4992c0fbe8ac71775da739", size = 268643, upload-time = "2025-04-15T17:43:38.626Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/37/cd45f1f051fe6230f751cc5cdd2728bb3a203f5619510ef11e732109593c/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:745b57db7758f3ffc05a10254edd3182a2a83402a89c00957a8e8a22f5582823", size = 310443, upload-time = "2025-04-15T17:43:44.522Z" }, + { url = "https://files.pythonhosted.org/packages/8b/a2/36ea6140c306c9ff6dd38e3bcec80b3b018474ef4d17eb68ceecd26675f4/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:970e9173dbd7eba9b4e01aab19215a48ee5dd3f43cef736eebde064a171f89a5", size = 349865, upload-time = "2025-04-15T17:43:49.545Z" }, + { url = "https://files.pythonhosted.org/packages/95/b7/2fc76bc539693180488f7b6cc518da7acbbb9e3b931fd9280504128bf956/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6c4639a9c22230276b7bffb6a850dfc8258a2521305e1faefe804d006b2e532", size = 321162, upload-time = "2025-04-15T17:43:54.203Z" }, + { url = "https://files.pythonhosted.org/packages/f4/10/76d4f778458b0aa83f96e59d65ece72a060bacb20cfbee46cf6cd5ceba41/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc829960f34ba36aad4302e78eabf3ef16a3a100863f0d4eeddf30e8a485a03b", size = 327355, upload-time = "2025-04-15T17:44:01.025Z" }, + { url = "https://files.pythonhosted.org/packages/43/a3/10cf483ea683f9f8ab096c24bad3cce20e0d1dd9a4baa0e2093c1c962d9d/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d32530b534e986374fc19eaa77fcb87e8a99e5431499949b828312bdcd20ac52", size = 1307935, upload-time = "2025-04-15T17:44:17.322Z" }, + { url = "https://files.pythonhosted.org/packages/78/73/69dd9a024444489e22d86108e7b913f3528f56cfc312b5c5727a44188471/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e298e7e70cf4eb179cc1077be1c725b5fd131ebc81181bf0c03525c8abc297fd", size = 1372168, upload-time = "2025-04-15T17:44:33.43Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/1b/96d586ccf1b1a9d2004dd519b25fbf104a11589abfd05484ff12199cca21/contourpy-1.3.2-cp313-cp313t-win32.whl", hash = "sha256:d0e589ae0d55204991450bb5c23f571c64fe43adaa53f93fc902a84c96f52fe1", size = 189550, upload-time = "2025-04-15T17:44:37.092Z" }, + { url = "https://files.pythonhosted.org/packages/b0/e6/6000d0094e8a5e32ad62591c8609e269febb6e4db83a1c75ff8868b42731/contourpy-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:78e9253c3de756b3f6a5174d024c4835acd59eb3f8e2ca13e775dbffe1558f69", size = 238214, upload-time = "2025-04-15T17:44:40.827Z" }, + { url = "https://files.pythonhosted.org/packages/33/05/b26e3c6ecc05f349ee0013f0bb850a761016d89cec528a98193a48c34033/contourpy-1.3.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fd93cc7f3139b6dd7aab2f26a90dde0aa9fc264dbf70f6740d498a70b860b82c", size = 265681, upload-time = "2025-04-15T17:44:59.314Z" }, + { url = "https://files.pythonhosted.org/packages/2b/25/ac07d6ad12affa7d1ffed11b77417d0a6308170f44ff20fa1d5aa6333f03/contourpy-1.3.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:107ba8a6a7eec58bb475329e6d3b95deba9440667c4d62b9b6063942b61d7f16", size = 315101, upload-time = "2025-04-15T17:45:04.165Z" }, + { url = "https://files.pythonhosted.org/packages/8f/4d/5bb3192bbe9d3f27e3061a6a8e7733c9120e203cb8515767d30973f71030/contourpy-1.3.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ded1706ed0c1049224531b81128efbd5084598f18d8a2d9efae833edbd2b40ad", size = 220599, upload-time = "2025-04-15T17:45:08.456Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c0/91f1215d0d9f9f343e4773ba6c9b89e8c0cc7a64a6263f21139da639d848/contourpy-1.3.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5f5964cdad279256c084b69c3f412b7801e15356b16efa9d78aa974041903da0", size = 266807, upload-time = "2025-04-15T17:45:15.535Z" }, + { url = 
"https://files.pythonhosted.org/packages/d4/79/6be7e90c955c0487e7712660d6cead01fa17bff98e0ea275737cc2bc8e71/contourpy-1.3.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49b65a95d642d4efa8f64ba12558fcb83407e58a2dfba9d796d77b63ccfcaff5", size = 318729, upload-time = "2025-04-15T17:45:20.166Z" }, + { url = "https://files.pythonhosted.org/packages/87/68/7f46fb537958e87427d98a4074bcde4b67a70b04900cfc5ce29bc2f556c1/contourpy-1.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8c5acb8dddb0752bf252e01a3035b21443158910ac16a3b0d20e7fed7d534ce5", size = 221791, upload-time = "2025-04-15T17:45:24.794Z" }, +] + +[[package]] +name = "contourpy" +version = "1.3.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version >= '3.12' and python_full_version < '3.14'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/2e/c4390a31919d8a78b90e8ecf87cd4b4c4f05a5b48d05ec17db8e5404c6f4/contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1", size = 288773, upload-time = "2025-07-26T12:01:02.277Z" }, + { url = "https://files.pythonhosted.org/packages/0d/44/c4b0b6095fef4dc9c420e041799591e3b63e9619e3044f7f4f6c21c0ab24/contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381", size = 270149, upload-time = "2025-07-26T12:01:04.072Z" }, 
+ { url = "https://files.pythonhosted.org/packages/30/2e/dd4ced42fefac8470661d7cb7e264808425e6c5d56d175291e93890cce09/contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7", size = 329222, upload-time = "2025-07-26T12:01:05.688Z" }, + { url = "https://files.pythonhosted.org/packages/f2/74/cc6ec2548e3d276c71389ea4802a774b7aa3558223b7bade3f25787fafc2/contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1", size = 377234, upload-time = "2025-07-26T12:01:07.054Z" }, + { url = "https://files.pythonhosted.org/packages/03/b3/64ef723029f917410f75c09da54254c5f9ea90ef89b143ccadb09df14c15/contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a", size = 380555, upload-time = "2025-07-26T12:01:08.801Z" }, + { url = "https://files.pythonhosted.org/packages/5f/4b/6157f24ca425b89fe2eb7e7be642375711ab671135be21e6faa100f7448c/contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db", size = 355238, upload-time = "2025-07-26T12:01:10.319Z" }, + { url = "https://files.pythonhosted.org/packages/98/56/f914f0dd678480708a04cfd2206e7c382533249bc5001eb9f58aa693e200/contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620", size = 1326218, upload-time = "2025-07-26T12:01:12.659Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d7/4a972334a0c971acd5172389671113ae82aa7527073980c38d5868ff1161/contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f", size = 1392867, upload-time = "2025-07-26T12:01:15.533Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/3e/f2cc6cd56dc8cff46b1a56232eabc6feea52720083ea71ab15523daab796/contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff", size = 183677, upload-time = "2025-07-26T12:01:17.088Z" }, + { url = "https://files.pythonhosted.org/packages/98/4b/9bd370b004b5c9d8045c6c33cf65bae018b27aca550a3f657cdc99acdbd8/contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42", size = 225234, upload-time = "2025-07-26T12:01:18.256Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b6/71771e02c2e004450c12b1120a5f488cad2e4d5b590b1af8bad060360fe4/contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470", size = 193123, upload-time = "2025-07-26T12:01:19.848Z" }, + { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" }, + { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" }, + { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/12/897aeebfb475b7748ea67b61e045accdfcf0d971f8a588b67108ed7f5512/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8", size = 379536, upload-time = "2025-07-26T12:01:25.91Z" }, + { url = "https://files.pythonhosted.org/packages/43/8a/a8c584b82deb248930ce069e71576fc09bd7174bbd35183b7943fb1064fd/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea", size = 384397, upload-time = "2025-07-26T12:01:27.152Z" }, + { url = "https://files.pythonhosted.org/packages/cc/8f/ec6289987824b29529d0dfda0d74a07cec60e54b9c92f3c9da4c0ac732de/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1", size = 362601, upload-time = "2025-07-26T12:01:28.808Z" }, + { url = "https://files.pythonhosted.org/packages/05/0a/a3fe3be3ee2dceb3e615ebb4df97ae6f3828aa915d3e10549ce016302bd1/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7", size = 1331288, upload-time = "2025-07-26T12:01:31.198Z" }, + { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8f/5847f44a7fddf859704217a99a23a4f6417b10e5ab1256a179264561540e/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69", size = 185018, upload-time = "2025-07-26T12:01:35.64Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/e8/6026ed58a64563186a9ee3f29f41261fd1828f527dd93d33b60feca63352/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b", size = 226567, upload-time = "2025-07-26T12:01:36.804Z" }, + { url = "https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" }, + { url = "https://files.pythonhosted.org/packages/68/35/0167aad910bbdb9599272bd96d01a9ec6852f36b9455cf2ca67bd4cc2d23/contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5", size = 293257, upload-time = "2025-07-26T12:01:39.367Z" }, + { url = "https://files.pythonhosted.org/packages/96/e4/7adcd9c8362745b2210728f209bfbcf7d91ba868a2c5f40d8b58f54c509b/contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1", size = 274034, upload-time = "2025-07-26T12:01:40.645Z" }, + { url = "https://files.pythonhosted.org/packages/73/23/90e31ceeed1de63058a02cb04b12f2de4b40e3bef5e082a7c18d9c8ae281/contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286", size = 334672, upload-time = "2025-07-26T12:01:41.942Z" }, + { url = "https://files.pythonhosted.org/packages/ed/93/b43d8acbe67392e659e1d984700e79eb67e2acb2bd7f62012b583a7f1b55/contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5", size = 381234, upload-time = "2025-07-26T12:01:43.499Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/3b/bec82a3ea06f66711520f75a40c8fc0b113b2a75edb36aa633eb11c4f50f/contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67", size = 385169, upload-time = "2025-07-26T12:01:45.219Z" }, + { url = "https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9", size = 362859, upload-time = "2025-07-26T12:01:46.519Z" }, + { url = "https://files.pythonhosted.org/packages/33/71/e2a7945b7de4e58af42d708a219f3b2f4cff7386e6b6ab0a0fa0033c49a9/contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659", size = 1332062, upload-time = "2025-07-26T12:01:48.964Z" }, + { url = "https://files.pythonhosted.org/packages/12/fc/4e87ac754220ccc0e807284f88e943d6d43b43843614f0a8afa469801db0/contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7", size = 1403932, upload-time = "2025-07-26T12:01:51.979Z" }, + { url = "https://files.pythonhosted.org/packages/a6/2e/adc197a37443f934594112222ac1aa7dc9a98faf9c3842884df9a9d8751d/contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d", size = 185024, upload-time = "2025-07-26T12:01:53.245Z" }, + { url = "https://files.pythonhosted.org/packages/18/0b/0098c214843213759692cc638fce7de5c289200a830e5035d1791d7a2338/contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263", size = 226578, upload-time = "2025-07-26T12:01:54.422Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/9a/2f6024a0c5995243cd63afdeb3651c984f0d2bc727fd98066d40e141ad73/contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9", size = 193524, upload-time = "2025-07-26T12:01:55.73Z" }, + { url = "https://files.pythonhosted.org/packages/c0/b3/f8a1a86bd3298513f500e5b1f5fd92b69896449f6cab6a146a5d52715479/contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d", size = 306730, upload-time = "2025-07-26T12:01:57.051Z" }, + { url = "https://files.pythonhosted.org/packages/3f/11/4780db94ae62fc0c2053909b65dc3246bd7cecfc4f8a20d957ad43aa4ad8/contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216", size = 287897, upload-time = "2025-07-26T12:01:58.663Z" }, + { url = "https://files.pythonhosted.org/packages/ae/15/e59f5f3ffdd6f3d4daa3e47114c53daabcb18574a26c21f03dc9e4e42ff0/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae", size = 326751, upload-time = "2025-07-26T12:02:00.343Z" }, + { url = "https://files.pythonhosted.org/packages/0f/81/03b45cfad088e4770b1dcf72ea78d3802d04200009fb364d18a493857210/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20", size = 375486, upload-time = "2025-07-26T12:02:02.128Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ba/49923366492ffbdd4486e970d421b289a670ae8cf539c1ea9a09822b371a/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99", size = 388106, upload-time = "2025-07-26T12:02:03.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/52/5b00ea89525f8f143651f9f03a0df371d3cbd2fccd21ca9b768c7a6500c2/contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b", size = 352548, upload-time = "2025-07-26T12:02:05.165Z" }, + { url = "https://files.pythonhosted.org/packages/32/1d/a209ec1a3a3452d490f6b14dd92e72280c99ae3d1e73da74f8277d4ee08f/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a", size = 1322297, upload-time = "2025-07-26T12:02:07.379Z" }, + { url = "https://files.pythonhosted.org/packages/bc/9e/46f0e8ebdd884ca0e8877e46a3f4e633f6c9c8c4f3f6e72be3fe075994aa/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e", size = 1391023, upload-time = "2025-07-26T12:02:10.171Z" }, + { url = "https://files.pythonhosted.org/packages/b9/70/f308384a3ae9cd2209e0849f33c913f658d3326900d0ff5d378d6a1422d2/contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3", size = 196157, upload-time = "2025-07-26T12:02:11.488Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dd/880f890a6663b84d9e34a6f88cded89d78f0091e0045a284427cb6b18521/contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8", size = 240570, upload-time = "2025-07-26T12:02:12.754Z" }, + { url = "https://files.pythonhosted.org/packages/80/99/2adc7d8ffead633234817ef8e9a87115c8a11927a94478f6bb3d3f4d4f7d/contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301", size = 199713, upload-time = "2025-07-26T12:02:14.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/8b/4546f3ab60f78c514ffb7d01a0bd743f90de36f0019d1be84d0a708a580a/contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a", size = 292189, upload-time = "2025-07-26T12:02:16.095Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e1/3542a9cb596cadd76fcef413f19c79216e002623158befe6daa03dbfa88c/contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77", size = 273251, upload-time = "2025-07-26T12:02:17.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/71/f93e1e9471d189f79d0ce2497007731c1e6bf9ef6d1d61b911430c3db4e5/contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5", size = 335810, upload-time = "2025-07-26T12:02:18.9Z" }, + { url = "https://files.pythonhosted.org/packages/91/f9/e35f4c1c93f9275d4e38681a80506b5510e9327350c51f8d4a5a724d178c/contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4", size = 382871, upload-time = "2025-07-26T12:02:20.418Z" }, + { url = "https://files.pythonhosted.org/packages/b5/71/47b512f936f66a0a900d81c396a7e60d73419868fba959c61efed7a8ab46/contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36", size = 386264, upload-time = "2025-07-26T12:02:21.916Z" }, + { url = "https://files.pythonhosted.org/packages/04/5f/9ff93450ba96b09c7c2b3f81c94de31c89f92292f1380261bd7195bea4ea/contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3", size = 363819, upload-time = "2025-07-26T12:02:23.759Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/a6/0b185d4cc480ee494945cde102cb0149ae830b5fa17bf855b95f2e70ad13/contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b", size = 1333650, upload-time = "2025-07-26T12:02:26.181Z" }, + { url = "https://files.pythonhosted.org/packages/43/d7/afdc95580ca56f30fbcd3060250f66cedbde69b4547028863abd8aa3b47e/contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36", size = 1404833, upload-time = "2025-07-26T12:02:28.782Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e2/366af18a6d386f41132a48f033cbd2102e9b0cf6345d35ff0826cd984566/contourpy-1.3.3-cp314-cp314-win32.whl", hash = "sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d", size = 189692, upload-time = "2025-07-26T12:02:30.128Z" }, + { url = "https://files.pythonhosted.org/packages/7d/c2/57f54b03d0f22d4044b8afb9ca0e184f8b1afd57b4f735c2fa70883dc601/contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd", size = 232424, upload-time = "2025-07-26T12:02:31.395Z" }, + { url = "https://files.pythonhosted.org/packages/18/79/a9416650df9b525737ab521aa181ccc42d56016d2123ddcb7b58e926a42c/contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339", size = 198300, upload-time = "2025-07-26T12:02:32.956Z" }, + { url = "https://files.pythonhosted.org/packages/1f/42/38c159a7d0f2b7b9c04c64ab317042bb6952b713ba875c1681529a2932fe/contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772", size = 306769, upload-time = "2025-07-26T12:02:34.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/6c/26a8205f24bca10974e77460de68d3d7c63e282e23782f1239f226fcae6f/contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77", size = 287892, upload-time = "2025-07-26T12:02:35.807Z" }, + { url = "https://files.pythonhosted.org/packages/66/06/8a475c8ab718ebfd7925661747dbb3c3ee9c82ac834ccb3570be49d129f4/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13", size = 326748, upload-time = "2025-07-26T12:02:37.193Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a3/c5ca9f010a44c223f098fccd8b158bb1cb287378a31ac141f04730dc49be/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe", size = 375554, upload-time = "2025-07-26T12:02:38.894Z" }, + { url = "https://files.pythonhosted.org/packages/80/5b/68bd33ae63fac658a4145088c1e894405e07584a316738710b636c6d0333/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f", size = 388118, upload-time = "2025-07-26T12:02:40.642Z" }, + { url = "https://files.pythonhosted.org/packages/40/52/4c285a6435940ae25d7410a6c36bda5145839bc3f0beb20c707cda18b9d2/contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0", size = 352555, upload-time = "2025-07-26T12:02:42.25Z" }, + { url = "https://files.pythonhosted.org/packages/24/ee/3e81e1dd174f5c7fefe50e85d0892de05ca4e26ef1c9a59c2a57e43b865a/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4", size = 1322295, upload-time = "2025-07-26T12:02:44.668Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/b2/6d913d4d04e14379de429057cd169e5e00f6c2af3bb13e1710bcbdb5da12/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f", size = 1391027, upload-time = "2025-07-26T12:02:47.09Z" }, + { url = "https://files.pythonhosted.org/packages/93/8a/68a4ec5c55a2971213d29a9374913f7e9f18581945a7a31d1a39b5d2dfe5/contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae", size = 202428, upload-time = "2025-07-26T12:02:48.691Z" }, + { url = "https://files.pythonhosted.org/packages/fa/96/fd9f641ffedc4fa3ace923af73b9d07e869496c9cc7a459103e6e978992f/contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc", size = 250331, upload-time = "2025-07-26T12:02:50.137Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8c/469afb6465b853afff216f9528ffda78a915ff880ed58813ba4faf4ba0b6/contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b", size = 203831, upload-time = "2025-07-26T12:02:51.449Z" }, + { url = "https://files.pythonhosted.org/packages/a5/29/8dcfe16f0107943fa92388c23f6e05cff0ba58058c4c95b00280d4c75a14/contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497", size = 278809, upload-time = "2025-07-26T12:02:52.74Z" }, + { url = "https://files.pythonhosted.org/packages/85/a9/8b37ef4f7dafeb335daee3c8254645ef5725be4d9c6aa70b50ec46ef2f7e/contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8", size = 261593, upload-time = "2025-07-26T12:02:54.037Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/59/ebfb8c677c75605cc27f7122c90313fd2f375ff3c8d19a1694bda74aaa63/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e", size = 302202, upload-time = "2025-07-26T12:02:55.947Z" }, + { url = "https://files.pythonhosted.org/packages/3c/37/21972a15834d90bfbfb009b9d004779bd5a07a0ec0234e5ba8f64d5736f4/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989", size = 329207, upload-time = "2025-07-26T12:02:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" }, +] + [[package]] name = "coverage" version = "7.13.2" @@ -764,6 +921,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/c3/e90f4a4feae6410f914f8ebac129b9ae7a8c92eb60a638012dde42030a9d/cryptography-46.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6b5063083824e5509fdba180721d55909ffacccc8adbec85268b48439423d78c", size = 3438528, upload-time = "2025-10-15T23:18:26.227Z" }, ] +[[package]] +name = "cycler" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = 
"sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, +] + [[package]] name = "data-designer" source = { editable = "packages/data-designer" } @@ -865,6 +1031,9 @@ requires-dist = [ name = "data-designer-workspace" version = "0.0.0" source = { virtual = "." } +dependencies = [ + { name = "matplotlib" }, +] [package.dev-dependencies] dev = [ @@ -899,6 +1068,7 @@ recipes = [ ] [package.metadata] +requires-dist = [{ name = "matplotlib", specifier = ">=3.10.8" }] [package.metadata.requires-dev] dev = [ @@ -1224,6 +1394,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" }, ] +[[package]] +name = "fonttools" +version = "4.61.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/ca/cf17b88a8df95691275a3d77dc0a5ad9907f328ae53acbe6795da1b2f5ed/fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69", size = 3565756, upload-time = "2025-12-12T17:31:24.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/94/8a28707adb00bed1bf22dac16ccafe60faf2ade353dcb32c3617ee917307/fonttools-4.61.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c7db70d57e5e1089a274cbb2b1fd635c9a24de809a231b154965d415d6c6d24", size = 2854799, upload-time = "2025-12-12T17:29:27.5Z" }, + { url = "https://files.pythonhosted.org/packages/94/93/c2e682faaa5ee92034818d8f8a8145ae73eb83619600495dcf8503fa7771/fonttools-4.61.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5fe9fd43882620017add5eabb781ebfbc6998ee49b35bd7f8f79af1f9f99a958", size = 2403032, upload-time = "2025-12-12T17:29:30.115Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/62/1748f7e7e1ee41aa52279fd2e3a6d0733dc42a673b16932bad8e5d0c8b28/fonttools-4.61.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8db08051fc9e7d8bc622f2112511b8107d8f27cd89e2f64ec45e9825e8288da", size = 4897863, upload-time = "2025-12-12T17:29:32.535Z" }, + { url = "https://files.pythonhosted.org/packages/69/69/4ca02ee367d2c98edcaeb83fc278d20972502ee071214ad9d8ca85e06080/fonttools-4.61.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a76d4cb80f41ba94a6691264be76435e5f72f2cb3cab0b092a6212855f71c2f6", size = 4859076, upload-time = "2025-12-12T17:29:34.907Z" }, + { url = "https://files.pythonhosted.org/packages/8c/f5/660f9e3cefa078861a7f099107c6d203b568a6227eef163dd173bfc56bdc/fonttools-4.61.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a13fc8aeb24bad755eea8f7f9d409438eb94e82cf86b08fe77a03fbc8f6a96b1", size = 4875623, upload-time = "2025-12-12T17:29:37.33Z" }, + { url = "https://files.pythonhosted.org/packages/63/d1/9d7c5091d2276ed47795c131c1bf9316c3c1ab2789c22e2f59e0572ccd38/fonttools-4.61.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b846a1fcf8beadeb9ea4f44ec5bdde393e2f1569e17d700bfc49cd69bde75881", size = 4993327, upload-time = "2025-12-12T17:29:39.781Z" }, + { url = "https://files.pythonhosted.org/packages/6f/2d/28def73837885ae32260d07660a052b99f0aa00454867d33745dfe49dbf0/fonttools-4.61.1-cp310-cp310-win32.whl", hash = "sha256:78a7d3ab09dc47ac1a363a493e6112d8cabed7ba7caad5f54dbe2f08676d1b47", size = 1502180, upload-time = "2025-12-12T17:29:42.217Z" }, + { url = "https://files.pythonhosted.org/packages/63/fa/bfdc98abb4dd2bd491033e85e3ba69a2313c850e759a6daa014bc9433b0f/fonttools-4.61.1-cp310-cp310-win_amd64.whl", hash = "sha256:eff1ac3cc66c2ac7cda1e64b4e2f3ffef474b7335f92fc3833fc632d595fcee6", size = 1550654, upload-time = "2025-12-12T17:29:44.564Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/12/bf9f4eaa2fad039356cc627587e30ed008c03f1cebd3034376b5ee8d1d44/fonttools-4.61.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c6604b735bb12fef8e0efd5578c9fb5d3d8532d5001ea13a19cddf295673ee09", size = 2852213, upload-time = "2025-12-12T17:29:46.675Z" }, + { url = "https://files.pythonhosted.org/packages/ac/49/4138d1acb6261499bedde1c07f8c2605d1d8f9d77a151e5507fd3ef084b6/fonttools-4.61.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ce02f38a754f207f2f06557523cd39a06438ba3aafc0639c477ac409fc64e37", size = 2401689, upload-time = "2025-12-12T17:29:48.769Z" }, + { url = "https://files.pythonhosted.org/packages/e5/fe/e6ce0fe20a40e03aef906af60aa87668696f9e4802fa283627d0b5ed777f/fonttools-4.61.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77efb033d8d7ff233385f30c62c7c79271c8885d5c9657d967ede124671bbdfb", size = 5058809, upload-time = "2025-12-12T17:29:51.701Z" }, + { url = "https://files.pythonhosted.org/packages/79/61/1ca198af22f7dd22c17ab86e9024ed3c06299cfdb08170640e9996d501a0/fonttools-4.61.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75c1a6dfac6abd407634420c93864a1e274ebc1c7531346d9254c0d8f6ca00f9", size = 5036039, upload-time = "2025-12-12T17:29:53.659Z" }, + { url = "https://files.pythonhosted.org/packages/99/cc/fa1801e408586b5fce4da9f5455af8d770f4fc57391cd5da7256bb364d38/fonttools-4.61.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0de30bfe7745c0d1ffa2b0b7048fb7123ad0d71107e10ee090fa0b16b9452e87", size = 5034714, upload-time = "2025-12-12T17:29:55.592Z" }, + { url = "https://files.pythonhosted.org/packages/bf/aa/b7aeafe65adb1b0a925f8f25725e09f078c635bc22754f3fecb7456955b0/fonttools-4.61.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58b0ee0ab5b1fc9921eccfe11d1435added19d6494dde14e323f25ad2bc30c56", size = 5158648, upload-time = "2025-12-12T17:29:57.861Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/f9/08ea7a38663328881384c6e7777bbefc46fd7d282adfd87a7d2b84ec9d50/fonttools-4.61.1-cp311-cp311-win32.whl", hash = "sha256:f79b168428351d11e10c5aeb61a74e1851ec221081299f4cf56036a95431c43a", size = 2280681, upload-time = "2025-12-12T17:29:59.943Z" }, + { url = "https://files.pythonhosted.org/packages/07/ad/37dd1ae5fa6e01612a1fbb954f0927681f282925a86e86198ccd7b15d515/fonttools-4.61.1-cp311-cp311-win_amd64.whl", hash = "sha256:fe2efccb324948a11dd09d22136fe2ac8a97d6c1347cf0b58a911dcd529f66b7", size = 2331951, upload-time = "2025-12-12T17:30:02.254Z" }, + { url = "https://files.pythonhosted.org/packages/6f/16/7decaa24a1bd3a70c607b2e29f0adc6159f36a7e40eaba59846414765fd4/fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f3cb4a569029b9f291f88aafc927dd53683757e640081ca8c412781ea144565e", size = 2851593, upload-time = "2025-12-12T17:30:04.225Z" }, + { url = "https://files.pythonhosted.org/packages/94/98/3c4cb97c64713a8cf499b3245c3bf9a2b8fd16a3e375feff2aed78f96259/fonttools-4.61.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41a7170d042e8c0024703ed13b71893519a1a6d6e18e933e3ec7507a2c26a4b2", size = 2400231, upload-time = "2025-12-12T17:30:06.47Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/82dbef0f6342eb01f54bca073ac1498433d6ce71e50c3c3282b655733b31/fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10d88e55330e092940584774ee5e8a6971b01fc2f4d3466a1d6c158230880796", size = 4954103, upload-time = "2025-12-12T17:30:08.432Z" }, + { url = "https://files.pythonhosted.org/packages/6c/44/f3aeac0fa98e7ad527f479e161aca6c3a1e47bb6996b053d45226fe37bf2/fonttools-4.61.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:15acc09befd16a0fb8a8f62bc147e1a82817542d72184acca9ce6e0aeda9fa6d", size = 5004295, upload-time = "2025-12-12T17:30:10.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/e8/7424ced75473983b964d09f6747fa09f054a6d656f60e9ac9324cf40c743/fonttools-4.61.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6bcdf33aec38d16508ce61fd81838f24c83c90a1d1b8c68982857038673d6b8", size = 4944109, upload-time = "2025-12-12T17:30:12.874Z" }, + { url = "https://files.pythonhosted.org/packages/c8/8b/6391b257fa3d0b553d73e778f953a2f0154292a7a7a085e2374b111e5410/fonttools-4.61.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fade934607a523614726119164ff621e8c30e8fa1ffffbbd358662056ba69f0", size = 5093598, upload-time = "2025-12-12T17:30:15.79Z" }, + { url = "https://files.pythonhosted.org/packages/d9/71/fd2ea96cdc512d92da5678a1c98c267ddd4d8c5130b76d0f7a80f9a9fde8/fonttools-4.61.1-cp312-cp312-win32.whl", hash = "sha256:75da8f28eff26defba42c52986de97b22106cb8f26515b7c22443ebc9c2d3261", size = 2269060, upload-time = "2025-12-12T17:30:18.058Z" }, + { url = "https://files.pythonhosted.org/packages/80/3b/a3e81b71aed5a688e89dfe0e2694b26b78c7d7f39a5ffd8a7d75f54a12a8/fonttools-4.61.1-cp312-cp312-win_amd64.whl", hash = "sha256:497c31ce314219888c0e2fce5ad9178ca83fe5230b01a5006726cdf3ac9f24d9", size = 2319078, upload-time = "2025-12-12T17:30:22.862Z" }, + { url = "https://files.pythonhosted.org/packages/4b/cf/00ba28b0990982530addb8dc3e9e6f2fa9cb5c20df2abdda7baa755e8fe1/fonttools-4.61.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c56c488ab471628ff3bfa80964372fc13504ece601e0d97a78ee74126b2045c", size = 2846454, upload-time = "2025-12-12T17:30:24.938Z" }, + { url = "https://files.pythonhosted.org/packages/5a/ca/468c9a8446a2103ae645d14fee3f610567b7042aba85031c1c65e3ef7471/fonttools-4.61.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc492779501fa723b04d0ab1f5be046797fee17d27700476edc7ee9ae535a61e", size = 2398191, upload-time = "2025-12-12T17:30:27.343Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:64102ca87e84261419c3747a0d20f396eb024bdbeb04c2bfb37e2891f5fadcb5", size = 4928410, upload-time = "2025-12-12T17:30:29.771Z" }, + { url = "https://files.pythonhosted.org/packages/b0/8d/6fb3494dfe61a46258cd93d979cf4725ded4eb46c2a4ca35e4490d84daea/fonttools-4.61.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c1b526c8d3f615a7b1867f38a9410849c8f4aef078535742198e942fba0e9bd", size = 4984460, upload-time = "2025-12-12T17:30:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/f7/f1/a47f1d30b3dc00d75e7af762652d4cbc3dff5c2697a0dbd5203c81afd9c3/fonttools-4.61.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:41ed4b5ec103bd306bb68f81dc166e77409e5209443e5773cb4ed837bcc9b0d3", size = 4925800, upload-time = "2025-12-12T17:30:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/a7/01/e6ae64a0981076e8a66906fab01539799546181e32a37a0257b77e4aa88b/fonttools-4.61.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b501c862d4901792adaec7c25b1ecc749e2662543f68bb194c42ba18d6eec98d", size = 5067859, upload-time = "2025-12-12T17:30:36.593Z" }, + { url = "https://files.pythonhosted.org/packages/73/aa/28e40b8d6809a9b5075350a86779163f074d2b617c15d22343fce81918db/fonttools-4.61.1-cp313-cp313-win32.whl", hash = "sha256:4d7092bb38c53bbc78e9255a59158b150bcdc115a1e3b3ce0b5f267dc35dd63c", size = 2267821, upload-time = "2025-12-12T17:30:38.478Z" }, + { url = "https://files.pythonhosted.org/packages/1a/59/453c06d1d83dc0951b69ef692d6b9f1846680342927df54e9a1ca91c6f90/fonttools-4.61.1-cp313-cp313-win_amd64.whl", hash = "sha256:21e7c8d76f62ab13c9472ccf74515ca5b9a761d1bde3265152a6dc58700d895b", size = 2318169, upload-time = "2025-12-12T17:30:40.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/8f/4e7bf82c0cbb738d3c2206c920ca34ca74ef9dabde779030145d28665104/fonttools-4.61.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fff4f534200a04b4a36e7ae3cb74493afe807b517a09e99cb4faa89a34ed6ecd", size = 2846094, upload-time = "2025-12-12T17:30:43.511Z" }, + { url = "https://files.pythonhosted.org/packages/71/09/d44e45d0a4f3a651f23a1e9d42de43bc643cce2971b19e784cc67d823676/fonttools-4.61.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d9203500f7c63545b4ce3799319fe4d9feb1a1b89b28d3cb5abd11b9dd64147e", size = 2396589, upload-time = "2025-12-12T17:30:45.681Z" }, + { url = "https://files.pythonhosted.org/packages/89/18/58c64cafcf8eb677a99ef593121f719e6dcbdb7d1c594ae5a10d4997ca8a/fonttools-4.61.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa646ecec9528bef693415c79a86e733c70a4965dd938e9a226b0fc64c9d2e6c", size = 4877892, upload-time = "2025-12-12T17:30:47.709Z" }, + { url = "https://files.pythonhosted.org/packages/8a/ec/9e6b38c7ba1e09eb51db849d5450f4c05b7e78481f662c3b79dbde6f3d04/fonttools-4.61.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f35ad7805edba3aac1a3710d104592df59f4b957e30108ae0ba6c10b11dd75", size = 4972884, upload-time = "2025-12-12T17:30:49.656Z" }, + { url = "https://files.pythonhosted.org/packages/5e/87/b5339da8e0256734ba0dbbf5b6cdebb1dd79b01dc8c270989b7bcd465541/fonttools-4.61.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b931ae8f62db78861b0ff1ac017851764602288575d65b8e8ff1963fed419063", size = 4924405, upload-time = "2025-12-12T17:30:51.735Z" }, + { url = "https://files.pythonhosted.org/packages/0b/47/e3409f1e1e69c073a3a6fd8cb886eb18c0bae0ee13db2c8d5e7f8495e8b7/fonttools-4.61.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b148b56f5de675ee16d45e769e69f87623a4944f7443850bf9a9376e628a89d2", size = 5035553, upload-time = "2025-12-12T17:30:54.823Z" }, + 
{ url = "https://files.pythonhosted.org/packages/bf/b6/1f6600161b1073a984294c6c031e1a56ebf95b6164249eecf30012bb2e38/fonttools-4.61.1-cp314-cp314-win32.whl", hash = "sha256:9b666a475a65f4e839d3d10473fad6d47e0a9db14a2f4a224029c5bfde58ad2c", size = 2271915, upload-time = "2025-12-12T17:30:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/52/7b/91e7b01e37cc8eb0e1f770d08305b3655e4f002fc160fb82b3390eabacf5/fonttools-4.61.1-cp314-cp314-win_amd64.whl", hash = "sha256:4f5686e1fe5fce75d82d93c47a438a25bf0d1319d2843a926f741140b2b16e0c", size = 2323487, upload-time = "2025-12-12T17:30:59.804Z" }, + { url = "https://files.pythonhosted.org/packages/39/5c/908ad78e46c61c3e3ed70c3b58ff82ab48437faf84ec84f109592cabbd9f/fonttools-4.61.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:e76ce097e3c57c4bcb67c5aa24a0ecdbd9f74ea9219997a707a4061fbe2707aa", size = 2929571, upload-time = "2025-12-12T17:31:02.574Z" }, + { url = "https://files.pythonhosted.org/packages/bd/41/975804132c6dea64cdbfbaa59f3518a21c137a10cccf962805b301ac6ab2/fonttools-4.61.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9cfef3ab326780c04d6646f68d4b4742aae222e8b8ea1d627c74e38afcbc9d91", size = 2435317, upload-time = "2025-12-12T17:31:04.974Z" }, + { url = "https://files.pythonhosted.org/packages/b0/5a/aef2a0a8daf1ebaae4cfd83f84186d4a72ee08fd6a8451289fcd03ffa8a4/fonttools-4.61.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a75c301f96db737e1c5ed5fd7d77d9c34466de16095a266509e13da09751bd19", size = 4882124, upload-time = "2025-12-12T17:31:07.456Z" }, + { url = "https://files.pythonhosted.org/packages/80/33/d6db3485b645b81cea538c9d1c9219d5805f0877fda18777add4671c5240/fonttools-4.61.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91669ccac46bbc1d09e9273546181919064e8df73488ea087dcac3e2968df9ba", size = 5100391, upload-time = "2025-12-12T17:31:09.732Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/d6/675ba631454043c75fcf76f0ca5463eac8eb0666ea1d7badae5fea001155/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c33ab3ca9d3ccd581d58e989d67554e42d8d4ded94ab3ade3508455fe70e65f7", size = 4978800, upload-time = "2025-12-12T17:31:11.681Z" }, + { url = "https://files.pythonhosted.org/packages/7f/33/d3ec753d547a8d2bdaedd390d4a814e8d5b45a093d558f025c6b990b554c/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:664c5a68ec406f6b1547946683008576ef8b38275608e1cee6c061828171c118", size = 5006426, upload-time = "2025-12-12T17:31:13.764Z" }, + { url = "https://files.pythonhosted.org/packages/b4/40/cc11f378b561a67bea850ab50063366a0d1dd3f6d0a30ce0f874b0ad5664/fonttools-4.61.1-cp314-cp314t-win32.whl", hash = "sha256:aed04cabe26f30c1647ef0e8fbb207516fd40fe9472e9439695f5c6998e60ac5", size = 2335377, upload-time = "2025-12-12T17:31:16.49Z" }, + { url = "https://files.pythonhosted.org/packages/e4/ff/c9a2b66b39f8628531ea58b320d66d951267c98c6a38684daa8f50fb02f8/fonttools-4.61.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2180f14c141d2f0f3da43f3a81bc8aa4684860f6b0e6f9e165a4831f24e6a23b", size = 2400613, upload-time = "2025-12-12T17:31:18.769Z" }, + { url = "https://files.pythonhosted.org/packages/c7/4e/ce75a57ff3aebf6fc1f4e9d508b8e5810618a33d900ad6c19eb30b290b97/fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371", size = 1148996, upload-time = "2025-12-12T17:31:21.03Z" }, +] + [[package]] name = "fqdn" version = "1.5.1" @@ -2305,6 +2532,114 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/5a/736dd2f4535dbf3bf26523f9158c011389ef88dd06ec2eef67fd744f1c7b/jupytext-1.19.1-py3-none-any.whl", hash = "sha256:d8975035155d034bdfde5c0c37891425314b7ea8d3a6c4b5d18c294348714cd9", size = 170478, upload-time = "2026-01-25T21:35:11.17Z" }, ] +[[package]] +name = "kiwisolver" +version = "1.4.9" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/5d/8ce64e36d4e3aac5ca96996457dcf33e34e6051492399a3f1fec5657f30b/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b4b4d74bda2b8ebf4da5bd42af11d02d04428b2c32846e4c2c93219df8a7987b", size = 124159, upload-time = "2025-08-10T21:25:35.472Z" }, + { url = "https://files.pythonhosted.org/packages/96/1e/22f63ec454874378175a5f435d6ea1363dd33fb2af832c6643e4ccea0dc8/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fb3b8132019ea572f4611d770991000d7f58127560c4889729248eb5852a102f", size = 66578, upload-time = "2025-08-10T21:25:36.73Z" }, + { url = "https://files.pythonhosted.org/packages/41/4c/1925dcfff47a02d465121967b95151c82d11027d5ec5242771e580e731bd/kiwisolver-1.4.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84fd60810829c27ae375114cd379da1fa65e6918e1da405f356a775d49a62bcf", size = 65312, upload-time = "2025-08-10T21:25:37.658Z" }, + { url = "https://files.pythonhosted.org/packages/d4/42/0f333164e6307a0687d1eb9ad256215aae2f4bd5d28f4653d6cd319a3ba3/kiwisolver-1.4.9-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b78efa4c6e804ecdf727e580dbb9cba85624d2e1c6b5cb059c66290063bd99a9", size = 1628458, upload-time = "2025-08-10T21:25:39.067Z" }, + { url = "https://files.pythonhosted.org/packages/86/b6/2dccb977d651943995a90bfe3495c2ab2ba5cd77093d9f2318a20c9a6f59/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4efec7bcf21671db6a3294ff301d2fc861c31faa3c8740d1a94689234d1b415", size = 1225640, upload-time = "2025-08-10T21:25:40.489Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/2b/362ebd3eec46c850ccf2bfe3e30f2fc4c008750011f38a850f088c56a1c6/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90f47e70293fc3688b71271100a1a5453aa9944a81d27ff779c108372cf5567b", size = 1244074, upload-time = "2025-08-10T21:25:42.221Z" }, + { url = "https://files.pythonhosted.org/packages/6f/bb/f09a1e66dab8984773d13184a10a29fe67125337649d26bdef547024ed6b/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fdca1def57a2e88ef339de1737a1449d6dbf5fab184c54a1fca01d541317154", size = 1293036, upload-time = "2025-08-10T21:25:43.801Z" }, + { url = "https://files.pythonhosted.org/packages/ea/01/11ecf892f201cafda0f68fa59212edaea93e96c37884b747c181303fccd1/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cf554f21be770f5111a1690d42313e140355e687e05cf82cb23d0a721a64a48", size = 2175310, upload-time = "2025-08-10T21:25:45.045Z" }, + { url = "https://files.pythonhosted.org/packages/7f/5f/bfe11d5b934f500cc004314819ea92427e6e5462706a498c1d4fc052e08f/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fc1795ac5cd0510207482c3d1d3ed781143383b8cfd36f5c645f3897ce066220", size = 2270943, upload-time = "2025-08-10T21:25:46.393Z" }, + { url = "https://files.pythonhosted.org/packages/3d/de/259f786bf71f1e03e73d87e2db1a9a3bcab64d7b4fd780167123161630ad/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ccd09f20ccdbbd341b21a67ab50a119b64a403b09288c27481575105283c1586", size = 2440488, upload-time = "2025-08-10T21:25:48.074Z" }, + { url = "https://files.pythonhosted.org/packages/1b/76/c989c278faf037c4d3421ec07a5c452cd3e09545d6dae7f87c15f54e4edf/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:540c7c72324d864406a009d72f5d6856f49693db95d1fbb46cf86febef873634", size = 2246787, upload-time = "2025-08-10T21:25:49.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/55/c2898d84ca440852e560ca9f2a0d28e6e931ac0849b896d77231929900e7/kiwisolver-1.4.9-cp310-cp310-win_amd64.whl", hash = "sha256:ede8c6d533bc6601a47ad4046080d36b8fc99f81e6f1c17b0ac3c2dc91ac7611", size = 73730, upload-time = "2025-08-10T21:25:51.102Z" }, + { url = "https://files.pythonhosted.org/packages/e8/09/486d6ac523dd33b80b368247f238125d027964cfacb45c654841e88fb2ae/kiwisolver-1.4.9-cp310-cp310-win_arm64.whl", hash = "sha256:7b4da0d01ac866a57dd61ac258c5607b4cd677f63abaec7b148354d2b2cdd536", size = 65036, upload-time = "2025-08-10T21:25:52.063Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/c80b0d5a9d8a1a65f4f815f2afff9798b12c3b9f31f1d304dd233dd920e2/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eb14a5da6dc7642b0f3a18f13654847cd8b7a2550e2645a5bda677862b03ba16", size = 124167, upload-time = "2025-08-10T21:25:53.403Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c0/27fe1a68a39cf62472a300e2879ffc13c0538546c359b86f149cc19f6ac3/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39a219e1c81ae3b103643d2aedb90f1ef22650deb266ff12a19e7773f3e5f089", size = 66579, upload-time = "2025-08-10T21:25:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/31/a2/a12a503ac1fd4943c50f9822678e8015a790a13b5490354c68afb8489814/kiwisolver-1.4.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2405a7d98604b87f3fc28b1716783534b1b4b8510d8142adca34ee0bc3c87543", size = 65309, upload-time = "2025-08-10T21:25:55.76Z" }, + { url = "https://files.pythonhosted.org/packages/66/e1/e533435c0be77c3f64040d68d7a657771194a63c279f55573188161e81ca/kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc1ae486f9abcef254b5618dfb4113dd49f94c68e3e027d03cf0143f3f772b61", size = 1435596, upload-time = "2025-08-10T21:25:56.861Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/1e/51b73c7347f9aabdc7215aa79e8b15299097dc2f8e67dee2b095faca9cb0/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a1f570ce4d62d718dce3f179ee78dac3b545ac16c0c04bb363b7607a949c0d1", size = 1246548, upload-time = "2025-08-10T21:25:58.246Z" }, + { url = "https://files.pythonhosted.org/packages/21/aa/72a1c5d1e430294f2d32adb9542719cfb441b5da368d09d268c7757af46c/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb27e7b78d716c591e88e0a09a2139c6577865d7f2e152488c2cc6257f460872", size = 1263618, upload-time = "2025-08-10T21:25:59.857Z" }, + { url = "https://files.pythonhosted.org/packages/a3/af/db1509a9e79dbf4c260ce0cfa3903ea8945f6240e9e59d1e4deb731b1a40/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:15163165efc2f627eb9687ea5f3a28137217d217ac4024893d753f46bce9de26", size = 1317437, upload-time = "2025-08-10T21:26:01.105Z" }, + { url = "https://files.pythonhosted.org/packages/e0/f2/3ea5ee5d52abacdd12013a94130436e19969fa183faa1e7c7fbc89e9a42f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bdee92c56a71d2b24c33a7d4c2856bd6419d017e08caa7802d2963870e315028", size = 2195742, upload-time = "2025-08-10T21:26:02.675Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9b/1efdd3013c2d9a2566aa6a337e9923a00590c516add9a1e89a768a3eb2fc/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:412f287c55a6f54b0650bd9b6dce5aceddb95864a1a90c87af16979d37c89771", size = 2290810, upload-time = "2025-08-10T21:26:04.009Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e5/cfdc36109ae4e67361f9bc5b41323648cb24a01b9ade18784657e022e65f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2c93f00dcba2eea70af2be5f11a830a742fe6b579a1d4e00f47760ef13be247a", size = 2461579, upload-time = "2025-08-10T21:26:05.317Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/86/b589e5e86c7610842213994cdea5add00960076bef4ae290c5fa68589cac/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f117e1a089d9411663a3207ba874f31be9ac8eaa5b533787024dc07aeb74f464", size = 2268071, upload-time = "2025-08-10T21:26:06.686Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c6/f8df8509fd1eee6c622febe54384a96cfaf4d43bf2ccec7a0cc17e4715c9/kiwisolver-1.4.9-cp311-cp311-win_amd64.whl", hash = "sha256:be6a04e6c79819c9a8c2373317d19a96048e5a3f90bec587787e86a1153883c2", size = 73840, upload-time = "2025-08-10T21:26:07.94Z" }, + { url = "https://files.pythonhosted.org/packages/e2/2d/16e0581daafd147bc11ac53f032a2b45eabac897f42a338d0a13c1e5c436/kiwisolver-1.4.9-cp311-cp311-win_arm64.whl", hash = "sha256:0ae37737256ba2de764ddc12aed4956460277f00c4996d51a197e72f62f5eec7", size = 65159, upload-time = "2025-08-10T21:26:09.048Z" }, + { url = "https://files.pythonhosted.org/packages/86/c9/13573a747838aeb1c76e3267620daa054f4152444d1f3d1a2324b78255b5/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999", size = 123686, upload-time = "2025-08-10T21:26:10.034Z" }, + { url = "https://files.pythonhosted.org/packages/51/ea/2ecf727927f103ffd1739271ca19c424d0e65ea473fbaeea1c014aea93f6/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2", size = 66460, upload-time = "2025-08-10T21:26:11.083Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/51f5464373ce2aeb5194508298a508b6f21d3867f499556263c64c621914/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14", size = 64952, upload-time = "2025-08-10T21:26:12.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/90/6d240beb0f24b74371762873e9b7f499f1e02166a2d9c5801f4dbf8fa12e/kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04", size = 1474756, upload-time = "2025-08-10T21:26:13.096Z" }, + { url = "https://files.pythonhosted.org/packages/12/42/f36816eaf465220f683fb711efdd1bbf7a7005a2473d0e4ed421389bd26c/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752", size = 1276404, upload-time = "2025-08-10T21:26:14.457Z" }, + { url = "https://files.pythonhosted.org/packages/2e/64/bc2de94800adc830c476dce44e9b40fd0809cddeef1fde9fcf0f73da301f/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77", size = 1294410, upload-time = "2025-08-10T21:26:15.73Z" }, + { url = "https://files.pythonhosted.org/packages/5f/42/2dc82330a70aa8e55b6d395b11018045e58d0bb00834502bf11509f79091/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198", size = 1343631, upload-time = "2025-08-10T21:26:17.045Z" }, + { url = "https://files.pythonhosted.org/packages/22/fd/f4c67a6ed1aab149ec5a8a401c323cee7a1cbe364381bb6c9c0d564e0e20/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d", size = 2224963, upload-time = "2025-08-10T21:26:18.737Z" }, + { url = "https://files.pythonhosted.org/packages/45/aa/76720bd4cb3713314677d9ec94dcc21ced3f1baf4830adde5bb9b2430a5f/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab", size = 2321295, upload-time = "2025-08-10T21:26:20.11Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/19/d3ec0d9ab711242f56ae0dc2fc5d70e298bb4a1f9dfab44c027668c673a1/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2", size = 2487987, upload-time = "2025-08-10T21:26:21.49Z" }, + { url = "https://files.pythonhosted.org/packages/39/e9/61e4813b2c97e86b6fdbd4dd824bf72d28bcd8d4849b8084a357bc0dd64d/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145", size = 2291817, upload-time = "2025-08-10T21:26:22.812Z" }, + { url = "https://files.pythonhosted.org/packages/a0/41/85d82b0291db7504da3c2defe35c9a8a5c9803a730f297bd823d11d5fb77/kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54", size = 73895, upload-time = "2025-08-10T21:26:24.37Z" }, + { url = "https://files.pythonhosted.org/packages/e2/92/5f3068cf15ee5cb624a0c7596e67e2a0bb2adee33f71c379054a491d07da/kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60", size = 64992, upload-time = "2025-08-10T21:26:25.732Z" }, + { url = "https://files.pythonhosted.org/packages/31/c1/c2686cda909742ab66c7388e9a1a8521a59eb89f8bcfbee28fc980d07e24/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8", size = 123681, upload-time = "2025-08-10T21:26:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/ca/f0/f44f50c9f5b1a1860261092e3bc91ecdc9acda848a8b8c6abfda4a24dd5c/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2", size = 66464, upload-time = "2025-08-10T21:26:27.733Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/7a/9d90a151f558e29c3936b8a47ac770235f436f2120aca41a6d5f3d62ae8d/kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f", size = 64961, upload-time = "2025-08-10T21:26:28.729Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e9/f218a2cb3a9ffbe324ca29a9e399fa2d2866d7f348ec3a88df87fc248fc5/kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098", size = 1474607, upload-time = "2025-08-10T21:26:29.798Z" }, + { url = "https://files.pythonhosted.org/packages/d9/28/aac26d4c882f14de59041636292bc838db8961373825df23b8eeb807e198/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed", size = 1276546, upload-time = "2025-08-10T21:26:31.401Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ad/8bfc1c93d4cc565e5069162f610ba2f48ff39b7de4b5b8d93f69f30c4bed/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525", size = 1294482, upload-time = "2025-08-10T21:26:32.721Z" }, + { url = "https://files.pythonhosted.org/packages/da/f1/6aca55ff798901d8ce403206d00e033191f63d82dd708a186e0ed2067e9c/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78", size = 1343720, upload-time = "2025-08-10T21:26:34.032Z" }, + { url = "https://files.pythonhosted.org/packages/d1/91/eed031876c595c81d90d0f6fc681ece250e14bf6998c3d7c419466b523b7/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b", size = 2224907, upload-time = "2025-08-10T21:26:35.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/ec/4d1925f2e49617b9cca9c34bfa11adefad49d00db038e692a559454dfb2e/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799", size = 2321334, upload-time = "2025-08-10T21:26:37.534Z" }, + { url = "https://files.pythonhosted.org/packages/43/cb/450cd4499356f68802750c6ddc18647b8ea01ffa28f50d20598e0befe6e9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3", size = 2488313, upload-time = "2025-08-10T21:26:39.191Z" }, + { url = "https://files.pythonhosted.org/packages/71/67/fc76242bd99f885651128a5d4fa6083e5524694b7c88b489b1b55fdc491d/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c", size = 2291970, upload-time = "2025-08-10T21:26:40.828Z" }, + { url = "https://files.pythonhosted.org/packages/75/bd/f1a5d894000941739f2ae1b65a32892349423ad49c2e6d0771d0bad3fae4/kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d", size = 73894, upload-time = "2025-08-10T21:26:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/95/38/dce480814d25b99a391abbddadc78f7c117c6da34be68ca8b02d5848b424/kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2", size = 64995, upload-time = "2025-08-10T21:26:43.889Z" }, + { url = "https://files.pythonhosted.org/packages/e2/37/7d218ce5d92dadc5ebdd9070d903e0c7cf7edfe03f179433ac4d13ce659c/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1", size = 126510, upload-time = "2025-08-10T21:26:44.915Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/b0/e85a2b48233daef4b648fb657ebbb6f8367696a2d9548a00b4ee0eb67803/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1", size = 67903, upload-time = "2025-08-10T21:26:45.934Z" }, + { url = "https://files.pythonhosted.org/packages/44/98/f2425bc0113ad7de24da6bb4dae1343476e95e1d738be7c04d31a5d037fd/kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11", size = 66402, upload-time = "2025-08-10T21:26:47.101Z" }, + { url = "https://files.pythonhosted.org/packages/98/d8/594657886df9f34c4177cc353cc28ca7e6e5eb562d37ccc233bff43bbe2a/kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c", size = 1582135, upload-time = "2025-08-10T21:26:48.665Z" }, + { url = "https://files.pythonhosted.org/packages/5c/c6/38a115b7170f8b306fc929e166340c24958347308ea3012c2b44e7e295db/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197", size = 1389409, upload-time = "2025-08-10T21:26:50.335Z" }, + { url = "https://files.pythonhosted.org/packages/bf/3b/e04883dace81f24a568bcee6eb3001da4ba05114afa622ec9b6fafdc1f5e/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c", size = 1401763, upload-time = "2025-08-10T21:26:51.867Z" }, + { url = "https://files.pythonhosted.org/packages/9f/80/20ace48e33408947af49d7d15c341eaee69e4e0304aab4b7660e234d6288/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185", size = 1453643, upload-time = "2025-08-10T21:26:53.592Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/31/6ce4380a4cd1f515bdda976a1e90e547ccd47b67a1546d63884463c92ca9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748", size = 2330818, upload-time = "2025-08-10T21:26:55.051Z" }, + { url = "https://files.pythonhosted.org/packages/fa/e9/3f3fcba3bcc7432c795b82646306e822f3fd74df0ee81f0fa067a1f95668/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64", size = 2419963, upload-time = "2025-08-10T21:26:56.421Z" }, + { url = "https://files.pythonhosted.org/packages/99/43/7320c50e4133575c66e9f7dadead35ab22d7c012a3b09bb35647792b2a6d/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff", size = 2594639, upload-time = "2025-08-10T21:26:57.882Z" }, + { url = "https://files.pythonhosted.org/packages/65/d6/17ae4a270d4a987ef8a385b906d2bdfc9fce502d6dc0d3aea865b47f548c/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07", size = 2391741, upload-time = "2025-08-10T21:26:59.237Z" }, + { url = "https://files.pythonhosted.org/packages/2a/8f/8f6f491d595a9e5912971f3f863d81baddccc8a4d0c3749d6a0dd9ffc9df/kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c", size = 68646, upload-time = "2025-08-10T21:27:00.52Z" }, + { url = "https://files.pythonhosted.org/packages/6b/32/6cc0fbc9c54d06c2969faa9c1d29f5751a2e51809dd55c69055e62d9b426/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386", size = 123806, upload-time = "2025-08-10T21:27:01.537Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/dd/2bfb1d4a4823d92e8cbb420fe024b8d2167f72079b3bb941207c42570bdf/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552", size = 66605, upload-time = "2025-08-10T21:27:03.335Z" }, + { url = "https://files.pythonhosted.org/packages/f7/69/00aafdb4e4509c2ca6064646cba9cd4b37933898f426756adb2cb92ebbed/kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3", size = 64925, upload-time = "2025-08-10T21:27:04.339Z" }, + { url = "https://files.pythonhosted.org/packages/43/dc/51acc6791aa14e5cb6d8a2e28cefb0dc2886d8862795449d021334c0df20/kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58", size = 1472414, upload-time = "2025-08-10T21:27:05.437Z" }, + { url = "https://files.pythonhosted.org/packages/3d/bb/93fa64a81db304ac8a246f834d5094fae4b13baf53c839d6bb6e81177129/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4", size = 1281272, upload-time = "2025-08-10T21:27:07.063Z" }, + { url = "https://files.pythonhosted.org/packages/70/e6/6df102916960fb8d05069d4bd92d6d9a8202d5a3e2444494e7cd50f65b7a/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df", size = 1298578, upload-time = "2025-08-10T21:27:08.452Z" }, + { url = "https://files.pythonhosted.org/packages/7c/47/e142aaa612f5343736b087864dbaebc53ea8831453fb47e7521fa8658f30/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6", size = 1345607, upload-time = "2025-08-10T21:27:10.125Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/89/d641a746194a0f4d1a3670fb900d0dbaa786fb98341056814bc3f058fa52/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5", size = 2230150, upload-time = "2025-08-10T21:27:11.484Z" }, + { url = "https://files.pythonhosted.org/packages/aa/6b/5ee1207198febdf16ac11f78c5ae40861b809cbe0e6d2a8d5b0b3044b199/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf", size = 2325979, upload-time = "2025-08-10T21:27:12.917Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ff/b269eefd90f4ae14dcc74973d5a0f6d28d3b9bb1afd8c0340513afe6b39a/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5", size = 2491456, upload-time = "2025-08-10T21:27:14.353Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d4/10303190bd4d30de547534601e259a4fbf014eed94aae3e5521129215086/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce", size = 2294621, upload-time = "2025-08-10T21:27:15.808Z" }, + { url = "https://files.pythonhosted.org/packages/28/e0/a9a90416fce5c0be25742729c2ea52105d62eda6c4be4d803c2a7be1fa50/kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7", size = 75417, upload-time = "2025-08-10T21:27:17.436Z" }, + { url = "https://files.pythonhosted.org/packages/1f/10/6949958215b7a9a264299a7db195564e87900f709db9245e4ebdd3c70779/kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c", size = 66582, upload-time = "2025-08-10T21:27:18.436Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/79/60e53067903d3bc5469b369fe0dfc6b3482e2133e85dae9daa9527535991/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548", size = 126514, upload-time = "2025-08-10T21:27:19.465Z" }, + { url = "https://files.pythonhosted.org/packages/25/d1/4843d3e8d46b072c12a38c97c57fab4608d36e13fe47d47ee96b4d61ba6f/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d", size = 67905, upload-time = "2025-08-10T21:27:20.51Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ae/29ffcbd239aea8b93108de1278271ae764dfc0d803a5693914975f200596/kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c", size = 66399, upload-time = "2025-08-10T21:27:21.496Z" }, + { url = "https://files.pythonhosted.org/packages/a1/ae/d7ba902aa604152c2ceba5d352d7b62106bedbccc8e95c3934d94472bfa3/kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122", size = 1582197, upload-time = "2025-08-10T21:27:22.604Z" }, + { url = "https://files.pythonhosted.org/packages/f2/41/27c70d427eddb8bc7e4f16420a20fefc6f480312122a59a959fdfe0445ad/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64", size = 1390125, upload-time = "2025-08-10T21:27:24.036Z" }, + { url = "https://files.pythonhosted.org/packages/41/42/b3799a12bafc76d962ad69083f8b43b12bf4fe78b097b12e105d75c9b8f1/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134", size = 1402612, upload-time = "2025-08-10T21:27:25.773Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/b5/a210ea073ea1cfaca1bb5c55a62307d8252f531beb364e18aa1e0888b5a0/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370", size = 1453990, upload-time = "2025-08-10T21:27:27.089Z" }, + { url = "https://files.pythonhosted.org/packages/5f/ce/a829eb8c033e977d7ea03ed32fb3c1781b4fa0433fbadfff29e39c676f32/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21", size = 2331601, upload-time = "2025-08-10T21:27:29.343Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4b/b5e97eb142eb9cd0072dacfcdcd31b1c66dc7352b0f7c7255d339c0edf00/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a", size = 2422041, upload-time = "2025-08-10T21:27:30.754Z" }, + { url = "https://files.pythonhosted.org/packages/40/be/8eb4cd53e1b85ba4edc3a9321666f12b83113a178845593307a3e7891f44/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f", size = 2594897, upload-time = "2025-08-10T21:27:32.803Z" }, + { url = "https://files.pythonhosted.org/packages/99/dd/841e9a66c4715477ea0abc78da039832fbb09dac5c35c58dc4c41a407b8a/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369", size = 2391835, upload-time = "2025-08-10T21:27:34.23Z" }, + { url = "https://files.pythonhosted.org/packages/0c/28/4b2e5c47a0da96896fdfdb006340ade064afa1e63675d01ea5ac222b6d52/kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891", size = 79988, upload-time = "2025-08-10T21:27:35.587Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/be/3578e8afd18c88cdf9cb4cffde75a96d2be38c5a903f1ed0ceec061bd09e/kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32", size = 70260, upload-time = "2025-08-10T21:27:36.606Z" }, + { url = "https://files.pythonhosted.org/packages/a2/63/fde392691690f55b38d5dd7b3710f5353bf7a8e52de93a22968801ab8978/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4d1d9e582ad4d63062d34077a9a1e9f3c34088a2ec5135b1f7190c07cf366527", size = 60183, upload-time = "2025-08-10T21:27:37.669Z" }, + { url = "https://files.pythonhosted.org/packages/27/b1/6aad34edfdb7cced27f371866f211332bba215bfd918ad3322a58f480d8b/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:deed0c7258ceb4c44ad5ec7d9918f9f14fd05b2be86378d86cf50e63d1e7b771", size = 58675, upload-time = "2025-08-10T21:27:39.031Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1a/23d855a702bb35a76faed5ae2ba3de57d323f48b1f6b17ee2176c4849463/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a590506f303f512dff6b7f75fd2fd18e16943efee932008fe7140e5fa91d80e", size = 80277, upload-time = "2025-08-10T21:27:40.129Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5b/5239e3c2b8fb5afa1e8508f721bb77325f740ab6994d963e61b2b7abcc1e/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e09c2279a4d01f099f52d5c4b3d9e208e91edcbd1a175c9662a8b16e000fece9", size = 77994, upload-time = "2025-08-10T21:27:41.181Z" }, + { url = "https://files.pythonhosted.org/packages/f9/1c/5d4d468fb16f8410e596ed0eac02d2c68752aa7dc92997fe9d60a7147665/kiwisolver-1.4.9-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c9e7cdf45d594ee04d5be1b24dd9d49f3d1590959b2271fb30b5ca2b262c00fb", size = 73744, upload-time = "2025-08-10T21:27:42.254Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/0f/36d89194b5a32c054ce93e586d4049b6c2c22887b0eb229c61c68afd3078/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:720e05574713db64c356e86732c0f3c5252818d05f9df320f0ad8380641acea5", size = 60104, upload-time = "2025-08-10T21:27:43.287Z" }, + { url = "https://files.pythonhosted.org/packages/52/ba/4ed75f59e4658fd21fe7dde1fee0ac397c678ec3befba3fe6482d987af87/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:17680d737d5335b552994a2008fab4c851bcd7de33094a82067ef3a576ff02fa", size = 58592, upload-time = "2025-08-10T21:27:44.314Z" }, + { url = "https://files.pythonhosted.org/packages/33/01/a8ea7c5ea32a9b45ceeaee051a04c8ed4320f5add3c51bfa20879b765b70/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85b5352f94e490c028926ea567fc569c52ec79ce131dadb968d3853e809518c2", size = 80281, upload-time = "2025-08-10T21:27:45.369Z" }, + { url = "https://files.pythonhosted.org/packages/da/e3/dbd2ecdce306f1d07a1aaf324817ee993aab7aee9db47ceac757deabafbe/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:464415881e4801295659462c49461a24fb107c140de781d55518c4b80cb6790f", size = 78009, upload-time = "2025-08-10T21:27:46.376Z" }, + { url = "https://files.pythonhosted.org/packages/da/e9/0d4add7873a73e462aeb45c036a2dead2562b825aa46ba326727b3f31016/kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1", size = 73929, upload-time = "2025-08-10T21:27:48.236Z" }, +] + [[package]] name = "lark" version = "1.3.1" @@ -2578,6 +2913,81 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = 
"2025-09-27T18:37:28.327Z" }, ] +[[package]] +name = "matplotlib" +version = "3.10.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "contourpy", version = "1.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "contourpy", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "cycler" }, + { name = "fonttools" }, + { name = "kiwisolver" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "pyparsing" }, + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8a/76/d3c6e3a13fe484ebe7718d14e269c9569c4eb0020a968a327acb3b9a8fe6/matplotlib-3.10.8.tar.gz", hash = "sha256:2299372c19d56bcd35cf05a2738308758d32b9eaed2371898d8f5bd33f084aa3", size = 34806269, upload-time = "2025-12-10T22:56:51.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/be/a30bd917018ad220c400169fba298f2bb7003c8ccbc0c3e24ae2aacad1e8/matplotlib-3.10.8-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:00270d217d6b20d14b584c521f810d60c5c78406dc289859776550df837dcda7", size = 8239828, upload-time = "2025-12-10T22:55:02.313Z" }, + { url = "https://files.pythonhosted.org/packages/58/27/ca01e043c4841078e82cf6e80a6993dfecd315c3d79f5f3153afbb8e1ec6/matplotlib-3.10.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b3c1cc42aa184b3f738cfa18c1c1d72fd496d85467a6cf7b807936d39aa656", size = 8128050, upload-time = "2025-12-10T22:55:04.997Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/aa/7ab67f2b729ae6a91bcf9dcac0affb95fb8c56f7fd2b2af894ae0b0cf6fa/matplotlib-3.10.8-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ee40c27c795bda6a5292e9cff9890189d32f7e3a0bf04e0e3c9430c4a00c37df", size = 8700452, upload-time = "2025-12-10T22:55:07.47Z" }, + { url = "https://files.pythonhosted.org/packages/73/ae/2d5817b0acee3c49b7e7ccfbf5b273f284957cc8e270adf36375db353190/matplotlib-3.10.8-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a48f2b74020919552ea25d222d5cc6af9ca3f4eb43a93e14d068457f545c2a17", size = 9534928, upload-time = "2025-12-10T22:55:10.566Z" }, + { url = "https://files.pythonhosted.org/packages/c9/5b/8e66653e9f7c39cb2e5cab25fce4810daffa2bff02cbf5f3077cea9e942c/matplotlib-3.10.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f254d118d14a7f99d616271d6c3c27922c092dac11112670b157798b89bf4933", size = 9586377, upload-time = "2025-12-10T22:55:12.362Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e2/fd0bbadf837f81edb0d208ba8f8cb552874c3b16e27cb91a31977d90875d/matplotlib-3.10.8-cp310-cp310-win_amd64.whl", hash = "sha256:f9b587c9c7274c1613a30afabf65a272114cd6cdbe67b3406f818c79d7ab2e2a", size = 8128127, upload-time = "2025-12-10T22:55:14.436Z" }, + { url = "https://files.pythonhosted.org/packages/f8/86/de7e3a1cdcfc941483af70609edc06b83e7c8a0e0dc9ac325200a3f4d220/matplotlib-3.10.8-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6be43b667360fef5c754dda5d25a32e6307a03c204f3c0fc5468b78fa87b4160", size = 8251215, upload-time = "2025-12-10T22:55:16.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/14/baad3222f424b19ce6ad243c71de1ad9ec6b2e4eb1e458a48fdc6d120401/matplotlib-3.10.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2b336e2d91a3d7006864e0990c83b216fcdca64b5a6484912902cef87313d78", size = 8139625, upload-time = "2025-12-10T22:55:17.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/a0/7024215e95d456de5883e6732e708d8187d9753a21d32f8ddb3befc0c445/matplotlib-3.10.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:efb30e3baaea72ce5928e32bab719ab4770099079d66726a62b11b1ef7273be4", size = 8712614, upload-time = "2025-12-10T22:55:20.8Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f4/b8347351da9a5b3f41e26cf547252d861f685c6867d179a7c9d60ad50189/matplotlib-3.10.8-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d56a1efd5bfd61486c8bc968fa18734464556f0fb8e51690f4ac25d85cbbbbc2", size = 9540997, upload-time = "2025-12-10T22:55:23.258Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c0/c7b914e297efe0bc36917bf216b2acb91044b91e930e878ae12981e461e5/matplotlib-3.10.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:238b7ce5717600615c895050239ec955d91f321c209dd110db988500558e70d6", size = 9596825, upload-time = "2025-12-10T22:55:25.217Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d3/a4bbc01c237ab710a1f22b4da72f4ff6d77eb4c7735ea9811a94ae239067/matplotlib-3.10.8-cp311-cp311-win_amd64.whl", hash = "sha256:18821ace09c763ec93aef5eeff087ee493a24051936d7b9ebcad9662f66501f9", size = 8135090, upload-time = "2025-12-10T22:55:27.162Z" }, + { url = "https://files.pythonhosted.org/packages/89/dd/a0b6588f102beab33ca6f5218b31725216577b2a24172f327eaf6417d5c9/matplotlib-3.10.8-cp311-cp311-win_arm64.whl", hash = "sha256:bab485bcf8b1c7d2060b4fcb6fc368a9e6f4cd754c9c2fea281f4be21df394a2", size = 8012377, upload-time = "2025-12-10T22:55:29.185Z" }, + { url = "https://files.pythonhosted.org/packages/9e/67/f997cdcbb514012eb0d10cd2b4b332667997fb5ebe26b8d41d04962fa0e6/matplotlib-3.10.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:64fcc24778ca0404ce0cb7b6b77ae1f4c7231cdd60e6778f999ee05cbd581b9a", size = 8260453, upload-time = "2025-12-10T22:55:30.709Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/65/07d5f5c7f7c994f12c768708bd2e17a4f01a2b0f44a1c9eccad872433e2e/matplotlib-3.10.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9a5ca4ac220a0cdd1ba6bcba3608547117d30468fefce49bb26f55c1a3d5c58", size = 8148321, upload-time = "2025-12-10T22:55:33.265Z" }, + { url = "https://files.pythonhosted.org/packages/3e/f3/c5195b1ae57ef85339fd7285dfb603b22c8b4e79114bae5f4f0fcf688677/matplotlib-3.10.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3ab4aabc72de4ff77b3ec33a6d78a68227bf1123465887f9905ba79184a1cc04", size = 8716944, upload-time = "2025-12-10T22:55:34.922Z" }, + { url = "https://files.pythonhosted.org/packages/00/f9/7638f5cc82ec8a7aa005de48622eecc3ed7c9854b96ba15bd76b7fd27574/matplotlib-3.10.8-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24d50994d8c5816ddc35411e50a86ab05f575e2530c02752e02538122613371f", size = 9550099, upload-time = "2025-12-10T22:55:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/57/61/78cd5920d35b29fd2a0fe894de8adf672ff52939d2e9b43cb83cd5ce1bc7/matplotlib-3.10.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:99eefd13c0dc3b3c1b4d561c1169e65fe47aab7b8158754d7c084088e2329466", size = 9613040, upload-time = "2025-12-10T22:55:38.715Z" }, + { url = "https://files.pythonhosted.org/packages/30/4e/c10f171b6e2f44d9e3a2b96efa38b1677439d79c99357600a62cc1e9594e/matplotlib-3.10.8-cp312-cp312-win_amd64.whl", hash = "sha256:dd80ecb295460a5d9d260df63c43f4afbdd832d725a531f008dad1664f458adf", size = 8142717, upload-time = "2025-12-10T22:55:41.103Z" }, + { url = "https://files.pythonhosted.org/packages/f1/76/934db220026b5fef85f45d51a738b91dea7d70207581063cd9bd8fafcf74/matplotlib-3.10.8-cp312-cp312-win_arm64.whl", hash = "sha256:3c624e43ed56313651bc18a47f838b60d7b8032ed348911c54906b130b20071b", size = 8012751, upload-time = "2025-12-10T22:55:42.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/b9/15fd5541ef4f5b9a17eefd379356cf12175fe577424e7b1d80676516031a/matplotlib-3.10.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3f2e409836d7f5ac2f1c013110a4d50b9f7edc26328c108915f9075d7d7a91b6", size = 8261076, upload-time = "2025-12-10T22:55:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a0/2ba3473c1b66b9c74dc7107c67e9008cb1782edbe896d4c899d39ae9cf78/matplotlib-3.10.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56271f3dac49a88d7fca5060f004d9d22b865f743a12a23b1e937a0be4818ee1", size = 8148794, upload-time = "2025-12-10T22:55:46.252Z" }, + { url = "https://files.pythonhosted.org/packages/75/97/a471f1c3eb1fd6f6c24a31a5858f443891d5127e63a7788678d14e249aea/matplotlib-3.10.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a0a7f52498f72f13d4a25ea70f35f4cb60642b466cbb0a9be951b5bc3f45a486", size = 8718474, upload-time = "2025-12-10T22:55:47.864Z" }, + { url = "https://files.pythonhosted.org/packages/01/be/cd478f4b66f48256f42927d0acbcd63a26a893136456cd079c0cc24fbabf/matplotlib-3.10.8-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:646d95230efb9ca614a7a594d4fcacde0ac61d25e37dd51710b36477594963ce", size = 9549637, upload-time = "2025-12-10T22:55:50.048Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7c/8dc289776eae5109e268c4fb92baf870678dc048a25d4ac903683b86d5bf/matplotlib-3.10.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f89c151aab2e2e23cb3fe0acad1e8b82841fd265379c4cecd0f3fcb34c15e0f6", size = 9613678, upload-time = "2025-12-10T22:55:52.21Z" }, + { url = "https://files.pythonhosted.org/packages/64/40/37612487cc8a437d4dd261b32ca21fe2d79510fe74af74e1f42becb1bdb8/matplotlib-3.10.8-cp313-cp313-win_amd64.whl", hash = "sha256:e8ea3e2d4066083e264e75c829078f9e149fa119d27e19acd503de65e0b13149", size = 8142686, upload-time = "2025-12-10T22:55:54.253Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/52/8d8a8730e968185514680c2a6625943f70269509c3dcfc0dcf7d75928cb8/matplotlib-3.10.8-cp313-cp313-win_arm64.whl", hash = "sha256:c108a1d6fa78a50646029cb6d49808ff0fc1330fda87fa6f6250c6b5369b6645", size = 8012917, upload-time = "2025-12-10T22:55:56.268Z" }, + { url = "https://files.pythonhosted.org/packages/b5/27/51fe26e1062f298af5ef66343d8ef460e090a27fea73036c76c35821df04/matplotlib-3.10.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ad3d9833a64cf48cc4300f2b406c3d0f4f4724a91c0bd5640678a6ba7c102077", size = 8305679, upload-time = "2025-12-10T22:55:57.856Z" }, + { url = "https://files.pythonhosted.org/packages/2c/1e/4de865bc591ac8e3062e835f42dd7fe7a93168d519557837f0e37513f629/matplotlib-3.10.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:eb3823f11823deade26ce3b9f40dcb4a213da7a670013929f31d5f5ed1055b22", size = 8198336, upload-time = "2025-12-10T22:55:59.371Z" }, + { url = "https://files.pythonhosted.org/packages/c6/cb/2f7b6e75fb4dce87ef91f60cac4f6e34f4c145ab036a22318ec837971300/matplotlib-3.10.8-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d9050fee89a89ed57b4fb2c1bfac9a3d0c57a0d55aed95949eedbc42070fea39", size = 8731653, upload-time = "2025-12-10T22:56:01.032Z" }, + { url = "https://files.pythonhosted.org/packages/46/b3/bd9c57d6ba670a37ab31fb87ec3e8691b947134b201f881665b28cc039ff/matplotlib-3.10.8-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b44d07310e404ba95f8c25aa5536f154c0a8ec473303535949e52eb71d0a1565", size = 9561356, upload-time = "2025-12-10T22:56:02.95Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3d/8b94a481456dfc9dfe6e39e93b5ab376e50998cddfd23f4ae3b431708f16/matplotlib-3.10.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0a33deb84c15ede243aead39f77e990469fff93ad1521163305095b77b72ce4a", size = 9614000, upload-time = "2025-12-10T22:56:05.411Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/cd/bc06149fe5585ba800b189a6a654a75f1f127e8aab02fd2be10df7fa500c/matplotlib-3.10.8-cp313-cp313t-win_amd64.whl", hash = "sha256:3a48a78d2786784cc2413e57397981fb45c79e968d99656706018d6e62e57958", size = 8220043, upload-time = "2025-12-10T22:56:07.551Z" }, + { url = "https://files.pythonhosted.org/packages/e3/de/b22cf255abec916562cc04eef457c13e58a1990048de0c0c3604d082355e/matplotlib-3.10.8-cp313-cp313t-win_arm64.whl", hash = "sha256:15d30132718972c2c074cd14638c7f4592bd98719e2308bccea40e0538bc0cb5", size = 8062075, upload-time = "2025-12-10T22:56:09.178Z" }, + { url = "https://files.pythonhosted.org/packages/3c/43/9c0ff7a2f11615e516c3b058e1e6e8f9614ddeca53faca06da267c48345d/matplotlib-3.10.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b53285e65d4fa4c86399979e956235deb900be5baa7fc1218ea67fbfaeaadd6f", size = 8262481, upload-time = "2025-12-10T22:56:10.885Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ca/e8ae28649fcdf039fda5ef554b40a95f50592a3c47e6f7270c9561c12b07/matplotlib-3.10.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:32f8dce744be5569bebe789e46727946041199030db8aeb2954d26013a0eb26b", size = 8151473, upload-time = "2025-12-10T22:56:12.377Z" }, + { url = "https://files.pythonhosted.org/packages/f1/6f/009d129ae70b75e88cbe7e503a12a4c0670e08ed748a902c2568909e9eb5/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cf267add95b1c88300d96ca837833d4112756045364f5c734a2276038dae27d", size = 9553896, upload-time = "2025-12-10T22:56:14.432Z" }, + { url = "https://files.pythonhosted.org/packages/f5/26/4221a741eb97967bc1fd5e4c52b9aa5a91b2f4ec05b59f6def4d820f9df9/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2cf5bd12cecf46908f286d7838b2abc6c91cda506c0445b8223a7c19a00df008", size = 9824193, upload-time = "2025-12-10T22:56:16.29Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/f3/3abf75f38605772cf48a9daf5821cd4f563472f38b4b828c6fba6fa6d06e/matplotlib-3.10.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:41703cc95688f2516b480f7f339d8851a6035f18e100ee6a32bc0b8536a12a9c", size = 9615444, upload-time = "2025-12-10T22:56:18.155Z" }, + { url = "https://files.pythonhosted.org/packages/93/a5/de89ac80f10b8dc615807ee1133cd99ac74082581196d4d9590bea10690d/matplotlib-3.10.8-cp314-cp314-win_amd64.whl", hash = "sha256:83d282364ea9f3e52363da262ce32a09dfe241e4080dcedda3c0db059d3c1f11", size = 8272719, upload-time = "2025-12-10T22:56:20.366Z" }, + { url = "https://files.pythonhosted.org/packages/69/ce/b006495c19ccc0a137b48083168a37bd056392dee02f87dba0472f2797fe/matplotlib-3.10.8-cp314-cp314-win_arm64.whl", hash = "sha256:2c1998e92cd5999e295a731bcb2911c75f597d937341f3030cc24ef2733d78a8", size = 8144205, upload-time = "2025-12-10T22:56:22.239Z" }, + { url = "https://files.pythonhosted.org/packages/68/d9/b31116a3a855bd313c6fcdb7226926d59b041f26061c6c5b1be66a08c826/matplotlib-3.10.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b5a2b97dbdc7d4f353ebf343744f1d1f1cca8aa8bfddb4262fcf4306c3761d50", size = 8305785, upload-time = "2025-12-10T22:56:24.218Z" }, + { url = "https://files.pythonhosted.org/packages/1e/90/6effe8103f0272685767ba5f094f453784057072f49b393e3ea178fe70a5/matplotlib-3.10.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3f5c3e4da343bba819f0234186b9004faba952cc420fbc522dc4e103c1985908", size = 8198361, upload-time = "2025-12-10T22:56:26.787Z" }, + { url = "https://files.pythonhosted.org/packages/d7/65/a73188711bea603615fc0baecca1061429ac16940e2385433cc778a9d8e7/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f62550b9a30afde8c1c3ae450e5eb547d579dd69b25c2fc7a1c67f934c1717a", size = 9561357, upload-time = "2025-12-10T22:56:28.953Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/3d/b5c5d5d5be8ce63292567f0e2c43dde9953d3ed86ac2de0a72e93c8f07a1/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:495672de149445ec1b772ff2c9ede9b769e3cb4f0d0aa7fa730d7f59e2d4e1c1", size = 9823610, upload-time = "2025-12-10T22:56:31.455Z" }, + { url = "https://files.pythonhosted.org/packages/4d/4b/e7beb6bbd49f6bae727a12b270a2654d13c397576d25bd6786e47033300f/matplotlib-3.10.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:595ba4d8fe983b88f0eec8c26a241e16d6376fe1979086232f481f8f3f67494c", size = 9614011, upload-time = "2025-12-10T22:56:33.85Z" }, + { url = "https://files.pythonhosted.org/packages/7c/e6/76f2813d31f032e65f6f797e3f2f6e4aab95b65015924b1c51370395c28a/matplotlib-3.10.8-cp314-cp314t-win_amd64.whl", hash = "sha256:25d380fe8b1dc32cf8f0b1b448470a77afb195438bafdf1d858bfb876f3edf7b", size = 8362801, upload-time = "2025-12-10T22:56:36.107Z" }, + { url = "https://files.pythonhosted.org/packages/5d/49/d651878698a0b67f23aa28e17f45a6d6dd3d3f933fa29087fa4ce5947b5a/matplotlib-3.10.8-cp314-cp314t-win_arm64.whl", hash = "sha256:113bb52413ea508ce954a02c10ffd0d565f9c3bc7f2eddc27dfe1731e71c7b5f", size = 8192560, upload-time = "2025-12-10T22:56:38.008Z" }, + { url = "https://files.pythonhosted.org/packages/f5/43/31d59500bb950b0d188e149a2e552040528c13d6e3d6e84d0cccac593dcd/matplotlib-3.10.8-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f97aeb209c3d2511443f8797e3e5a569aebb040d4f8bc79aa3ee78a8fb9e3dd8", size = 8237252, upload-time = "2025-12-10T22:56:39.529Z" }, + { url = "https://files.pythonhosted.org/packages/0c/2c/615c09984f3c5f907f51c886538ad785cf72e0e11a3225de2c0f9442aecc/matplotlib-3.10.8-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fb061f596dad3a0f52b60dc6a5dec4a0c300dec41e058a7efe09256188d170b7", size = 8124693, upload-time = "2025-12-10T22:56:41.758Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/e1/2757277a1c56041e1fc104b51a0f7b9a4afc8eb737865d63cababe30bc61/matplotlib-3.10.8-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:12d90df9183093fcd479f4172ac26b322b1248b15729cb57f42f71f24c7e37a3", size = 8702205, upload-time = "2025-12-10T22:56:43.415Z" }, + { url = "https://files.pythonhosted.org/packages/04/30/3afaa31c757f34b7725ab9d2ba8b48b5e89c2019c003e7d0ead143aabc5a/matplotlib-3.10.8-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6da7c2ce169267d0d066adcf63758f0604aa6c3eebf67458930f9d9b79ad1db1", size = 8249198, upload-time = "2025-12-10T22:56:45.584Z" }, + { url = "https://files.pythonhosted.org/packages/48/2f/6334aec331f57485a642a7c8be03cb286f29111ae71c46c38b363230063c/matplotlib-3.10.8-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9153c3292705be9f9c64498a8872118540c3f4123d1a1c840172edf262c8be4a", size = 8136817, upload-time = "2025-12-10T22:56:47.339Z" }, + { url = "https://files.pythonhosted.org/packages/73/e4/6d6f14b2a759c622f191b2d67e9075a3f56aaccb3be4bb9bb6890030d0a0/matplotlib-3.10.8-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ae029229a57cd1e8fe542485f27e7ca7b23aa9e8944ddb4985d0bc444f1eca2", size = 8713867, upload-time = "2025-12-10T22:56:48.954Z" }, +] + [[package]] name = "matplotlib-inline" version = "0.2.1" From 1506ab56599dbcab999246fb54c6f66118391542 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Thu, 5 Feb 2026 17:18:29 -0700 Subject: [PATCH 21/64] Simplify to ImageInferenceParams. 
Persist images in create mode to disk --- .../src/data_designer/config/__init__.py | 6 +- .../data_designer/config/column_configs.py | 12 +- .../src/data_designer/config/models.py | 90 ++++----- .../config/utils/visualization.py | 74 ++++++- .../column_generators/generators/image.py | 54 ++++- .../dataset_builders/column_wise_builder.py | 37 +++- .../src/data_designer/engine/models/facade.py | 185 +++++++++++++----- .../data_designer/engine/storage/__init__.py | 6 + .../engine/storage/image_storage.py | 166 ++++++++++++++++ .../integrations/huggingface/client.py | 32 +++ 10 files changed, 539 insertions(+), 123 deletions(-) create mode 100644 packages/data-designer-engine/src/data_designer/engine/storage/__init__.py create mode 100644 packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py diff --git a/packages/data-designer-config/src/data_designer/config/__init__.py b/packages/data-designer-config/src/data_designer/config/__init__.py index 1b104282..5686b506 100644 --- a/packages/data-designer-config/src/data_designer/config/__init__.py +++ b/packages/data-designer-config/src/data_designer/config/__init__.py @@ -38,13 +38,12 @@ ToolConfig, ) from data_designer.config.models import ( # noqa: F401 - ChatCompletionImageInferenceParams, ChatCompletionInferenceParams, - DiffusionImageInferenceParams, EmbeddingInferenceParams, GenerationType, ImageContext, ImageFormat, + ImageInferenceParams, ManualDistribution, ManualDistributionParams, Modality, @@ -149,13 +148,12 @@ "MCPProvider": (_MOD_MCP, "MCPProvider"), "ToolConfig": (_MOD_MCP, "ToolConfig"), # models - "ChatCompletionImageInferenceParams": (_MOD_MODELS, "ChatCompletionImageInferenceParams"), "ChatCompletionInferenceParams": (_MOD_MODELS, "ChatCompletionInferenceParams"), - "DiffusionImageInferenceParams": (_MOD_MODELS, "DiffusionImageInferenceParams"), "EmbeddingInferenceParams": (_MOD_MODELS, "EmbeddingInferenceParams"), "GenerationType": (_MOD_MODELS, "GenerationType"), "ImageContext": 
(_MOD_MODELS, "ImageContext"), "ImageFormat": (_MOD_MODELS, "ImageFormat"), + "ImageInferenceParams": (_MOD_MODELS, "ImageInferenceParams"), "ManualDistribution": (_MOD_MODELS, "ManualDistribution"), "ManualDistributionParams": (_MOD_MODELS, "ManualDistributionParams"), "Modality": (_MOD_MODELS, "Modality"), diff --git a/packages/data-designer-config/src/data_designer/config/column_configs.py b/packages/data-designer-config/src/data_designer/config/column_configs.py index 5a2c6be8..facbc4cf 100644 --- a/packages/data-designer-config/src/data_designer/config/column_configs.py +++ b/packages/data-designer-config/src/data_designer/config/column_configs.py @@ -489,13 +489,15 @@ class ImageGenerationColumnConfig(SingleColumnConfig): """Configuration for image generation columns. Image columns generate images using either autoregressive or diffusion models. - The API used is automatically determined by the model's inference parameters: + The API used is automatically determined based on the model name: - - **Autoregressive models** (ChatCompletionImageInferenceParams): - GPT-5, gpt-image-*, Gemini image generation models via chat completions API + - **Diffusion models** (DALL-E, Stable Diffusion, Imagen, etc.) β†’ image_generation API + - **All other models** β†’ chat/completions API (default) - - **Diffusion models** (DiffusionImageInferenceParams): - DALL-E, Imagen, Stable Diffusion via image_generation API + Image storage behavior: + - **Create mode**: Images saved to disk with UUID filenames in `images/` folder, + dataframe stores relative paths (e.g., "images/abc123.png") + - **Preview mode**: Images stored as base64 directly in dataframe Attributes: column_type: Discriminator field, always "image-generation" for this configuration type. 
diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index 7a6e4955..dc3533e3 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -242,8 +242,7 @@ def sample(self) -> float: class GenerationType(str, Enum): CHAT_COMPLETION = "chat-completion" EMBEDDING = "embedding" - CHAT_COMPLETION_IMAGE = "chat-completion-image" - DIFFUSION_IMAGE = "diffusion-image" + IMAGE = "image" class BaseInferenceParams(ConfigBase, ABC): @@ -423,19 +422,52 @@ def generate_kwargs(self) -> dict[str, float | int]: return result -class ChatCompletionImageInferenceParams(BaseInferenceParams): - """Configuration for image generation using autoregressive models via chat completions API. +class ImageInferenceParams(BaseInferenceParams): + """Configuration for image generation models. + + Works for all image generation models. The API type is automatically detected + based on the model name: + - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) use image_generation API + - All other models use chat/completions API (default) + + Image storage behavior: + - Create mode: Images saved to disk with UUID filenames, paths stored in dataframe + - Preview mode: Images stored as base64 directly in dataframe - Uses the standard chat completions API for autoregressive multimodal models - that can generate images (GPT-5, gpt-image-*, Gemini image generation, etc.). + Common parameters like quality and size are provided as optional fields. + For model-specific parameters, use the `extra_body` field inherited from + BaseInferenceParams. Attributes: - generation_type: Type of generation, always "chat-completion-image" for this class. - quality: Optional quality setting for image generation (e.g., "standard", "hd"). - size: Optional size specification for generated images (e.g., "1024x1024", "1792x1024"). 
+ generation_type: Type of generation, always "image" for this class. + quality: Image quality setting (e.g., "standard", "hd"). Optional and model-specific. + size: Image size specification (e.g., "1024x1024", "1792x1024"). Optional and model-specific. + + Example: + ```python + # Standard usage with common params + dd.ImageInferenceParams( + quality="hd", + size="1024x1024" + ) + + # With model-specific params via extra_body + dd.ImageInferenceParams( + quality="hd", + size="1024x1024", + extra_body={ + "generationConfig": { + "imageConfig": { + "aspectRatio": "1:1", + "negativePrompt": "blurry, low quality" + } + } + } + ) + ``` """ - generation_type: Literal[GenerationType.CHAT_COMPLETION_IMAGE] = GenerationType.CHAT_COMPLETION_IMAGE + generation_type: Literal[GenerationType.IMAGE] = GenerationType.IMAGE quality: str | None = None size: str | None = None @@ -449,38 +481,8 @@ def generate_kwargs(self) -> dict[str, Any]: return result -class DiffusionImageInferenceParams(BaseInferenceParams): - """Configuration for image generation using diffusion models via image_generation API. - - Uses the legacy image_generation API for diffusion models like DALL-E, Imagen, - and Stable Diffusion. - - Attributes: - generation_type: Type of generation, always "diffusion-image" for this class. - quality: Quality setting for image generation (e.g., "standard", "hd"). - size: Size specification for generated images (e.g., "1024x1024", "1792x1024"). - output_format: Format of the output ("url" or "base64"). Default: "base64". 
- """ - - generation_type: Literal[GenerationType.DIFFUSION_IMAGE] = GenerationType.DIFFUSION_IMAGE - quality: str - size: str - output_format: ModalityDataType = ModalityDataType.BASE64 - - @property - def generate_kwargs(self) -> dict[str, Any]: - result = super().generate_kwargs - result["size"] = self.size - result["quality"] = self.quality - result["response_format"] = "b64_json" if self.output_format == ModalityDataType.BASE64 else "url" - return result - - InferenceParamsT: TypeAlias = Annotated[ - ChatCompletionInferenceParams - | EmbeddingInferenceParams - | ChatCompletionImageInferenceParams - | DiffusionImageInferenceParams, + ChatCompletionInferenceParams | EmbeddingInferenceParams | ImageInferenceParams, Field(discriminator="generation_type"), ] @@ -517,10 +519,8 @@ def _convert_inference_parameters(cls, value: Any) -> Any: gen_type = value.get("generation_type") # Infer type from generation_type or field presence - if gen_type == "chat-completion-image": - return ChatCompletionImageInferenceParams(**value) - elif gen_type == "diffusion-image": - return DiffusionImageInferenceParams(**value) + if gen_type == "image": + return ImageInferenceParams(**value) elif gen_type == "embedding" or "encoding_format" in value or "dimensions" in value: return EmbeddingInferenceParams(**value) else: diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index b74174f5..56d28fd3 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -70,13 +70,59 @@ def _is_image_url(value: str) -> bool: ) -def _display_image_if_in_notebook(image_data: str, col_name: str, max_width: int = 512) -> bool: +def _is_image_path(value: str) -> bool: + """Check if a string is an image file path.""" + if not isinstance(value, str): + return False + # Check if 
it looks like a file path with image extension + return any(value.lower().endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"]) + + +def _load_image_path_to_base64(image_path: str, base_path: str | None = None) -> str | None: + """Load an image from a file path and return as base64. + + Args: + image_path: Relative or absolute path to the image file. + base_path: Optional base path to resolve relative paths from. + + Returns: + Base64-encoded image data or None if loading fails. + """ + try: + from pathlib import Path + + path = Path(image_path) + + # If path is not absolute, try to resolve it + if not path.is_absolute(): + if base_path: + path = Path(base_path) / path + # If still not found, try current working directory + if not path.exists(): + path = Path.cwd() / image_path + + # Check if file exists + if not path.exists(): + return None + + # Read image file and convert to base64 + with open(path, "rb") as f: + image_bytes = f.read() + return base64.b64encode(image_bytes).decode() + except Exception: + return None + + +def _display_image_if_in_notebook( + image_data: str, col_name: str, max_width: int = 512, base_path: str | None = None +) -> bool: """Display image with caption in Jupyter notebook if available. Args: - image_data: Base64-encoded image data or data URI. + image_data: Base64-encoded image data, data URI, or file path. col_name: Name of the column (used for caption). max_width: Maximum width for the displayed image in pixels. + base_path: Optional base path to resolve relative image paths. Returns: True if image was displayed, False otherwise. 
@@ -88,8 +134,17 @@ def _display_image_if_in_notebook(image_data: str, col_name: str, max_width: int get_ipython() # This will raise NameError if not in IPython/Jupyter + # Check if it's a file path and load it + if _is_image_path(image_data) and not image_data.startswith("data:image/"): + loaded_base64 = _load_image_path_to_base64(image_data, base_path) + if loaded_base64 is None: + console.print( + f"[yellow]⚠️ Could not load image from path '{image_data}' for column '{col_name}'[/yellow]" + ) + return False + base64_data = loaded_base64 # Decode the image - if image_data.startswith("data:image/"): + elif image_data.startswith("data:image/"): # Extract base64 from data URI base64_data = image_data.split(",", 1)[1] if "," in image_data else image_data else: @@ -217,6 +272,11 @@ def display_sample_record( None if hide_seed_columns or self.dataset_metadata is None else self.dataset_metadata.seed_column_names ) + # Try to get base path from artifact storage if available + base_path = None + if hasattr(self, "artifact_storage") and self.artifact_storage is not None: + base_path = str(self.artifact_storage.base_dataset_path) + display_sample_record( record=record, processor_data_to_display=processor_data_to_display, @@ -225,6 +285,7 @@ def display_sample_record( syntax_highlighting_theme=syntax_highlighting_theme, record_index=i, seed_column_names=seed_column_names, + base_path=base_path, ) if index is None: self._display_cycle_index = (self._display_cycle_index + 1) % num_records @@ -258,6 +319,7 @@ def display_sample_record( syntax_highlighting_theme: str = "dracula", record_index: int | None = None, seed_column_names: list[str] | None = None, + base_path: str | None = None, ): if isinstance(record, (dict, pd.Series)): record = pd.DataFrame([record]).iloc[0] @@ -340,6 +402,10 @@ def display_sample_record( preview = f"" if in_notebook: images_to_display_later.append((col.name, image_data)) + elif _is_image_path(image_data): + preview = f"" + if in_notebook: + 
images_to_display_later.append((col.name, image_data)) else: preview = str(image_data)[:100] + "..." if len(str(image_data)) > 100 else str(image_data) table.add_row(col.name, preview) @@ -413,7 +479,7 @@ def display_sample_record( # Display images at the bottom with captions (only in notebook) if len(images_to_display_later) > 0: for col_name, image_data in images_to_display_later: - _display_image_if_in_notebook(image_data, col_name) + _display_image_if_in_notebook(image_data, col_name, base_path=base_path) def get_truncated_list_as_string(long_list: list[Any], max_items: int = 2) -> str: diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index f59573c6..2d24fc2d 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -1,27 +1,52 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from typing import TYPE_CHECKING from data_designer.config.column_configs import ImageGenerationColumnConfig from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering from data_designer.engine.processing.utils import deserialize_json_values +if TYPE_CHECKING: + from data_designer.engine.storage.image_storage import ImageStorageManager + class ImageCellGenerator(WithJinja2UserTemplateRendering, ColumnGeneratorWithModel[ImageGenerationColumnConfig]): - """Generator for image columns using either autoregressive or diffusion models. + """Generator for image columns with optional disk persistence. 
+ + Behavior depends on whether image_storage_manager is set: + - If set (create mode): Saves images to disk and stores relative paths in dataframe + - If None (preview mode): Stores base64 directly in dataframe - Automatically detects the appropriate API based on the model's inference parameters: - - ChatCompletionImageGenerationInferenceParams β†’ Responses API (GPT-5, gpt-image-*, Gemini) - - DiffusionImageGenerationInferenceParams β†’ image_generation API (DALL-E, Imagen, Stable Diffusion) + API is automatically detected based on the model name: + - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) β†’ image_generation API + - All other models β†’ chat/completions API (default) + + Attributes: + image_storage_manager: Optional image storage manager instance (set by dataset builder) """ + image_storage_manager: ImageStorageManager | None = None + @staticmethod def get_generation_strategy() -> GenerationStrategy: return GenerationStrategy.CELL_BY_CELL def generate(self, data: dict) -> dict: + """Generate image and optionally save to disk. 
+ + Args: + data: Record data + + Returns: + Record with image path (create mode) or base64 data (preview mode) added + """ deserialized_record = deserialize_json_values(data) + + # Validate required columns missing_columns = list(set(self.config.required_columns) - set(data.keys())) if len(missing_columns) > 0: error_msg = ( @@ -30,11 +55,24 @@ def generate(self, data: dict) -> dict: ) raise ValueError(error_msg) + # Render prompt template self.prepare_jinja2_template_renderer(self.config.prompt, list(deserialized_record.keys())) prompt = self.render_template(deserialized_record) - # Generate image (automatically routes to appropriate API based on inference params) - # Returns base64-encoded image data or URL depending on configuration - image_data = self.model.generate_image(prompt=prompt) - data[self.config.name] = image_data + # Validate prompt is non-empty + if not prompt or not prompt.strip(): + raise ValueError(f"Rendered prompt for column {self.config.name!r} is empty") + + # Generate image (returns base64 string) + base64_image = self.model.generate_image(prompt=prompt) + + # Store in dataframe based on mode + if self.image_storage_manager: + # Create mode: save to disk and store relative path + relative_path = self.image_storage_manager.save_base64_image(base64_image) + data[self.config.name] = relative_path + else: + # Preview mode: store base64 directly + data[self.config.name] = base64_image + return data diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py index 135be97c..7a2962eb 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -26,6 +26,7 @@ ColumnGeneratorWithModel, GenerationStrategy, ) +from 
data_designer.engine.column_generators.generators.image import ImageCellGenerator from data_designer.engine.column_generators.utils.generator_classification import column_type_is_model_generated from data_designer.engine.compiler import compile_data_designer_config from data_designer.engine.dataset_builders.artifact_storage import SDG_CONFIG_FILENAME, ArtifactStorage @@ -40,6 +41,7 @@ from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.image_storage import ImageStorageManager from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: @@ -64,6 +66,7 @@ def __init__( self._resource_provider = resource_provider self._records_to_drop: set[int] = set() self._registry = registry or DataDesignerRegistry() + self._image_storage_manager: ImageStorageManager | None = None self._data_designer_config = compile_data_designer_config(data_designer_config, resource_provider) self._column_configs = compile_dataset_builder_column_configs(self._data_designer_config) @@ -99,6 +102,7 @@ def build( self._run_model_health_check_if_needed() self._run_mcp_tool_check_if_needed() self._write_builder_config() + self._initialize_image_storage_if_needed() generators = self._initialize_generators() start_time = time.perf_counter() group_id = uuid.uuid4().hex @@ -124,6 +128,7 @@ def build( def build_preview(self, *, num_records: int) -> pd.DataFrame: self._run_model_health_check_if_needed() self._run_mcp_tool_check_if_needed() + # Skip image storage initialization for preview - base64 will be stored directly in DataFrame generators = self._initialize_generators() group_id = uuid.uuid4().hex @@ -144,13 +149,33 @@ def process_preview(self, dataset: pd.DataFrame) -> pd.DataFrame: current_batch_number=None, # preview mode does not have a batch number ) - def 
_initialize_generators(self) -> list[ColumnGenerator]: - return [ - self._registry.column_generators.get_for_config_type(type(config))( - config=config, resource_provider=self._resource_provider + def _has_image_columns(self) -> bool: + """Check if config has any image generation columns.""" + from data_designer.config.column_types import DataDesignerColumnType + + return any(col.column_type == DataDesignerColumnType.IMAGE_GENERATION for col in self.single_column_configs) + + def _initialize_image_storage_if_needed(self) -> None: + """Initialize image storage manager if dataset has image columns.""" + if self._has_image_columns(): + self._image_storage_manager = ImageStorageManager( + base_path=self.artifact_storage.base_dataset_path, images_subdir="images", validate_images=True ) - for config in self._column_configs - ] + + def _initialize_generators(self) -> list[ColumnGenerator]: + generators = [] + for config in self._column_configs: + generator_cls = self._registry.column_generators.get_for_config_type(type(config)) + generator = generator_cls(config=config, resource_provider=self._resource_provider) + + # Inject image storage manager for image generators (if available) + # For preview mode, storage manager is None and base64 is stored directly + if isinstance(generator, ImageCellGenerator): + generator.image_storage_manager = self._image_storage_manager + + generators.append(generator) + + return generators def _write_builder_config(self) -> None: self.artifact_storage.mkdir_if_needed(self.artifact_storage.base_dataset_path) diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index 07fbbab0..1abd235b 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -13,6 +13,7 @@ from data_designer.engine.model_provider import ModelProviderRegistry from 
data_designer.engine.models.errors import ( GenerationValidationFailureError, + ModelAPIError, catch_llm_exceptions, get_exception_primary_cause, ) @@ -163,42 +164,63 @@ def generate_text_embeddings( @catch_llm_exceptions def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: - """Generate image using either autoregressive or diffusion models. + """Generate image and return base64-encoded data. - Automatically detects the appropriate API based on the model's generation_type: - - CHAT_COMPLETION_IMAGE β†’ chat/completions API (GPT-5, gpt-image-*, Gemini) - - DIFFUSION_IMAGE β†’ image_generation API (DALL-E, Imagen, Stable Diffusion) + Automatically detects the appropriate API based on model name: + - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) β†’ image_generation API + - All other models β†’ chat/completions API (default) + + Both paths return base64-encoded image data. Args: - prompt: The prompt for image generation. - skip_usage_tracking: Whether to skip usage tracking. Default: False. - **kwargs: Additional arguments to pass to the model. + prompt: The prompt for image generation + skip_usage_tracking: Whether to skip usage tracking + **kwargs: Additional arguments to pass to the model Returns: - Base64-encoded image data (without data URI prefix for autoregressive models). - For diffusion models: URL string or base64 data depending on output_format. 
- """ - from data_designer.config.models import GenerationType + Base64-encoded image string (without data URI prefix) + Raises: + ModelAPIError: If image generation fails or returns invalid data + """ logger.debug( f"Generating image with model {self.model_name!r}...", extra={"model": self.model_name, "prompt": prompt}, ) - # Determine which API to use based on generation_type - gen_type = self.model_generation_type - - if gen_type == GenerationType.DIFFUSION_IMAGE: + # Auto-detect API type based on model name + if self._is_diffusion_model(): return self._generate_image_diffusion(prompt, skip_usage_tracking, **kwargs) else: - # Default to chat-completion (CHAT_COMPLETION_IMAGE or backward compatibility) return self._generate_image_chat_completion(prompt, skip_usage_tracking, **kwargs) + def _is_diffusion_model(self) -> bool: + """Detect if model uses diffusion API based on name patterns. + + Diffusion models include DALL-E, Stable Diffusion, and Imagen variants. + All other image models are assumed to use chat completions API. + + Returns: + True if model is detected as diffusion-based, False otherwise + """ + model_lower = self.model_name.lower() + diffusion_patterns = [ + "dall-e", + "dalle", + "stable-diffusion", + "sd-", + "sd_", + "imagen", + ] + return any(pattern in model_lower for pattern in diffusion_patterns) + def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: - """Generate image using autoregressive model via chat completions API.""" - kwargs = self.consolidate_kwargs(**kwargs) + """Generate image using autoregressive model via chat completions API. 
- # Build messages for image generation + Returns: + Base64-encoded image string + """ + kwargs = self.consolidate_kwargs(**kwargs) messages = [ChatMessage.as_user(content=prompt)] response = None @@ -208,64 +230,78 @@ def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool skip_usage_tracking=skip_usage_tracking, **kwargs, ) + logger.debug( f"Received image from autoregressive model {self.model_name!r}", extra={"model": self.model_name, "response": response}, ) - # Check if response has images attribute (some models return images here) - if hasattr(response.choices[0].message, "images") and response.choices[0].message.images: - # Extract base64 from first image - first_image = response.choices[0].message.images[0] + # Validate response structure + if not response.choices or len(response.choices) == 0: + raise ModelAPIError("Response missing choices") + + message = response.choices[0].message + + # Extract base64 from images attribute (primary path) + if hasattr(message, "images") and message.images: + first_image = message.images[0] + + # Handle different response formats if isinstance(first_image, dict) and "image_url" in first_image: image_url = first_image["image_url"] + if isinstance(image_url, dict) and "url" in image_url: - # Extract base64 data from data URL url = image_url["url"] - if url.startswith("data:image/"): - # Remove data URI prefix to get pure base64 - return url.split(",", 1)[1] if "," in url else url - return url + return self._extract_base64_from_data_uri(url) elif isinstance(image_url, str): - if image_url.startswith("data:image/"): - return image_url.split(",", 1)[1] if "," in image_url else image_url - return image_url - return str(first_image) + return self._extract_base64_from_data_uri(image_url) - # If no images attribute, check content for base64 or image data - content = response.choices[0].message.content or "" - if content.startswith("data:image/"): - # Remove data URI prefix - return content.split(",", 1)[1] 
if "," in content else content + # Fallback: treat as base64 string + if isinstance(first_image, str): + return self._extract_base64_from_data_uri(first_image) - # Return content as-is (might be base64 or other format) - return content + # Fallback: check content field + content = message.content or "" + if content: + return self._extract_base64_from_data_uri(content) - except Exception as e: - raise e + raise ModelAPIError("No image data found in response") + + except Exception: + raise def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: - """Generate image using diffusion model via image_generation API.""" - from data_designer.config.models import ModalityDataType + """Generate image using diffusion model via image_generation API. + Always returns base64. The API is configured to return base64 format. + + Returns: + Base64-encoded image string + """ kwargs = self.consolidate_kwargs(**kwargs) + + # Always request base64 format + kwargs["response_format"] = "b64_json" + response = None + try: response = self._router.image_generation(prompt=prompt, model=self.model_name, **kwargs) + logger.debug( f"Received image from diffusion model {self.model_name!r}", extra={"model": self.model_name, "response": response}, ) - # Return URL or base64 based on output_format - output_format = getattr(self._model_config.inference_parameters, "output_format", ModalityDataType.BASE64) - if output_format == ModalityDataType.URL: - return response.data[0].url - else: - return response.data[0].b64_json + # Validate response + if not response.data or len(response.data) == 0: + raise ModelAPIError("Image generation returned no data") - except Exception as e: - raise e + # Return base64 data + return response.data[0].b64_json + + except Exception: + raise finally: if not skip_usage_tracking and response is not None: self._track_usage_from_image_diffusion(response) @@ -494,3 +530,50 @@ def _track_usage_from_image_diffusion(self, response: 
litellm.types.utils.ImageR ), request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) + + def _extract_base64_from_data_uri(self, data: str) -> str: + """Extract base64 data from data URI or return as-is. + + Args: + data: Data URI (e.g., "data:image/png;base64,iVBORw0...") or plain base64 + + Returns: + Base64 string without data URI prefix + + Raises: + ModelAPIError: If data URI format is invalid + """ + if data.startswith("data:image/"): + # Extract base64 portion after comma + if "," in data: + return data.split(",", 1)[1] + else: + raise ModelAPIError("Invalid data URI format: missing comma separator") + + # Already plain base64 + return data + + def _download_url_to_base64(self, url: str) -> str: + """Download image from URL and convert to base64. + + Args: + url: Image URL + + Returns: + Base64-encoded image string + + Raises: + ModelAPIError: If download fails + """ + import base64 + + from data_designer.lazy_heavy_imports import httpx + + try: + with httpx.Client(timeout=30.0) as client: + response = client.get(url) + response.raise_for_status() + image_bytes = response.content + return base64.b64encode(image_bytes).decode("utf-8") + except Exception as e: + raise ModelAPIError(f"Failed to download image from URL {url}: {e}") from e diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py new file mode 100644 index 00000000..ad7ef0d5 --- /dev/null +++ b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from data_designer.engine.storage.image_storage import ImageFormat, ImageStorageManager + +__all__ = ["ImageFormat", "ImageStorageManager"] diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py new file mode 100644 index 00000000..d632bbc1 --- /dev/null +++ b/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py @@ -0,0 +1,166 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import base64 +import uuid +from enum import Enum +from pathlib import Path + + +class ImageFormat(str, Enum): + """Supported image formats.""" + + PNG = "png" + JPEG = "jpeg" + JPG = "jpg" + WEBP = "webp" + + +class ImageStorageManager: + """Manages disk storage of generated images. + + Handles: + - Creating images directory + - Decoding base64 to bytes + - Detecting image format + - Saving with UUID filenames + - Returning relative paths + """ + + def __init__(self, base_path: Path, images_subdir: str = "images", validate_images: bool = True) -> None: + """Initialize image storage manager. + + Args: + base_path: Base directory for dataset + images_subdir: Subdirectory name for images (default: "images") + validate_images: Whether to validate images after saving (default: True) + """ + self.base_path = Path(base_path) + self.images_dir = self.base_path / images_subdir + self.images_subdir = images_subdir + self.validate_images = validate_images + self._ensure_images_directory() + + def _ensure_images_directory(self) -> None: + """Create images directory if it doesn't exist.""" + self.images_dir.mkdir(parents=True, exist_ok=True) + + def save_base64_image(self, base64_data: str) -> str: + """Save base64 image to disk and return relative path. 
+ + Args: + base64_data: Base64 encoded image string (with or without data URI prefix) + + Returns: + Relative path to saved image (e.g., "images/f47ac10b-58cc.png") + + Raises: + ValueError: If base64 data is invalid + OSError: If disk write fails + """ + # Decode base64 to bytes + image_bytes = self._decode_base64(base64_data) + + # Detect format + image_format = self._detect_format(image_bytes) + + # Generate unique filename + image_id = uuid.uuid4() + filename = f"{image_id}.{image_format.value}" + full_path = self.images_dir / filename + relative_path = f"{self.images_subdir}/{filename}" + + # Write to disk + with open(full_path, "wb") as f: + f.write(image_bytes) + + # Optional validation + if self.validate_images: + self._validate_image(full_path) + + return relative_path + + def _decode_base64(self, base64_data: str) -> bytes: + """Decode base64 string to bytes. + + Args: + base64_data: Base64 string (with or without data URI prefix) + + Returns: + Decoded bytes + + Raises: + ValueError: If base64 data is invalid + """ + # Remove data URI prefix if present (e.g., "data:image/png;base64,") + if base64_data.startswith("data:"): + if "," in base64_data: + base64_data = base64_data.split(",", 1)[1] + else: + raise ValueError("Invalid data URI format: missing comma separator") + + try: + return base64.b64decode(base64_data, validate=True) + except Exception as e: + raise ValueError(f"Invalid base64 data: {e}") from e + + def _detect_format(self, image_bytes: bytes) -> ImageFormat: + """Detect image format from bytes. 
+ + Args: + image_bytes: Image data as bytes + + Returns: + Detected format (defaults to PNG if unknown) + """ + # Check magic bytes first (fast) + if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"): + return ImageFormat.PNG + elif image_bytes.startswith(b"\xff\xd8\xff"): + return ImageFormat.JPG + elif image_bytes.startswith(b"RIFF") and b"WEBP" in image_bytes[:12]: + return ImageFormat.WEBP + + # Fallback to PIL for robust detection + try: + import io + + from PIL import Image + + img = Image.open(io.BytesIO(image_bytes)) + format_str = img.format.lower() if img.format else None + if format_str in ["png", "jpeg", "jpg", "webp"]: + return ImageFormat(format_str if format_str != "jpeg" else "jpg") + except Exception: + pass + + # Default to PNG + return ImageFormat.PNG + + def _validate_image(self, image_path: Path) -> None: + """Validate that saved image is readable. + + Args: + image_path: Path to image file + + Raises: + ValueError: If image is corrupted or unreadable + """ + try: + from PIL import Image + + with Image.open(image_path) as img: + img.verify() + except Exception as e: + # Clean up invalid file + image_path.unlink(missing_ok=True) + raise ValueError(f"Saved image is invalid or corrupted: {e}") from e + + def cleanup(self) -> None: + """Clean up image directory (for preview mode).""" + import shutil + + if self.images_dir.exists(): + shutil.rmtree(self.images_dir) diff --git a/packages/data-designer/src/data_designer/integrations/huggingface/client.py b/packages/data-designer/src/data_designer/integrations/huggingface/client.py index c047d73b..2e84ee3c 100644 --- a/packages/data-designer/src/data_designer/integrations/huggingface/client.py +++ b/packages/data-designer/src/data_designer/integrations/huggingface/client.py @@ -66,6 +66,7 @@ def upload_dataset( Uploads the complete dataset including: - Main parquet batch files from parquet-files/ → data/ + - Images from images/ → images/ (if present) - Processor output batch files from
processors-files/{name}/ → {name}/ - Existing builder_config.json and metadata.json files - Auto-generated README.md (dataset card) @@ -102,6 +103,7 @@ def upload_dataset( raise HuggingFaceHubClientUploadError(f"Failed to upload dataset card: {e}") from e self._upload_main_dataset_files(repo_id=repo_id, parquet_folder=base_dataset_path / FINAL_DATASET_FOLDER_NAME) + self._upload_images_folder(repo_id=repo_id, images_folder=base_dataset_path / "images") self._upload_processor_files( repo_id=repo_id, processors_folder=base_dataset_path / PROCESSORS_OUTPUTS_FOLDER_NAME ) @@ -178,6 +180,36 @@ def _upload_main_dataset_files(self, repo_id: str, parquet_folder: Path) -> None except Exception as e: raise HuggingFaceHubClientUploadError(f"Failed to upload parquet files: {e}") from e + def _upload_images_folder(self, repo_id: str, images_folder: Path) -> None: + """Upload images folder to Hugging Face Hub. + + Args: + repo_id: Hugging Face dataset repo ID + images_folder: Path to images folder + + Raises: + HuggingFaceUploadError: If upload fails + """ + if not images_folder.exists(): + return + + image_files = list(images_folder.glob("*")) + if not image_files: + return + + logger.info(f" |-- {RandomEmoji.loading()} Uploading {len(image_files)} images...") + + try: + self._api.upload_folder( + repo_id=repo_id, + folder_path=str(images_folder), + path_in_repo="images", + repo_type="dataset", + commit_message="Upload images", + ) + except Exception as e: + raise HuggingFaceHubClientUploadError(f"Failed to upload images: {e}") from e + def _upload_processor_files(self, repo_id: str, processors_folder: Path) -> None: """Upload processor output files.
From ed9787bf297a5a57c90f5b58ebd049a2fbe07cae Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Fri, 6 Feb 2026 10:17:21 -0700 Subject: [PATCH 22/64] support generation of multiple images --- .../src/data_designer/config/models.py | 14 +++- .../config/utils/visualization.py | 24 ++++++- .../column_generators/generators/image.py | 20 +++--- .../src/data_designer/engine/models/facade.py | 72 ++++++++++--------- 4 files changed, 84 insertions(+), 46 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index dc3533e3..3dab2d8d 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -435,8 +435,11 @@ class ImageInferenceParams(BaseInferenceParams): - Preview mode: Images stored as base64 directly in dataframe Common parameters like quality and size are provided as optional fields. - For model-specific parameters, use the `extra_body` field inherited from - BaseInferenceParams. + For model-specific parameters (including n for number of images), use the `extra_body` + field inherited from BaseInferenceParams. + + If the API returns multiple images (either from prompt or API parameters), all images + will be stored as a list in the dataframe. Attributes: generation_type: Type of generation, always "image" for this class. 
@@ -451,6 +454,13 @@ class ImageInferenceParams(BaseInferenceParams): size="1024x1024" ) + # Generate multiple images using extra_body + dd.ImageInferenceParams( + quality="hd", + size="1024x1024", + extra_body={"n": 3} # Request 3 images from API + ) + # With model-specific params via extra_body dd.ImageInferenceParams( quality="hd", diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index 56d28fd3..62b57f5e 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -394,7 +394,28 @@ def display_sample_record( if col.drop: continue image_data = record[col.name] - if _is_base64_image(image_data): + + # Handle list of images + if isinstance(image_data, list): + previews = [] + for idx, img in enumerate(image_data): + if _is_base64_image(img): + previews.append(f"[{idx}] ") + if in_notebook: + images_to_display_later.append((f"{col.name}[{idx}]", img)) + elif _is_image_url(img): + previews.append(f"[{idx}] ") + if in_notebook: + images_to_display_later.append((f"{col.name}[{idx}]", img)) + elif _is_image_path(img): + previews.append(f"[{idx}] ") + if in_notebook: + images_to_display_later.append((f"{col.name}[{idx}]", img)) + else: + previews.append(f"[{idx}] {str(img)[:30]}") + preview = "\n".join(previews) if previews else "" + # Handle single image (backwards compatibility) + elif _is_base64_image(image_data): preview = f"" if in_notebook: images_to_display_later.append((col.name, image_data)) @@ -408,6 +429,7 @@ def display_sample_record( images_to_display_later.append((col.name, image_data)) else: preview = str(image_data)[:100] + "..." 
if len(str(image_data)) > 100 else str(image_data) + table.add_row(col.name, preview) render_list.append(pad_console_element(table)) diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index 2d24fc2d..db3c9c9e 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -36,13 +36,13 @@ def get_generation_strategy() -> GenerationStrategy: return GenerationStrategy.CELL_BY_CELL def generate(self, data: dict) -> dict: - """Generate image and optionally save to disk. + """Generate image(s) and optionally save to disk. Args: data: Record data Returns: - Record with image path (create mode) or base64 data (preview mode) added + Record with image path(s) (create mode) or base64 data (preview mode) added """ deserialized_record = deserialize_json_values(data) @@ -63,16 +63,18 @@ def generate(self, data: dict) -> dict: if not prompt or not prompt.strip(): raise ValueError(f"Rendered prompt for column {self.config.name!r} is empty") - # Generate image (returns base64 string) - base64_image = self.model.generate_image(prompt=prompt) + # Generate images (returns list of base64 strings) + base64_images = self.model.generate_image(prompt=prompt) # Store in dataframe based on mode if self.image_storage_manager: - # Create mode: save to disk and store relative path - relative_path = self.image_storage_manager.save_base64_image(base64_image) - data[self.config.name] = relative_path + # Create mode: save each image to disk and store list of relative paths + relative_paths = [ + self.image_storage_manager.save_base64_image(base64_image) for base64_image in base64_images + ] + data[self.config.name] = relative_paths else: - # Preview mode: store base64 directly - data[self.config.name] = 
base64_image + # Preview mode: store list of base64 strings directly + data[self.config.name] = base64_images return data diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index 1abd235b..b78d2e1e 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -163,22 +163,23 @@ def generate_text_embeddings( self._track_usage_from_embedding(response) @catch_llm_exceptions - def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: - """Generate image and return base64-encoded data. + def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: + """Generate image(s) and return base64-encoded data. Automatically detects the appropriate API based on model name: - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) → image_generation API - All other models → chat/completions API (default) - Both paths return base64-encoded image data. + Both paths return base64-encoded image data. If the API returns multiple images, + all are returned in the list.
Args: prompt: The prompt for image generation skip_usage_tracking: Whether to skip usage tracking - **kwargs: Additional arguments to pass to the model + **kwargs: Additional arguments to pass to the model (including n=number of images) Returns: - Base64-encoded image string (without data URI prefix) + List of base64-encoded image strings (without data URI prefix) Raises: ModelAPIError: If image generation fails or returns invalid data @@ -214,11 +215,11 @@ def _is_diffusion_model(self) -> bool: ] return any(pattern in model_lower for pattern in diffusion_patterns) - def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: - """Generate image using autoregressive model via chat completions API. + def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: + """Generate image(s) using autoregressive model via chat completions API. Returns: - Base64-encoded image string + List of base64-encoded image strings """ kwargs = self.consolidate_kwargs(**kwargs) messages = [ChatMessage.as_user(content=prompt)] @@ -232,7 +233,7 @@ def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool ) logger.debug( - f"Received image from autoregressive model {self.model_name!r}", + f"Received image(s) from autoregressive model {self.model_name!r}", extra={"model": self.model_name, "response": response}, ) @@ -241,42 +242,45 @@ def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool raise ModelAPIError("Response missing choices") message = response.choices[0].message + images = [] # Extract base64 from images attribute (primary path) if hasattr(message, "images") and message.images: - first_image = message.images[0] - - # Handle different response formats - if isinstance(first_image, dict) and "image_url" in first_image: - image_url = first_image["image_url"] - - if isinstance(image_url, dict) and "url" in image_url: - url = 
image_url["url"] - return self._extract_base64_from_data_uri(url) - elif isinstance(image_url, str): - return self._extract_base64_from_data_uri(image_url) - - # Fallback: treat as base64 string - if isinstance(first_image, str): - return self._extract_base64_from_data_uri(first_image) + for image in message.images: + # Handle different response formats + if isinstance(image, dict) and "image_url" in image: + image_url = image["image_url"] + + if isinstance(image_url, dict) and "url" in image_url: + url = image_url["url"] + images.append(self._extract_base64_from_data_uri(url)) + elif isinstance(image_url, str): + images.append(self._extract_base64_from_data_uri(image_url)) + # Fallback: treat as base64 string + elif isinstance(image, str): + images.append(self._extract_base64_from_data_uri(image)) # Fallback: check content field - content = message.content or "" - if content: - return self._extract_base64_from_data_uri(content) + if not images: + content = message.content or "" + if content: + images.append(self._extract_base64_from_data_uri(content)) + + if not images: + raise ModelAPIError("No image data found in response") - raise ModelAPIError("No image data found in response") + return images except Exception: raise - def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> str: - """Generate image using diffusion model via image_generation API. + def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: + """Generate image(s) using diffusion model via image_generation API. Always returns base64. The API is configured to return base64 format. 
Returns: - Base64-encoded image string + List of base64-encoded image strings """ kwargs = self.consolidate_kwargs(**kwargs) @@ -289,7 +293,7 @@ def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = Fal response = self._router.image_generation(prompt=prompt, model=self.model_name, **kwargs) logger.debug( - f"Received image from diffusion model {self.model_name!r}", + f"Received {len(response.data)} image(s) from diffusion model {self.model_name!r}", extra={"model": self.model_name, "response": response}, ) @@ -297,8 +301,8 @@ def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = Fal if not response.data or len(response.data) == 0: raise ModelAPIError("Image generation returned no data") - # Return base64 data - return response.data[0].b64_json + # Return all images as list + return [img.b64_json for img in response.data] except Exception: raise From 7dea87a0e75e242951751cf3af14e94aac77eb46 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Fri, 6 Feb 2026 10:28:40 -0700 Subject: [PATCH 23/64] clean up visualization --- .../config/utils/image_helpers.py | 110 ++++++++++++++++++ .../config/utils/visualization.py | 94 +++------------ 2 files changed, 124 insertions(+), 80 deletions(-) create mode 100644 packages/data-designer-config/src/data_designer/config/utils/image_helpers.py diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py new file mode 100644 index 00000000..a32714d3 --- /dev/null +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -0,0 +1,110 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Helper utilities for working with images.""" + +from __future__ import annotations + +import base64 +from pathlib import Path + +from data_designer.config.models import ImageFormat + + +def is_image_path(value: str) -> bool: + """Check if a string is an image file path. + + Args: + value: String to check + + Returns: + True if the string looks like an image file path, False otherwise + """ + if not isinstance(value, str): + return False + return any(value.lower().endswith(ext) for ext in get_supported_image_extensions()) + + +def is_base64_image(value: str) -> bool: + """Check if a string is base64-encoded image data. + + Args: + value: String to check + + Returns: + True if the string looks like base64-encoded image data, False otherwise + """ + if not isinstance(value, str): + return False + # Check if it starts with data URI scheme + if value.startswith("data:image/"): + return True + # Check if it looks like base64 (at least 100 chars, contains only base64 chars) + if len(value) > 100 and all( + c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in value[:100] + ): + try: + # Try to decode a small portion to verify it's valid base64 + base64.b64decode(value[:100]) + return True + except Exception: + return False + return False + + +def is_image_url(value: str) -> bool: + """Check if a string is an image URL. + + Args: + value: String to check + + Returns: + True if the string looks like an image URL, False otherwise + """ + if not isinstance(value, str): + return False + return value.startswith(("http://", "https://")) and any( + ext in value.lower() for ext in get_supported_image_extensions() + ) + + +def load_image_path_to_base64(image_path: str, base_path: str | None = None) -> str | None: + """Load an image from a file path and return as base64. + + Args: + image_path: Relative or absolute path to the image file. + base_path: Optional base path to resolve relative paths from. 
+ + Returns: + Base64-encoded image data or None if loading fails. + """ + try: + path = Path(image_path) + + # If path is not absolute, try to resolve it + if not path.is_absolute(): + if base_path: + path = Path(base_path) / path + # If still not found, try current working directory + if not path.exists(): + path = Path.cwd() / image_path + + # Check if file exists + if not path.exists(): + return None + + # Read image file and convert to base64 + with open(path, "rb") as f: + image_bytes = f.read() + return base64.b64encode(image_bytes).decode() + except Exception: + return None + + +def get_supported_image_extensions() -> list[str]: + """Get list of supported image extensions from ImageFormat enum. + + Returns: + List of extensions with leading dot (e.g., [".png", ".jpg", ...]) + """ + return [f".{fmt.value}" for fmt in ImageFormat] diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index 62b57f5e..dd819aa3 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -28,6 +28,12 @@ from data_designer.config.utils.code_lang import code_lang_to_syntax_lexer from data_designer.config.utils.constants import NVIDIA_API_KEY_ENV_VAR_NAME, OPENAI_API_KEY_ENV_VAR_NAME from data_designer.config.utils.errors import DatasetSampleDisplayError +from data_designer.config.utils.image_helpers import ( + is_base64_image, + is_image_path, + is_image_url, + load_image_path_to_base64, +) from data_designer.lazy_heavy_imports import np, pd if TYPE_CHECKING: @@ -41,78 +47,6 @@ console = Console() -def _is_base64_image(value: str) -> bool: - """Check if a string is base64-encoded image data.""" - if not isinstance(value, str): - return False - # Check if it starts with data URI scheme - if value.startswith("data:image/"): - return True - # Check if it 
looks like base64 (at least 100 chars, contains only base64 chars) - if len(value) > 100 and all( - c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in value[:100] - ): - try: - # Try to decode a small portion to verify it's valid base64 - base64.b64decode(value[:100]) - return True - except Exception: - return False - return False - - -def _is_image_url(value: str) -> bool: - """Check if a string is an image URL.""" - if not isinstance(value, str): - return False - return value.startswith(("http://", "https://")) and any( - ext in value.lower() for ext in [".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"] - ) - - -def _is_image_path(value: str) -> bool: - """Check if a string is an image file path.""" - if not isinstance(value, str): - return False - # Check if it looks like a file path with image extension - return any(value.lower().endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"]) - - -def _load_image_path_to_base64(image_path: str, base_path: str | None = None) -> str | None: - """Load an image from a file path and return as base64. - - Args: - image_path: Relative or absolute path to the image file. - base_path: Optional base path to resolve relative paths from. - - Returns: - Base64-encoded image data or None if loading fails. 
- """ - try: - from pathlib import Path - - path = Path(image_path) - - # If path is not absolute, try to resolve it - if not path.is_absolute(): - if base_path: - path = Path(base_path) / path - # If still not found, try current working directory - if not path.exists(): - path = Path.cwd() / image_path - - # Check if file exists - if not path.exists(): - return None - - # Read image file and convert to base64 - with open(path, "rb") as f: - image_bytes = f.read() - return base64.b64encode(image_bytes).decode() - except Exception: - return None - - def _display_image_if_in_notebook( image_data: str, col_name: str, max_width: int = 512, base_path: str | None = None ) -> bool: @@ -135,8 +69,8 @@ def _display_image_if_in_notebook( get_ipython() # This will raise NameError if not in IPython/Jupyter # Check if it's a file path and load it - if _is_image_path(image_data) and not image_data.startswith("data:image/"): - loaded_base64 = _load_image_path_to_base64(image_data, base_path) + if is_image_path(image_data) and not image_data.startswith("data:image/"): + loaded_base64 = load_image_path_to_base64(image_data, base_path) if loaded_base64 is None: console.print( f"[yellow]⚠️ Could not load image from path '{image_data}' for column '{col_name}'[/yellow]" @@ -399,15 +333,15 @@ def display_sample_record( if isinstance(image_data, list): previews = [] for idx, img in enumerate(image_data): - if _is_base64_image(img): + if is_base64_image(img): previews.append(f"[{idx}] ") if in_notebook: images_to_display_later.append((f"{col.name}[{idx}]", img)) - elif _is_image_url(img): + elif is_image_url(img): previews.append(f"[{idx}] ") if in_notebook: images_to_display_later.append((f"{col.name}[{idx}]", img)) - elif _is_image_path(img): + elif is_image_path(img): previews.append(f"[{idx}] ") if in_notebook: images_to_display_later.append((f"{col.name}[{idx}]", img)) @@ -415,15 +349,15 @@ def display_sample_record( previews.append(f"[{idx}] {str(img)[:30]}") preview = 
"\n".join(previews) if previews else "" # Handle single image (backwards compatibility) - elif _is_base64_image(image_data): + elif is_base64_image(image_data): preview = f"" if in_notebook: images_to_display_later.append((col.name, image_data)) - elif _is_image_url(image_data): + elif is_image_url(image_data): preview = f"" if in_notebook: images_to_display_later.append((col.name, image_data)) - elif _is_image_path(image_data): + elif is_image_path(image_data): preview = f"" if in_notebook: images_to_display_later.append((col.name, image_data)) From 31cc24ee364f555c10a740c8d0da25c6d7e5c1e3 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Fri, 6 Feb 2026 11:39:31 -0700 Subject: [PATCH 24/64] clean up some util methods + add tests --- .../config/utils/image_helpers.py | 90 +++++++++++ .../config/utils/visualization.py | 15 +- .../src/data_designer/lazy_heavy_imports.py | 1 + .../tests/config/utils/test_image_helpers.py | 148 ++++++++++++++++++ .../src/data_designer/engine/models/facade.py | 51 ++---- .../engine/storage/image_storage.py | 79 +--------- 6 files changed, 270 insertions(+), 114 deletions(-) create mode 100644 packages/data-designer-config/tests/config/utils/test_image_helpers.py diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index a32714d3..48dacbae 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -6,9 +6,99 @@ from __future__ import annotations import base64 +import io from pathlib import Path +from typing import TYPE_CHECKING from data_designer.config.models import ImageFormat +from data_designer.lazy_heavy_imports import PIL + +if TYPE_CHECKING: + import PIL + +# Magic bytes for image format detection +IMAGE_FORMAT_MAGIC_BYTES = { + ImageFormat.PNG: b"\x89PNG\r\n\x1a\n", + ImageFormat.JPG: 
b"\xff\xd8\xff", + # WEBP uses RIFF header - handled separately +} + + +def extract_base64_from_data_uri(data: str) -> str: + """Extract base64 from data URI or return as-is. + + Handles data URIs like "data:image/png;base64,iVBORw0..." and returns + just the base64 portion. + + Args: + data: Data URI (e.g., "data:image/png;base64,XXX") or plain base64 + + Returns: + Base64 string without data URI prefix + + Raises: + ValueError: If data URI format is invalid + """ + if data.startswith("data:"): + if "," in data: + return data.split(",", 1)[1] + raise ValueError("Invalid data URI format: missing comma separator") + return data + + +def decode_base64_image(base64_data: str) -> bytes: + """Decode base64 string to image bytes. + + Automatically handles data URIs by extracting the base64 portion first. + + Args: + base64_data: Base64 string (with or without data URI prefix) + + Returns: + Decoded image bytes + + Raises: + ValueError: If base64 data is invalid + """ + # Remove data URI prefix if present + base64_data = extract_base64_from_data_uri(base64_data) + + try: + return base64.b64decode(base64_data, validate=True) + except Exception as e: + raise ValueError(f"Invalid base64 data: {e}") from e + + +def detect_image_format(image_bytes: bytes) -> ImageFormat: + """Detect image format from bytes. + + Uses magic bytes for fast detection, falls back to PIL for robust detection. 
+ + Args: + image_bytes: Image data as bytes + + Returns: + Detected format (defaults to PNG if unknown) + """ + # Check magic bytes first (fast) + if image_bytes.startswith(IMAGE_FORMAT_MAGIC_BYTES[ImageFormat.PNG]): + return ImageFormat.PNG + elif image_bytes.startswith(IMAGE_FORMAT_MAGIC_BYTES[ImageFormat.JPG]): + return ImageFormat.JPG + elif image_bytes.startswith(b"RIFF") and b"WEBP" in image_bytes[:12]: + return ImageFormat.WEBP + + # Fallback to PIL for robust detection + try: + img = PIL.Image.open(io.BytesIO(image_bytes)) + format_str = img.format.lower() if img.format else None + if format_str in ["png", "jpeg", "jpg", "webp"]: + return ImageFormat(format_str if format_str != "jpeg" else "jpg") + except Exception: + pass + + # Default to PNG + return ImageFormat.PNG def is_image_path(value: str) -> bool: diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index dd819aa3..c349ec86 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -29,16 +29,18 @@ from data_designer.config.utils.constants import NVIDIA_API_KEY_ENV_VAR_NAME, OPENAI_API_KEY_ENV_VAR_NAME from data_designer.config.utils.errors import DatasetSampleDisplayError from data_designer.config.utils.image_helpers import ( + extract_base64_from_data_uri, is_base64_image, is_image_path, is_image_url, load_image_path_to_base64, ) -from data_designer.lazy_heavy_imports import np, pd +from data_designer.lazy_heavy_imports import PIL, np, pd if TYPE_CHECKING: import numpy as np import pandas as pd + import PIL from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.dataset_metadata import DatasetMetadata @@ -64,7 +66,6 @@ def _display_image_if_in_notebook( try: # Check if we're in a Jupyter environment from IPython.display 
import HTML, display - from PIL import Image as PILImage get_ipython() # This will raise NameError if not in IPython/Jupyter @@ -77,23 +78,21 @@ def _display_image_if_in_notebook( ) return False base64_data = loaded_base64 - # Decode the image - elif image_data.startswith("data:image/"): - # Extract base64 from data URI - base64_data = image_data.split(",", 1)[1] if "," in image_data else image_data else: base64_data = image_data + # Extract base64 from data URI if present + base64_data = extract_base64_from_data_uri(base64_data) image_bytes = base64.b64decode(base64_data) # Open image with PIL and resize if needed - img = PILImage.open(io.BytesIO(image_bytes)) + img = PIL.Image.open(io.BytesIO(image_bytes)) # Resize if image is too large if img.width > max_width: ratio = max_width / img.width new_height = int(img.height * ratio) - img = img.resize((max_width, new_height), PILImage.Resampling.LANCZOS) + img = img.resize((max_width, new_height), PIL.Image.Resampling.LANCZOS) # Convert back to base64 for HTML display buffered = io.BytesIO() diff --git a/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py b/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py index be7b7185..f7901a7c 100644 --- a/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py +++ b/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py @@ -35,6 +35,7 @@ "nx": "networkx", "scipy": "scipy", "jsonschema": "jsonschema", + "PIL": "PIL", } diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py new file mode 100644 index 00000000..3d6683e4 --- /dev/null +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -0,0 +1,148 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import base64 + +import pytest + +from data_designer.config.models import ImageFormat +from data_designer.config.utils.image_helpers import ( + decode_base64_image, + detect_image_format, + extract_base64_from_data_uri, + get_supported_image_extensions, + is_base64_image, + is_image_path, + is_image_url, +) + +# Tests for extract_base64_from_data_uri + + +def test_extract_base64_from_data_uri_with_prefix(): + data_uri = "data:image/png;base64,iVBORw0KGgoAAAANS" + result = extract_base64_from_data_uri(data_uri) + assert result == "iVBORw0KGgoAAAANS" + + +def test_extract_base64_plain_base64_without_prefix(): + plain_base64 = "iVBORw0KGgoAAAANS" + result = extract_base64_from_data_uri(plain_base64) + assert result == plain_base64 + + +def test_extract_base64_invalid_data_uri_raises_error(): + with pytest.raises(ValueError, match="Invalid data URI format: missing comma separator"): + extract_base64_from_data_uri("data:image/png;base64") + + +# Tests for decode_base64_image + + +def test_decode_base64_image_valid(): + png_bytes = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01" + base64_data = base64.b64encode(png_bytes).decode() + result = decode_base64_image(base64_data) + assert result == png_bytes + + +def test_decode_base64_image_with_data_uri(): + png_bytes = b"\x89PNG\r\n\x1a\n" + base64_data = base64.b64encode(png_bytes).decode() + data_uri = f"data:image/png;base64,{base64_data}" + result = decode_base64_image(data_uri) + assert result == png_bytes + + +def test_decode_base64_image_invalid_raises_error(): + with pytest.raises(ValueError, match="Invalid base64 data"): + decode_base64_image("not-valid-base64!!!") + + +# Tests for detect_image_format + + +def test_detect_image_format_png(): + png_magic = b"\x89PNG\r\n\x1a\n" + b"\x00" * 10 + assert detect_image_format(png_magic) == ImageFormat.PNG + + +def test_detect_image_format_jpg(): + jpg_magic = b"\xff\xd8\xff" + b"\x00" * 
10 + assert detect_image_format(jpg_magic) == ImageFormat.JPG + + +def test_detect_image_format_webp(): + webp_magic = b"RIFF" + b"\x00" * 4 + b"WEBP" + assert detect_image_format(webp_magic) == ImageFormat.WEBP + + +def test_detect_image_format_unknown_defaults_to_png(): + unknown_bytes = b"\x00\x00\x00\x00" + b"\x00" * 10 + assert detect_image_format(unknown_bytes) == ImageFormat.PNG + + +# Tests for is_image_path + + +def test_is_image_path_various_extensions(): + assert is_image_path("/path/to/image.png") is True + assert is_image_path("image.PNG") is True + assert is_image_path("image.jpg") is True + assert is_image_path("image.jpeg") is True + + +def test_is_image_path_non_image(): + assert is_image_path("/path/to/file.txt") is False + assert is_image_path("document.pdf") is False + + +def test_is_image_path_extension_in_directory(): + assert is_image_path("/some.png/file.txt") is False + + +# Tests for is_base64_image + + +def test_is_base64_image_data_uri(): + assert is_base64_image("data:image/png;base64,iVBORw0KGgo") is True + + +def test_is_base64_image_long_valid_base64(): + long_base64 = base64.b64encode(b"x" * 100).decode() + assert is_base64_image(long_base64) is True + + +def test_is_base64_image_short_string(): + assert is_base64_image("short") is False + + +# Tests for is_image_url + + +def test_is_image_url_http_and_https(): + assert is_image_url("http://example.com/image.png") is True + assert is_image_url("https://example.com/photo.jpg") is True + + +def test_is_image_url_with_query_params(): + assert is_image_url("https://example.com/image.png?size=large") is True + + +def test_is_image_url_without_image_extension(): + assert is_image_url("https://example.com/page.html") is False + + +def test_is_image_url_non_http(): + assert is_image_url("ftp://example.com/image.png") is False + + +# Tests for get_supported_image_extensions + + +def test_get_supported_image_extensions_matches_enum(): + result = get_supported_image_extensions() + enum_values 
= [f".{fmt.value}" for fmt in ImageFormat] + assert set(result) == set(enum_values) diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index b78d2e1e..d13273f4 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any from data_designer.config.models import GenerationType, ModelConfig, ModelProvider +from data_designer.config.utils.image_helpers import extract_base64_from_data_uri from data_designer.engine.mcp.errors import MCPConfigurationError from data_designer.engine.model_provider import ModelProviderRegistry from data_designer.engine.models.errors import ( @@ -38,6 +39,16 @@ def _identity(x: Any) -> Any: logger = logging.getLogger(__name__) +# Patterns for detecting diffusion-based image generation models +DIFFUSION_MODEL_PATTERNS = [ + "dall-e", + "dalle", + "stable-diffusion", + "sd-", + "sd_", + "imagen", +] + class ModelFacade: def __init__( @@ -205,15 +216,7 @@ def _is_diffusion_model(self) -> bool: True if model is detected as diffusion-based, False otherwise """ model_lower = self.model_name.lower() - diffusion_patterns = [ - "dall-e", - "dalle", - "stable-diffusion", - "sd-", - "sd_", - "imagen", - ] - return any(pattern in model_lower for pattern in diffusion_patterns) + return any(pattern in model_lower for pattern in DIFFUSION_MODEL_PATTERNS) def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: """Generate image(s) using autoregressive model via chat completions API. 
@@ -253,18 +256,18 @@ def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool if isinstance(image_url, dict) and "url" in image_url: url = image_url["url"] - images.append(self._extract_base64_from_data_uri(url)) + images.append(extract_base64_from_data_uri(url)) elif isinstance(image_url, str): - images.append(self._extract_base64_from_data_uri(image_url)) + images.append(extract_base64_from_data_uri(image_url)) # Fallback: treat as base64 string elif isinstance(image, str): - images.append(self._extract_base64_from_data_uri(image)) + images.append(extract_base64_from_data_uri(image)) # Fallback: check content field if not images: content = message.content or "" if content: - images.append(self._extract_base64_from_data_uri(content)) + images.append(extract_base64_from_data_uri(content)) if not images: raise ModelAPIError("No image data found in response") @@ -535,28 +538,6 @@ def _track_usage_from_image_diffusion(self, response: litellm.types.utils.ImageR request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) - def _extract_base64_from_data_uri(self, data: str) -> str: - """Extract base64 data from data URI or return as-is. - - Args: - data: Data URI (e.g., "data:image/png;base64,iVBORw0...") or plain base64 - - Returns: - Base64 string without data URI prefix - - Raises: - ModelAPIError: If data URI format is invalid - """ - if data.startswith("data:image/"): - # Extract base64 portion after comma - if "," in data: - return data.split(",", 1)[1] - else: - raise ModelAPIError("Invalid data URI format: missing comma separator") - - # Already plain base64 - return data - def _download_url_to_base64(self, url: str) -> str: """Download image from URL and convert to base64. 
diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py index d632bbc1..22d4bf84 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py @@ -3,19 +3,15 @@ from __future__ import annotations -import base64 import uuid -from enum import Enum from pathlib import Path +from typing import TYPE_CHECKING +from data_designer.config.utils.image_helpers import decode_base64_image, detect_image_format +from data_designer.lazy_heavy_imports import PIL -class ImageFormat(str, Enum): - """Supported image formats.""" - - PNG = "png" - JPEG = "jpeg" - JPG = "jpg" - WEBP = "webp" +if TYPE_CHECKING: + import PIL class ImageStorageManager: @@ -61,10 +57,10 @@ def save_base64_image(self, base64_data: str) -> str: OSError: If disk write fails """ # Decode base64 to bytes - image_bytes = self._decode_base64(base64_data) + image_bytes = decode_base64_image(base64_data) # Detect format - image_format = self._detect_format(image_bytes) + image_format = detect_image_format(image_bytes) # Generate unique filename image_id = uuid.uuid4() @@ -82,63 +78,6 @@ def save_base64_image(self, base64_data: str) -> str: return relative_path - def _decode_base64(self, base64_data: str) -> bytes: - """Decode base64 string to bytes. 
- - Args: - base64_data: Base64 string (with or without data URI prefix) - - Returns: - Decoded bytes - - Raises: - ValueError: If base64 data is invalid - """ - # Remove data URI prefix if present (e.g., "data:image/png;base64,") - if base64_data.startswith("data:"): - if "," in base64_data: - base64_data = base64_data.split(",", 1)[1] - else: - raise ValueError("Invalid data URI format: missing comma separator") - - try: - return base64.b64decode(base64_data, validate=True) - except Exception as e: - raise ValueError(f"Invalid base64 data: {e}") from e - - def _detect_format(self, image_bytes: bytes) -> ImageFormat: - """Detect image format from bytes. - - Args: - image_bytes: Image data as bytes - - Returns: - Detected format (defaults to PNG if unknown) - """ - # Check magic bytes first (fast) - if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"): - return ImageFormat.PNG - elif image_bytes.startswith(b"\xff\xd8\xff"): - return ImageFormat.JPG - elif image_bytes.startswith(b"RIFF") and b"WEBP" in image_bytes[:12]: - return ImageFormat.WEBP - - # Fallback to PIL for robust detection - try: - import io - - from PIL import Image - - img = Image.open(io.BytesIO(image_bytes)) - format_str = img.format.lower() if img.format else None - if format_str in ["png", "jpeg", "jpg", "webp"]: - return ImageFormat(format_str if format_str != "jpeg" else "jpg") - except Exception: - pass - - # Default to PNG - return ImageFormat.PNG - def _validate_image(self, image_path: Path) -> None: """Validate that saved image is readable. 
@@ -149,9 +88,7 @@ def _validate_image(self, image_path: Path) -> None: ValueError: If image is corrupted or unreadable """ try: - from PIL import Image - - with Image.open(image_path) as img: + with PIL.Image.open(image_path) as img: img.verify() except Exception as e: # Clean up invalid file From 0f07f7b9501aaee80e68b994b301ccd464391f05 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Fri, 6 Feb 2026 13:11:54 -0700 Subject: [PATCH 25/64] Streamline integration for image generation --- .../config/utils/image_helpers.py | 20 +- .../tests/config/utils/test_image_helpers.py | 184 +++++++++++++++++- .../column_generators/generators/image.py | 20 +- .../dataset_builders/artifact_storage.py | 22 ++- .../dataset_builders/column_wise_builder.py | 45 +++-- .../data_designer/engine/storage/__init__.py | 4 +- ...image_storage.py => multimedia_storage.py} | 33 ++-- .../generators/test_image.py | 121 ++++++++++++ .../tests/engine/storage/__init__.py | 2 + .../engine/storage/test_multimedia_storage.py | 182 +++++++++++++++++ 10 files changed, 583 insertions(+), 50 deletions(-) rename packages/data-designer-engine/src/data_designer/engine/storage/{image_storage.py => multimedia_storage.py} (80%) create mode 100644 packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py create mode 100644 packages/data-designer-engine/tests/engine/storage/__init__.py create mode 100644 packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 48dacbae..1f5ec332 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -92,8 +92,8 @@ def detect_image_format(image_bytes: bytes) -> ImageFormat: try: img = PIL.Image.open(io.BytesIO(image_bytes)) format_str 
= img.format.lower() if img.format else None - if format_str in ["png", "jpeg", "jpg", "webp"]: - return ImageFormat(format_str if format_str != "jpeg" else "jpg") + if format_str in [ImageFormat.PNG, ImageFormat.JPG, ImageFormat.JPEG, ImageFormat.WEBP]: + return ImageFormat(format_str if format_str != ImageFormat.JPEG else ImageFormat.JPG) except Exception: pass @@ -191,6 +191,22 @@ def load_image_path_to_base64(image_path: str, base_path: str | None = None) -> return None +def validate_image(image_path: Path) -> None: + """Validate that an image file is readable and not corrupted. + + Args: + image_path: Path to image file + + Raises: + ValueError: If image is corrupted or unreadable + """ + try: + with PIL.Image.open(image_path) as img: + img.verify() + except Exception as e: + raise ValueError(f"Image validation failed: {e}") from e + + def get_supported_image_extensions() -> list[str]: """Get list of supported image extensions from ImageFormat enum. diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index 3d6683e4..9c7ccd7f 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -4,7 +4,14 @@ from __future__ import annotations import base64 - +import io +from typing import TYPE_CHECKING +from unittest.mock import Mock, patch + +# Explicitly import PIL.Image submodule to make it accessible as PIL.Image +# Python doesn't automatically import submodules when you import a package, +# so `import PIL` alone doesn't give you access to PIL.Image +import PIL.Image # noqa: E402 import pytest from data_designer.config.models import ImageFormat @@ -16,7 +23,13 @@ is_base64_image, is_image_path, is_image_url, + load_image_path_to_base64, + validate_image, ) +from data_designer.lazy_heavy_imports import PIL + +if TYPE_CHECKING: + import PIL # Tests for 
extract_base64_from_data_uri @@ -139,6 +152,39 @@ def test_is_image_url_non_http(): assert is_image_url("ftp://example.com/image.png") is False +# Tests for validate_image + + +def test_validate_image_valid_png(tmp_path): + # Create a valid 1x1 PNG using PIL + img = PIL.Image.new("RGB", (1, 1), color="red") + buf = io.BytesIO() + img.save(buf, format="PNG") + png_bytes = buf.getvalue() + + image_path = tmp_path / "test.png" + image_path.write_bytes(png_bytes) + + # Should not raise + validate_image(image_path) + + +def test_validate_image_corrupted_raises_error(tmp_path): + # Create an invalid image file + image_path = tmp_path / "corrupted.png" + image_path.write_bytes(b"not a valid image") + + with pytest.raises(ValueError, match="Image validation failed"): + validate_image(image_path) + + +def test_validate_image_nonexistent_raises_error(tmp_path): + image_path = tmp_path / "nonexistent.png" + + with pytest.raises(ValueError, match="Image validation failed"): + validate_image(image_path) + + # Tests for get_supported_image_extensions @@ -146,3 +192,139 @@ def test_get_supported_image_extensions_matches_enum(): result = get_supported_image_extensions() enum_values = [f".{fmt.value}" for fmt in ImageFormat] assert set(result) == set(enum_values) + + +# Additional tests for uncovered lines + + +def test_detect_image_format_with_pil_fallback_unsupported_format(tmp_path): + # Create a real GIF image that will trigger PIL fallback + # (GIF has different magic bytes not in our fast-path detection) + img = PIL.Image.new("RGB", (1, 1), color="red") + gif_path = tmp_path / "test.gif" + img.save(gif_path, format="GIF") + + gif_bytes = gif_path.read_bytes() + # Should use PIL fallback and default to PNG (GIF not in ImageFormat enum) + result = detect_image_format(gif_bytes) + assert result == ImageFormat.PNG + + +def test_detect_image_format_with_pil_fallback_jpeg(): + # Test PIL fallback path that converts "jpeg" format string to JPG enum + # Use mock since we can't easily 
create valid JPEG bytes without magic bytes + mock_img = Mock() + mock_img.format = "JPEG" + + # Use bytes that don't match our magic bytes to trigger PIL fallback + test_bytes = b"\x00\x00\x00\x00" + + with patch.object(PIL.Image, "open", return_value=mock_img): + result = detect_image_format(test_bytes) + # Should convert JPEG -> JPG via line 96 + assert result == ImageFormat.JPG + + +def test_is_image_path_non_string_input(): + assert is_image_path(123) is False + assert is_image_path(None) is False + assert is_image_path([]) is False + + +def test_is_base64_image_non_string_input(): + assert is_base64_image(123) is False + assert is_base64_image(None) is False + assert is_base64_image([]) is False + + +def test_is_base64_image_invalid_base64_decode(): + # String with valid base64 characters but incorrect padding that causes decode to fail + # Single '=' in middle of string is invalid base64 (padding only allowed at end) + invalid_base64 = "A" * 50 + "=" + "A" * 49 + "more text" + assert is_base64_image(invalid_base64) is False + + +def test_is_image_url_non_string_input(): + assert is_image_url(123) is False + assert is_image_url(None) is False + assert is_image_url([]) is False + + +# Tests for load_image_path_to_base64 + + +def test_load_image_path_to_base64_absolute_path(tmp_path): + # Create a test image file + img = PIL.Image.new("RGB", (1, 1), color="blue") + image_path = tmp_path / "test.png" + img.save(image_path) + + # Load with absolute path + result = load_image_path_to_base64(str(image_path)) + assert result is not None + assert len(result) > 0 + # Verify it's valid base64 + decoded = base64.b64decode(result) + assert len(decoded) > 0 + + +def test_load_image_path_to_base64_relative_with_base_path(tmp_path): + # Create a test image file + img = PIL.Image.new("RGB", (1, 1), color="green") + image_path = tmp_path / "subdir" / "test.png" + image_path.parent.mkdir(exist_ok=True) + img.save(image_path) + + # Load with relative path and base_path + result 
= load_image_path_to_base64("subdir/test.png", base_path=str(tmp_path)) + assert result is not None + assert len(result) > 0 + + +def test_load_image_path_to_base64_nonexistent_file(): + result = load_image_path_to_base64("/nonexistent/path/to/image.png") + assert result is None + + +def test_load_image_path_to_base64_relative_with_cwd_fallback(tmp_path, monkeypatch): + # Create test image in current working directory + + # Change to tmp_path as cwd + monkeypatch.chdir(tmp_path) + + img = PIL.Image.new("RGB", (1, 1), color="yellow") + image_path = tmp_path / "test_cwd.png" + img.save(image_path) + + # Use relative path without base_path - should fall back to cwd + result = load_image_path_to_base64("test_cwd.png") + assert result is not None + assert len(result) > 0 + + +def test_load_image_path_to_base64_base_path_fallback_to_cwd(tmp_path, monkeypatch): + # Test the case where base_path is provided but file isn't there, falls back to cwd + monkeypatch.chdir(tmp_path) + + # Create image in cwd + img = PIL.Image.new("RGB", (1, 1), color="red") + image_path = tmp_path / "test.png" + img.save(image_path) + + # Create a different base_path that doesn't have the image + wrong_base = tmp_path / "wrong" + wrong_base.mkdir() + + # Use relative path with wrong base_path - should fall back to cwd + result = load_image_path_to_base64("test.png", base_path=str(wrong_base)) + assert result is not None + assert len(result) > 0 + + +def test_load_image_path_to_base64_exception_handling(tmp_path): + # Create a directory (not a file) to trigger exception + dir_path = tmp_path / "directory" + dir_path.mkdir() + + result = load_image_path_to_base64(str(dir_path)) + assert result is None diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index db3c9c9e..7ad7a18c 100644 --- 
a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -11,25 +11,27 @@ from data_designer.engine.processing.utils import deserialize_json_values if TYPE_CHECKING: - from data_designer.engine.storage.image_storage import ImageStorageManager + from data_designer.engine.storage.multimedia_storage import MultimediaStorage class ImageCellGenerator(WithJinja2UserTemplateRendering, ColumnGeneratorWithModel[ImageGenerationColumnConfig]): """Generator for image columns with optional disk persistence. - Behavior depends on whether image_storage_manager is set: - - If set (create mode): Saves images to disk and stores relative paths in dataframe + Behavior depends on whether multimedia storage is available via ResourceProvider: + - If available (create mode): Saves images to disk and stores relative paths in dataframe - If None (preview mode): Stores base64 directly in dataframe API is automatically detected based on the model name: - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) 
β†’ image_generation API - All other models β†’ chat/completions API (default) - Attributes: - image_storage_manager: Optional image storage manager instance (set by dataset builder) + Storage is accessed via ResourceProvider.artifact_storage.multimedia_storage """ - image_storage_manager: ImageStorageManager | None = None + @property + def multimedia_storage(self) -> MultimediaStorage | None: + """Get multimedia storage from resource provider if available.""" + return self._resource_provider.artifact_storage.multimedia_storage @staticmethod def get_generation_strategy() -> GenerationStrategy: @@ -67,11 +69,9 @@ def generate(self, data: dict) -> dict: base64_images = self.model.generate_image(prompt=prompt) # Store in dataframe based on mode - if self.image_storage_manager: + if self.multimedia_storage: # Create mode: save each image to disk and store list of relative paths - relative_paths = [ - self.image_storage_manager.save_base64_image(base64_image) for base64_image in base64_images - ] + relative_paths = [self.multimedia_storage.save_base64_image(base64_image) for base64_image in base64_images] data[self.config.name] = relative_paths else: # Preview mode: store list of base64 strings directly diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py index 35e7d4f8..b5ffaae7 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py @@ -11,11 +11,12 @@ from pathlib import Path from typing import TYPE_CHECKING -from pydantic import BaseModel, field_validator, model_validator +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from data_designer.config.utils.io_helpers import read_parquet_dataset from data_designer.config.utils.type_helpers import 
StrEnum, resolve_string_enum from data_designer.engine.dataset_builders.errors import ArtifactStorageError +from data_designer.engine.storage.multimedia_storage import MultimediaStorage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: @@ -38,12 +39,15 @@ class BatchStage(StrEnum): class ArtifactStorage(BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + artifact_path: Path | str dataset_name: str = "dataset" final_dataset_folder_name: str = FINAL_DATASET_FOLDER_NAME partial_results_folder_name: str = "tmp-partial-parquet-files" dropped_columns_folder_name: str = "dropped-columns-parquet-files" processors_outputs_folder_name: str = PROCESSORS_OUTPUTS_FOLDER_NAME + multimedia_storage: MultimediaStorage | None = Field(default=None, exclude=True) @property def artifact_path_exists(self) -> bool: @@ -116,6 +120,22 @@ def validate_folder_names(self): return self + def ensure_multimedia_storage(self) -> MultimediaStorage: + """Lazily create multimedia storage if not already present. 
+ + Returns: + MultimediaStorage instance + + Note: + Creates storage with default settings (images_subdir="images", validate_images=True) + """ + if self.multimedia_storage is None: + self.multimedia_storage = MultimediaStorage( + base_path=self.base_dataset_path, + validate_images=True, + ) + return self.multimedia_storage + @staticmethod def mkdir_if_needed(path: Path | str) -> Path: """Create the directory if it does not exist.""" diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py index 7a2962eb..ac4469eb 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -26,7 +26,6 @@ ColumnGeneratorWithModel, GenerationStrategy, ) -from data_designer.engine.column_generators.generators.image import ImageCellGenerator from data_designer.engine.column_generators.utils.generator_classification import column_type_is_model_generated from data_designer.engine.compiler import compile_data_designer_config from data_designer.engine.dataset_builders.artifact_storage import SDG_CONFIG_FILENAME, ArtifactStorage @@ -41,7 +40,6 @@ from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry from data_designer.engine.resources.resource_provider import ResourceProvider -from data_designer.engine.storage.image_storage import ImageStorageManager from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: @@ -66,7 +64,6 @@ def __init__( self._resource_provider = resource_provider self._records_to_drop: set[int] = set() self._registry = registry or DataDesignerRegistry() - self._image_storage_manager: ImageStorageManager | None = None self._data_designer_config = 
compile_data_designer_config(data_designer_config, resource_provider) self._column_configs = compile_dataset_builder_column_configs(self._data_designer_config) @@ -98,11 +95,31 @@ def build( *, num_records: int, on_batch_complete: Callable[[Path], None] | None = None, + save_multimedia_to_disk: bool = True, ) -> Path: + """Build the dataset. + + Args: + num_records: Number of records to generate. + on_batch_complete: Optional callback function called when each batch completes. + save_multimedia_to_disk: Whether to save generated multimedia (images, audio, video) to disk. + If False, multimedia is stored directly in the DataFrame (e.g., images as base64). + Default is True. + + Returns: + Path to the generated dataset directory. + """ self._run_model_health_check_if_needed() self._run_mcp_tool_check_if_needed() self._write_builder_config() - self._initialize_image_storage_if_needed() + + # Ensure multimedia storage exists if needed + if save_multimedia_to_disk and self._has_image_columns(): + self.artifact_storage.ensure_multimedia_storage() + else: + # Disable storage for preview or when explicitly disabled + self.artifact_storage.multimedia_storage = None + generators = self._initialize_generators() start_time = time.perf_counter() group_id = uuid.uuid4().hex @@ -128,7 +145,7 @@ def build( def build_preview(self, *, num_records: int) -> pd.DataFrame: self._run_model_health_check_if_needed() self._run_mcp_tool_check_if_needed() - # Skip image storage initialization for preview - base64 will be stored directly in DataFrame + # Skip multimedia storage initialization for preview - base64 will be stored directly in DataFrame generators = self._initialize_generators() group_id = uuid.uuid4().hex @@ -155,26 +172,16 @@ def _has_image_columns(self) -> bool: return any(col.column_type == DataDesignerColumnType.IMAGE_GENERATION for col in self.single_column_configs) - def _initialize_image_storage_if_needed(self) -> None: - """Initialize image storage manager if dataset has 
image columns.""" - if self._has_image_columns(): - self._image_storage_manager = ImageStorageManager( - base_path=self.artifact_storage.base_dataset_path, images_subdir="images", validate_images=True - ) - def _initialize_generators(self) -> list[ColumnGenerator]: + """Initialize column generators. + + Generators access multimedia storage via ResourceProvider.artifact_storage.multimedia_storage + """ generators = [] for config in self._column_configs: generator_cls = self._registry.column_generators.get_for_config_type(type(config)) generator = generator_cls(config=config, resource_provider=self._resource_provider) - - # Inject image storage manager for image generators (if available) - # For preview mode, storage manager is None and base64 is stored directly - if isinstance(generator, ImageCellGenerator): - generator.image_storage_manager = self._image_storage_manager - generators.append(generator) - return generators def _write_builder_config(self) -> None: diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py index ad7ef0d5..820d512a 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -from data_designer.engine.storage.image_storage import ImageFormat, ImageStorageManager +from data_designer.engine.storage.multimedia_storage import MultimediaStorage -__all__ = ["ImageFormat", "ImageStorageManager"] +__all__ = ["MultimediaStorage"] diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/multimedia_storage.py similarity index 80% rename from packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py rename to packages/data-designer-engine/src/data_designer/engine/storage/multimedia_storage.py index 22d4bf84..e40c0032 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/image_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/multimedia_storage.py @@ -5,28 +5,32 @@ import uuid from pathlib import Path -from typing import TYPE_CHECKING -from data_designer.config.utils.image_helpers import decode_base64_image, detect_image_format -from data_designer.lazy_heavy_imports import PIL +from data_designer.config.utils.image_helpers import decode_base64_image, detect_image_format, validate_image -if TYPE_CHECKING: - import PIL +IMAGES_SUBDIR = "images" -class ImageStorageManager: - """Manages disk storage of generated images. +class MultimediaStorage: + """Manages disk storage of generated multimedia content. + + Currently supports: + - Images (PNG, JPG, WEBP) + + Future support planned for: + - Audio + - Video Handles: - - Creating images directory + - Creating storage directories - Decoding base64 to bytes - - Detecting image format + - Detecting media format - Saving with UUID filenames - Returning relative paths """ - def __init__(self, base_path: Path, images_subdir: str = "images", validate_images: bool = True) -> None: - """Initialize image storage manager. 
+ def __init__(self, base_path: Path, images_subdir: str = IMAGES_SUBDIR, validate_images: bool = True) -> None: + """Initialize multimedia storage manager. Args: base_path: Base directory for dataset @@ -88,12 +92,11 @@ def _validate_image(self, image_path: Path) -> None: ValueError: If image is corrupted or unreadable """ try: - with PIL.Image.open(image_path) as img: - img.verify() - except Exception as e: + validate_image(image_path) + except ValueError: # Clean up invalid file image_path.unlink(missing_ok=True) - raise ValueError(f"Saved image is invalid or corrupted: {e}") from e + raise def cleanup(self) -> None: """Clean up image directory (for preview mode).""" diff --git a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py new file mode 100644 index 00000000..7173ed2d --- /dev/null +++ b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py @@ -0,0 +1,121 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from unittest.mock import Mock, patch + +import pytest + +from data_designer.config.column_configs import ImageGenerationColumnConfig +from data_designer.engine.column_generators.generators.base import GenerationStrategy +from data_designer.engine.column_generators.generators.image import ImageCellGenerator +from data_designer.engine.processing.ginja.exceptions import UserTemplateError + + +@pytest.fixture +def stub_image_column_config(): + return ImageGenerationColumnConfig( + name="test_image", prompt="A {{ style }} image of {{ subject }}", model_alias="test_model" + ) + + +@pytest.fixture +def stub_base64_images() -> list[str]: + return ["base64_image_1", "base64_image_2"] + + +def test_image_cell_generator_generation_strategy( + stub_image_column_config: ImageGenerationColumnConfig, stub_resource_provider: None +) -> None: + generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) + assert generator.get_generation_strategy() == GenerationStrategy.CELL_BY_CELL + + +def test_image_cell_generator_multimedia_storage_property( + stub_image_column_config: ImageGenerationColumnConfig, stub_resource_provider: None +) -> None: + generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) + # Should return multimedia_storage from artifact_storage (None by default in stub) + assert generator.multimedia_storage is None + + +def test_image_cell_generator_generate_with_storage( + stub_image_column_config, stub_resource_provider, stub_base64_images +): + """Test generate with multimedia storage (create mode) - saves to disk.""" + # Setup mock multimedia storage + mock_storage = Mock() + mock_storage.save_base64_image.side_effect = ["images/uuid1.png", "images/uuid2.png"] + stub_resource_provider.artifact_storage.multimedia_storage = mock_storage + + with patch.object( + stub_resource_provider.model_registry.get_model.return_value, + 
"generate_image", + return_value=stub_base64_images, + ) as mock_generate: + generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) + data = generator.generate(data={"style": "photorealistic", "subject": "cat"}) + + # Check that column was added with relative paths + assert stub_image_column_config.name in data + assert data[stub_image_column_config.name] == ["images/uuid1.png", "images/uuid2.png"] + + # Verify model was called with rendered prompt + mock_generate.assert_called_once_with(prompt="A photorealistic image of cat") + + # Verify storage was called for each image + assert mock_storage.save_base64_image.call_count == 2 + mock_storage.save_base64_image.assert_any_call("base64_image_1") + mock_storage.save_base64_image.assert_any_call("base64_image_2") + + +def test_image_cell_generator_generate_without_storage( + stub_image_column_config, stub_resource_provider, stub_base64_images +): + """Test generate without multimedia storage (preview mode) - stores base64 directly.""" + # Ensure multimedia_storage is None (preview mode) + stub_resource_provider.artifact_storage.multimedia_storage = None + + with patch.object( + stub_resource_provider.model_registry.get_model.return_value, + "generate_image", + return_value=stub_base64_images, + ) as mock_generate: + generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) + data = generator.generate(data={"style": "watercolor", "subject": "dog"}) + + # Check that column was added with base64 data + assert stub_image_column_config.name in data + assert data[stub_image_column_config.name] == stub_base64_images + + # Verify model was called with rendered prompt + mock_generate.assert_called_once_with(prompt="A watercolor image of dog") + + +def test_image_cell_generator_missing_columns_error(stub_image_column_config, stub_resource_provider): + """Test that missing required columns raises ValueError.""" + generator = 
ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) + + with pytest.raises(ValueError, match="columns.*missing"): + # Missing 'subject' column + generator.generate(data={"style": "photorealistic"}) + + +def test_image_cell_generator_empty_prompt_error(stub_resource_provider): + """Test that empty rendered prompt raises UserTemplateError.""" + # Create config with template that renders to empty string + config = ImageGenerationColumnConfig(name="test_image", prompt="{{ empty }}", model_alias="test_model") + + generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) + + with pytest.raises(UserTemplateError): + generator.generate(data={"empty": ""}) + + +def test_image_cell_generator_whitespace_only_prompt_error(stub_resource_provider): + """Test that whitespace-only rendered prompt raises ValueError.""" + config = ImageGenerationColumnConfig(name="test_image", prompt="{{ spaces }}", model_alias="test_model") + + generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) + + with pytest.raises(ValueError, match="empty"): + generator.generate(data={"spaces": " "}) diff --git a/packages/data-designer-engine/tests/engine/storage/__init__.py b/packages/data-designer-engine/tests/engine/storage/__init__.py new file mode 100644 index 00000000..e5725ea5 --- /dev/null +++ b/packages/data-designer-engine/tests/engine/storage/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 diff --git a/packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py b/packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py new file mode 100644 index 00000000..ade76b5a --- /dev/null +++ b/packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py @@ -0,0 +1,182 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import base64 +import io + +# Explicitly import PIL.Image submodule to make it accessible as PIL.Image +# Python doesn't automatically import submodules when you import a package, +# so `import PIL` alone doesn't give you access to PIL.Image +import PIL.Image # noqa: E402 +import pytest + +from data_designer.engine.storage.multimedia_storage import IMAGES_SUBDIR, MultimediaStorage +from data_designer.lazy_heavy_imports import PIL + + +@pytest.fixture +def multimedia_storage(tmp_path): + """Create a MultimediaStorage instance with a temporary directory.""" + return MultimediaStorage(base_path=tmp_path) + + +@pytest.fixture +def sample_base64_png() -> str: + """Create a valid 1x1 PNG as base64.""" + img = PIL.Image.new("RGB", (1, 1), color="red") + buf = io.BytesIO() + img.save(buf, format="PNG") + png_bytes = buf.getvalue() + return base64.b64encode(png_bytes).decode() + + +@pytest.fixture +def sample_base64_jpg() -> str: + """Create a valid 1x1 JPEG as base64.""" + img = PIL.Image.new("RGB", (1, 1), color="blue") + buf = io.BytesIO() + img.save(buf, format="JPEG") + jpg_bytes = buf.getvalue() + return base64.b64encode(jpg_bytes).decode() + + +def test_multimedia_storage_init(tmp_path): + """Test MultimediaStorage initialization.""" + storage = MultimediaStorage(base_path=tmp_path) + assert storage.base_path == tmp_path + assert storage.images_dir == tmp_path / IMAGES_SUBDIR + assert storage.images_subdir == 
IMAGES_SUBDIR + assert storage.validate_images is True + # Should create images directory on init + assert storage.images_dir.exists() + + +def test_multimedia_storage_init_custom_subdir(tmp_path): + """Test MultimediaStorage initialization with custom subdirectory.""" + custom_subdir = "custom_images" + storage = MultimediaStorage(base_path=tmp_path, images_subdir=custom_subdir, validate_images=False) + assert storage.images_subdir == custom_subdir + assert storage.images_dir == tmp_path / custom_subdir + assert storage.validate_images is False + assert storage.images_dir.exists() + + +def test_save_base64_image_png(multimedia_storage, sample_base64_png): + """Test saving a PNG image from base64.""" + relative_path = multimedia_storage.save_base64_image(sample_base64_png) + + # Check return value format + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + assert relative_path.endswith(".png") + + # Check file exists on disk + full_path = multimedia_storage.base_path / relative_path + assert full_path.exists() + + # Verify file content + saved_bytes = full_path.read_bytes() + expected_bytes = base64.b64decode(sample_base64_png) + assert saved_bytes == expected_bytes + + +def test_save_base64_image_jpg(multimedia_storage, sample_base64_jpg): + """Test saving a JPEG image from base64.""" + relative_path = multimedia_storage.save_base64_image(sample_base64_jpg) + + # Check return value format + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + assert relative_path.endswith(".jpg") + + # Check file exists on disk + full_path = multimedia_storage.base_path / relative_path + assert full_path.exists() + + +def test_save_base64_image_with_data_uri(multimedia_storage, sample_base64_png): + """Test saving image from data URI format.""" + data_uri = f"data:image/png;base64,{sample_base64_png}" + relative_path = multimedia_storage.save_base64_image(data_uri) + + # Should successfully extract base64 and save + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + 
assert relative_path.endswith(".png") + + # Verify file exists and content is correct + full_path = multimedia_storage.base_path / relative_path + assert full_path.exists() + saved_bytes = full_path.read_bytes() + expected_bytes = base64.b64decode(sample_base64_png) + assert saved_bytes == expected_bytes + + +def test_save_base64_image_invalid_base64_raises_error(multimedia_storage): + """Test that invalid base64 data raises ValueError.""" + with pytest.raises(ValueError, match="Invalid base64"): + multimedia_storage.save_base64_image("not-valid-base64!!!") + + +def test_save_base64_image_multiple_images_unique_filenames(multimedia_storage, sample_base64_png): + """Test that multiple images get unique filenames.""" + path1 = multimedia_storage.save_base64_image(sample_base64_png) + path2 = multimedia_storage.save_base64_image(sample_base64_png) + + # Paths should be different (different UUIDs) + assert path1 != path2 + + # Both files should exist + assert (multimedia_storage.base_path / path1).exists() + assert (multimedia_storage.base_path / path2).exists() + + +def test_save_base64_image_validation_enabled(tmp_path, sample_base64_png): + """Test that validation is performed when enabled.""" + storage = MultimediaStorage(base_path=tmp_path, validate_images=True) + # Should succeed with valid image + relative_path = storage.save_base64_image(sample_base64_png) + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + + +def test_save_base64_image_validation_corrupted_image_raises_error(tmp_path): + """Test that corrupted image fails validation and is cleaned up.""" + storage = MultimediaStorage(base_path=tmp_path, validate_images=True) + + # Create base64 of invalid image data + corrupted_bytes = b"not a valid image" + corrupted_base64 = base64.b64encode(corrupted_bytes).decode() + + with pytest.raises(ValueError, match="Image validation failed"): + storage.save_base64_image(corrupted_base64) + + # Check that no files were left behind + assert 
len(list(storage.images_dir.iterdir())) == 0 + + +def test_save_base64_image_validation_disabled(tmp_path): + """Test that validation can be disabled.""" + storage = MultimediaStorage(base_path=tmp_path, validate_images=False) + + # Create base64 of invalid image data + corrupted_bytes = b"not a valid image" + corrupted_base64 = base64.b64encode(corrupted_bytes).decode() + + # Should succeed without validation + relative_path = storage.save_base64_image(corrupted_base64) + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + + # File should exist even though it's invalid + full_path = storage.base_path / relative_path + assert full_path.exists() + + +def test_cleanup(multimedia_storage, sample_base64_png): + """Test cleanup removes images directory.""" + # Save an image first + multimedia_storage.save_base64_image(sample_base64_png) + assert multimedia_storage.images_dir.exists() + assert len(list(multimedia_storage.images_dir.iterdir())) > 0 + + # Cleanup should remove directory + multimedia_storage.cleanup() + assert not multimedia_storage.images_dir.exists() From 2aae6ccd6f09064feddc6b14d1faa15dd5c5e417 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Fri, 6 Feb 2026 17:30:10 -0700 Subject: [PATCH 26/64] streamline generation --- .../config/utils/image_helpers.py | 8 +- .../config/utils/visualization.py | 28 +-- .../src/data_designer/lazy_heavy_imports.py | 1 + .../tests/config/utils/test_image_helpers.py | 24 +-- .../column_generators/generators/image.py | 31 ++- .../dataset_builders/artifact_storage.py | 28 ++- .../dataset_builders/column_wise_builder.py | 18 +- .../data_designer/engine/storage/__init__.py | 4 +- ...multimedia_storage.py => media_storage.py} | 63 ++++-- .../generators/test_image.py | 24 +-- .../dataset_builders/test_artifact_storage.py | 7 +- .../engine/storage/test_media_storage.py | 174 +++++++++++++++++ .../engine/storage/test_multimedia_storage.py | 182 ------------------ .../tests/engine/test_configurable_task.py | 33 +--- 14 files 
changed, 301 insertions(+), 324 deletions(-) rename packages/data-designer-engine/src/data_designer/engine/storage/{multimedia_storage.py => media_storage.py} (56%) create mode 100644 packages/data-designer-engine/tests/engine/storage/test_media_storage.py delete mode 100644 packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 1f5ec332..67803aff 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -11,10 +11,10 @@ from typing import TYPE_CHECKING from data_designer.config.models import ImageFormat -from data_designer.lazy_heavy_imports import PIL +from data_designer.lazy_heavy_imports import Image if TYPE_CHECKING: - import PIL + from PIL import Image # Magic bytes for image format detection IMAGE_FORMAT_MAGIC_BYTES = { @@ -90,7 +90,7 @@ def detect_image_format(image_bytes: bytes) -> ImageFormat: # Fallback to PIL for robust detection try: - img = PIL.Image.open(io.BytesIO(image_bytes)) + img = Image.open(io.BytesIO(image_bytes)) format_str = img.format.lower() if img.format else None if format_str in [ImageFormat.PNG, ImageFormat.JPG, ImageFormat.JPEG, ImageFormat.WEBP]: return ImageFormat(format_str if format_str != ImageFormat.JPEG else ImageFormat.JPG) @@ -201,7 +201,7 @@ def validate_image(image_path: Path) -> None: ValueError: If image is corrupted or unreadable """ try: - with PIL.Image.open(image_path) as img: + with Image.open(image_path) as img: img.verify() except Exception as e: raise ValueError(f"Image validation failed: {e}") from e diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index c349ec86..6a9e8ee5 
100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -3,8 +3,6 @@ from __future__ import annotations -import base64 -import io import json import os from collections import OrderedDict @@ -35,12 +33,11 @@ is_image_url, load_image_path_to_base64, ) -from data_designer.lazy_heavy_imports import PIL, np, pd +from data_designer.lazy_heavy_imports import np, pd if TYPE_CHECKING: import numpy as np import pandas as pd - import PIL from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.dataset_metadata import DatasetMetadata @@ -49,15 +46,12 @@ console = Console() -def _display_image_if_in_notebook( - image_data: str, col_name: str, max_width: int = 512, base_path: str | None = None -) -> bool: +def _display_image_if_in_notebook(image_data: str, col_name: str, base_path: str | None = None) -> bool: """Display image with caption in Jupyter notebook if available. Args: image_data: Base64-encoded image data, data URI, or file path. col_name: Name of the column (used for caption). - max_width: Maximum width for the displayed image in pixels. base_path: Optional base path to resolve relative image paths. 
Returns: @@ -83,27 +77,15 @@ def _display_image_if_in_notebook( # Extract base64 from data URI if present base64_data = extract_base64_from_data_uri(base64_data) - image_bytes = base64.b64decode(base64_data) - # Open image with PIL and resize if needed - img = PIL.Image.open(io.BytesIO(image_bytes)) - - # Resize if image is too large - if img.width > max_width: - ratio = max_width / img.width - new_height = int(img.height * ratio) - img = img.resize((max_width, new_height), PIL.Image.Resampling.LANCZOS) - - # Convert back to base64 for HTML display - buffered = io.BytesIO() - img.save(buffered, format=img.format or "PNG") - img_base64 = base64.b64encode(buffered.getvalue()).decode() + # Use the base64 data directly without resizing + img_base64 = base64_data # Create HTML with caption and image in left-aligned container html = f"""
πŸ–ΌοΈ {col_name}
- +
""" display(HTML(html)) diff --git a/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py b/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py index f7901a7c..0e95f248 100644 --- a/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py +++ b/packages/data-designer-config/src/data_designer/lazy_heavy_imports.py @@ -36,6 +36,7 @@ "scipy": "scipy", "jsonschema": "jsonschema", "PIL": "PIL", + "Image": "PIL.Image", } diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index 9c7ccd7f..e0eb0370 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -5,13 +5,8 @@ import base64 import io -from typing import TYPE_CHECKING from unittest.mock import Mock, patch -# Explicitly import PIL.Image submodule to make it accessible as PIL.Image -# Python doesn't automatically import submodules when you import a package, -# so `import PIL` alone doesn't give you access to PIL.Image -import PIL.Image # noqa: E402 import pytest from data_designer.config.models import ImageFormat @@ -26,10 +21,7 @@ load_image_path_to_base64, validate_image, ) -from data_designer.lazy_heavy_imports import PIL - -if TYPE_CHECKING: - import PIL +from data_designer.lazy_heavy_imports import Image # Tests for extract_base64_from_data_uri @@ -157,7 +149,7 @@ def test_is_image_url_non_http(): def test_validate_image_valid_png(tmp_path): # Create a valid 1x1 PNG using PIL - img = PIL.Image.new("RGB", (1, 1), color="red") + img = Image.new("RGB", (1, 1), color="red") buf = io.BytesIO() img.save(buf, format="PNG") png_bytes = buf.getvalue() @@ -200,7 +192,7 @@ def test_get_supported_image_extensions_matches_enum(): def test_detect_image_format_with_pil_fallback_unsupported_format(tmp_path): # Create a real GIF image that will trigger PIL fallback # (GIF 
has different magic bytes not in our fast-path detection) - img = PIL.Image.new("RGB", (1, 1), color="red") + img = Image.new("RGB", (1, 1), color="red") gif_path = tmp_path / "test.gif" img.save(gif_path, format="GIF") @@ -219,7 +211,7 @@ def test_detect_image_format_with_pil_fallback_jpeg(): # Use bytes that don't match our magic bytes to trigger PIL fallback test_bytes = b"\x00\x00\x00\x00" - with patch.object(PIL.Image, "open", return_value=mock_img): + with patch.object(Image, "open", return_value=mock_img): result = detect_image_format(test_bytes) # Should convert JPEG -> JPG via line 96 assert result == ImageFormat.JPG @@ -255,7 +247,7 @@ def test_is_image_url_non_string_input(): def test_load_image_path_to_base64_absolute_path(tmp_path): # Create a test image file - img = PIL.Image.new("RGB", (1, 1), color="blue") + img = Image.new("RGB", (1, 1), color="blue") image_path = tmp_path / "test.png" img.save(image_path) @@ -270,7 +262,7 @@ def test_load_image_path_to_base64_absolute_path(tmp_path): def test_load_image_path_to_base64_relative_with_base_path(tmp_path): # Create a test image file - img = PIL.Image.new("RGB", (1, 1), color="green") + img = Image.new("RGB", (1, 1), color="green") image_path = tmp_path / "subdir" / "test.png" image_path.parent.mkdir(exist_ok=True) img.save(image_path) @@ -292,7 +284,7 @@ def test_load_image_path_to_base64_relative_with_cwd_fallback(tmp_path, monkeypa # Change to tmp_path as cwd monkeypatch.chdir(tmp_path) - img = PIL.Image.new("RGB", (1, 1), color="yellow") + img = Image.new("RGB", (1, 1), color="yellow") image_path = tmp_path / "test_cwd.png" img.save(image_path) @@ -307,7 +299,7 @@ def test_load_image_path_to_base64_base_path_fallback_to_cwd(tmp_path, monkeypat monkeypatch.chdir(tmp_path) # Create image in cwd - img = PIL.Image.new("RGB", (1, 1), color="red") + img = Image.new("RGB", (1, 1), color="red") image_path = tmp_path / "test.png" img.save(image_path) diff --git 
a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index 7ad7a18c..41586e4b 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -11,27 +11,27 @@ from data_designer.engine.processing.utils import deserialize_json_values if TYPE_CHECKING: - from data_designer.engine.storage.multimedia_storage import MultimediaStorage + from data_designer.engine.storage.media_storage import MediaStorage class ImageCellGenerator(WithJinja2UserTemplateRendering, ColumnGeneratorWithModel[ImageGenerationColumnConfig]): - """Generator for image columns with optional disk persistence. + """Generator for image columns with disk or dataframe persistence. - Behavior depends on whether multimedia storage is available via ResourceProvider: - - If available (create mode): Saves images to disk and stores relative paths in dataframe - - If None (preview mode): Stores base64 directly in dataframe + Media storage always exists and determines behavior via its mode: + - DISK mode (create): Saves images to disk and stores relative paths in dataframe + - DATAFRAME mode (preview): Stores base64 directly in dataframe API is automatically detected based on the model name: - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) 
β†’ image_generation API - All other models β†’ chat/completions API (default) - Storage is accessed via ResourceProvider.artifact_storage.multimedia_storage + Storage is accessed via ResourceProvider.artifact_storage.media_storage """ @property - def multimedia_storage(self) -> MultimediaStorage | None: - """Get multimedia storage from resource provider if available.""" - return self._resource_provider.artifact_storage.multimedia_storage + def media_storage(self) -> MediaStorage: + """Get media storage from resource provider.""" + return self._resource_provider.artifact_storage.media_storage @staticmethod def get_generation_strategy() -> GenerationStrategy: @@ -68,13 +68,10 @@ def generate(self, data: dict) -> dict: # Generate images (returns list of base64 strings) base64_images = self.model.generate_image(prompt=prompt) - # Store in dataframe based on mode - if self.multimedia_storage: - # Create mode: save each image to disk and store list of relative paths - relative_paths = [self.multimedia_storage.save_base64_image(base64_image) for base64_image in base64_images] - data[self.config.name] = relative_paths - else: - # Preview mode: store list of base64 strings directly - data[self.config.name] = base64_images + # Store via media storage (mode determines disk vs dataframe storage) + # TODO: MediaStorage will check its mode (DISK/DATAFRAME) and act accordingly + # For now, always saves to disk - need to implement mode system + results = [self.media_storage.save_base64_image(base64_image) for base64_image in base64_images] + data[self.config.name] = results return data diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py index b5ffaae7..a7316be3 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py +++ 
b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py @@ -16,7 +16,7 @@ from data_designer.config.utils.io_helpers import read_parquet_dataset from data_designer.config.utils.type_helpers import StrEnum, resolve_string_enum from data_designer.engine.dataset_builders.errors import ArtifactStorageError -from data_designer.engine.storage.multimedia_storage import MultimediaStorage +from data_designer.engine.storage.media_storage import MediaStorage, StorageMode from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: @@ -47,7 +47,7 @@ class ArtifactStorage(BaseModel): partial_results_folder_name: str = "tmp-partial-parquet-files" dropped_columns_folder_name: str = "dropped-columns-parquet-files" processors_outputs_folder_name: str = PROCESSORS_OUTPUTS_FOLDER_NAME - multimedia_storage: MultimediaStorage | None = Field(default=None, exclude=True) + media_storage: MediaStorage = Field(default=None, exclude=True) @property def artifact_path_exists(self) -> bool: @@ -118,23 +118,21 @@ def validate_folder_names(self): if any(char in invalid_chars for char in name): raise ArtifactStorageError(f"πŸ›‘ Directory name '{name}' contains invalid characters.") - return self + # Initialize media storage with DISK mode by default + self.media_storage = MediaStorage( + base_path=self.base_dataset_path, + mode=StorageMode.DISK, + ) - def ensure_multimedia_storage(self) -> MultimediaStorage: - """Lazily create multimedia storage if not already present. + return self - Returns: - MultimediaStorage instance + def set_media_storage_mode(self, mode: StorageMode) -> None: + """Set media storage mode. 
- Note: - Creates storage with default settings (images_subdir="images", validate_images=True) + Args: + mode: StorageMode.DISK (save to disk) or StorageMode.DATAFRAME (store in memory) """ - if self.multimedia_storage is None: - self.multimedia_storage = MultimediaStorage( - base_path=self.base_dataset_path, - validate_images=True, - ) - return self.multimedia_storage + self.media_storage.mode = mode @staticmethod def mkdir_if_needed(path: Path | str) -> Path: diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py index ac4469eb..6802f805 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -40,6 +40,7 @@ from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.media_storage import StorageMode from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: @@ -113,12 +114,10 @@ def build( self._run_mcp_tool_check_if_needed() self._write_builder_config() - # Ensure multimedia storage exists if needed - if save_multimedia_to_disk and self._has_image_columns(): - self.artifact_storage.ensure_multimedia_storage() - else: - # Disable storage for preview or when explicitly disabled - self.artifact_storage.multimedia_storage = None + # Set media storage mode based on parameters + if self._has_image_columns(): + mode = StorageMode.DISK if save_multimedia_to_disk else StorageMode.DATAFRAME + self.artifact_storage.set_media_storage_mode(mode) generators = self._initialize_generators() start_time = time.perf_counter() @@ -145,7 +144,10 @@ def 
build( def build_preview(self, *, num_records: int) -> pd.DataFrame: self._run_model_health_check_if_needed() self._run_mcp_tool_check_if_needed() - # Skip multimedia storage initialization for preview - base64 will be stored directly in DataFrame + + # Set media storage to DATAFRAME mode for preview - base64 stored directly in DataFrame + if self._has_image_columns(): + self.artifact_storage.set_media_storage_mode(StorageMode.DATAFRAME) generators = self._initialize_generators() group_id = uuid.uuid4().hex @@ -175,7 +177,7 @@ def _has_image_columns(self) -> bool: def _initialize_generators(self) -> list[ColumnGenerator]: """Initialize column generators. - Generators access multimedia storage via ResourceProvider.artifact_storage.multimedia_storage + Generators access media storage via ResourceProvider.artifact_storage.media_storage """ generators = [] for config in self._column_configs: diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py index 820d512a..34c776d5 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -from data_designer.engine.storage.multimedia_storage import MultimediaStorage +from data_designer.engine.storage.media_storage import MediaStorage, StorageMode -__all__ = ["MultimediaStorage"] +__all__ = ["MediaStorage", "StorageMode"] diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/multimedia_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py similarity index 56% rename from packages/data-designer-engine/src/data_designer/engine/storage/multimedia_storage.py rename to packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py index e40c0032..ddac3459 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/multimedia_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py @@ -3,16 +3,29 @@ from __future__ import annotations +import shutil import uuid from pathlib import Path from data_designer.config.utils.image_helpers import decode_base64_image, detect_image_format, validate_image +from data_designer.config.utils.type_helpers import StrEnum IMAGES_SUBDIR = "images" -class MultimediaStorage: - """Manages disk storage of generated multimedia content. +class StorageMode(StrEnum): + """Storage mode for generated media content. + + - DISK: Save media to disk and store relative paths in dataframe (for dataset creation) + - DATAFRAME: Store base64 data directly in dataframe (for preview mode) + """ + + DISK = "disk" + DATAFRAME = "dataframe" + + +class MediaStorage: + """Manages storage of generated media content. 
Currently supports: - Images (PNG, JPG, WEBP) @@ -21,45 +34,60 @@ class MultimediaStorage: - Audio - Video + Storage modes: + - DISK: Save media to disk and return relative paths (for dataset creation) + - DATAFRAME: Return base64 data directly (for preview mode) + Handles: - Creating storage directories - Decoding base64 to bytes - Detecting media format - - Saving with UUID filenames - - Returning relative paths + - Saving with UUID filenames (DISK mode) + - Returning relative paths or base64 data based on mode + - Always validates images to ensure data quality """ - def __init__(self, base_path: Path, images_subdir: str = IMAGES_SUBDIR, validate_images: bool = True) -> None: - """Initialize multimedia storage manager. + def __init__( + self, base_path: Path, images_subdir: str = IMAGES_SUBDIR, mode: StorageMode = StorageMode.DISK + ) -> None: + """Initialize media storage manager. Args: base_path: Base directory for dataset images_subdir: Subdirectory name for images (default: "images") - validate_images: Whether to validate images after saving (default: True) + mode: Storage mode - DISK (save to disk) or DATAFRAME (return base64) """ self.base_path = Path(base_path) self.images_dir = self.base_path / images_subdir self.images_subdir = images_subdir - self.validate_images = validate_images - self._ensure_images_directory() + self.mode = mode def _ensure_images_directory(self) -> None: - """Create images directory if it doesn't exist.""" + """Create images directory if it doesn't exist (lazy initialization).""" self.images_dir.mkdir(parents=True, exist_ok=True) def save_base64_image(self, base64_data: str) -> str: - """Save base64 image to disk and return relative path. + """Save or return base64 image based on storage mode. 
Args: base64_data: Base64 encoded image string (with or without data URI prefix) Returns: - Relative path to saved image (e.g., "images/f47ac10b-58cc.png") + DISK mode: Relative path to saved image (e.g., "images/f47ac10b-58cc.png") + DATAFRAME mode: Original base64 data string Raises: - ValueError: If base64 data is invalid - OSError: If disk write fails + ValueError: If base64 data is invalid (DISK mode only) + OSError: If disk write fails (DISK mode only) """ + # DATAFRAME mode: return base64 directly without disk operations + if self.mode == StorageMode.DATAFRAME: + return base64_data + + # DISK mode: save to disk, validate, and return relative path + # Ensure images directory exists (lazy initialization) + self._ensure_images_directory() + # Decode base64 to bytes image_bytes = decode_base64_image(base64_data) @@ -76,9 +104,8 @@ def save_base64_image(self, base64_data: str) -> str: with open(full_path, "wb") as f: f.write(image_bytes) - # Optional validation - if self.validate_images: - self._validate_image(full_path) + # Always validate in DISK mode to ensure data quality + self._validate_image(full_path) return relative_path @@ -100,7 +127,5 @@ def _validate_image(self, image_path: Path) -> None: def cleanup(self) -> None: """Clean up image directory (for preview mode).""" - import shutil - if self.images_dir.exists(): shutil.rmtree(self.images_dir) diff --git a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py index 7173ed2d..e7055d67 100644 --- a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py +++ b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py @@ -30,22 +30,22 @@ def test_image_cell_generator_generation_strategy( assert generator.get_generation_strategy() == GenerationStrategy.CELL_BY_CELL -def test_image_cell_generator_multimedia_storage_property( +def 
test_image_cell_generator_media_storage_property( stub_image_column_config: ImageGenerationColumnConfig, stub_resource_provider: None ) -> None: generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) - # Should return multimedia_storage from artifact_storage (None by default in stub) - assert generator.multimedia_storage is None + # Should return media_storage from artifact_storage (always exists) + assert generator.media_storage is not None def test_image_cell_generator_generate_with_storage( stub_image_column_config, stub_resource_provider, stub_base64_images ): - """Test generate with multimedia storage (create mode) - saves to disk.""" - # Setup mock multimedia storage + """Test generate with media storage (create mode) - saves to disk.""" + # Setup mock media storage mock_storage = Mock() mock_storage.save_base64_image.side_effect = ["images/uuid1.png", "images/uuid2.png"] - stub_resource_provider.artifact_storage.multimedia_storage = mock_storage + stub_resource_provider.artifact_storage.media_storage = mock_storage with patch.object( stub_resource_provider.model_registry.get_model.return_value, @@ -68,12 +68,14 @@ def test_image_cell_generator_generate_with_storage( mock_storage.save_base64_image.assert_any_call("base64_image_2") -def test_image_cell_generator_generate_without_storage( +def test_image_cell_generator_generate_in_dataframe_mode( stub_image_column_config, stub_resource_provider, stub_base64_images ): - """Test generate without multimedia storage (preview mode) - stores base64 directly.""" - # Ensure multimedia_storage is None (preview mode) - stub_resource_provider.artifact_storage.multimedia_storage = None + """Test generate with media storage in DATAFRAME mode - stores base64 directly.""" + # Mock save_base64_image to return base64 directly (simulating DATAFRAME mode) + mock_storage = Mock() + mock_storage.save_base64_image.side_effect = stub_base64_images + 
stub_resource_provider.artifact_storage.media_storage = mock_storage with patch.object( stub_resource_provider.model_registry.get_model.return_value, @@ -83,7 +85,7 @@ def test_image_cell_generator_generate_without_storage( generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) data = generator.generate(data={"style": "watercolor", "subject": "dog"}) - # Check that column was added with base64 data + # Check that column was added with base64 data (simulating DATAFRAME mode) assert stub_image_column_config.name in data assert data[stub_image_column_config.name] == stub_base64_images diff --git a/packages/data-designer-engine/tests/engine/dataset_builders/test_artifact_storage.py b/packages/data-designer-engine/tests/engine/dataset_builders/test_artifact_storage.py index df15b4f7..35edf892 100644 --- a/packages/data-designer-engine/tests/engine/dataset_builders/test_artifact_storage.py +++ b/packages/data-designer-engine/tests/engine/dataset_builders/test_artifact_storage.py @@ -213,10 +213,11 @@ def test_artifact_storage_resolved_dataset_name(mock_datetime, tmp_path): (af_storage.artifact_path / af_storage.dataset_name).mkdir() assert af_storage.resolved_dataset_name == "dataset" - # dataset path exists and is not empty + # dataset path exists and is not empty (create file BEFORE constructing ArtifactStorage) + dataset_dir = tmp_path / "dataset" + dataset_dir.mkdir(exist_ok=True) + (dataset_dir / "stub_file.txt").touch() af_storage = ArtifactStorage(artifact_path=tmp_path) - (af_storage.artifact_path / af_storage.dataset_name / "stub_file.txt").touch() - print(af_storage.resolved_dataset_name) assert af_storage.resolved_dataset_name == "dataset_01-01-2025_120304" diff --git a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py new file mode 100644 index 00000000..abd17afe --- /dev/null +++ 
b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py @@ -0,0 +1,174 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import base64 +import io + +import pytest + +from data_designer.engine.storage.media_storage import IMAGES_SUBDIR, MediaStorage, StorageMode +from data_designer.lazy_heavy_imports import Image + + +@pytest.fixture +def media_storage(tmp_path): + """Create a MediaStorage instance with a temporary directory.""" + return MediaStorage(base_path=tmp_path) + + +@pytest.fixture +def sample_base64_png() -> str: + """Create a valid 1x1 PNG as base64.""" + img = Image.new("RGB", (1, 1), color="red") + buf = io.BytesIO() + img.save(buf, format="PNG") + png_bytes = buf.getvalue() + return base64.b64encode(png_bytes).decode() + + +@pytest.fixture +def sample_base64_jpg() -> str: + """Create a valid 1x1 JPEG as base64.""" + img = Image.new("RGB", (1, 1), color="blue") + buf = io.BytesIO() + img.save(buf, format="JPEG") + jpg_bytes = buf.getvalue() + return base64.b64encode(jpg_bytes).decode() + + +def test_media_storage_init(tmp_path): + """Test MediaStorage initialization.""" + storage = MediaStorage(base_path=tmp_path) + assert storage.base_path == tmp_path + assert storage.images_dir == tmp_path / IMAGES_SUBDIR + assert storage.images_subdir == IMAGES_SUBDIR + assert storage.mode == StorageMode.DISK + # Directory should NOT exist until first save (lazy initialization) + assert not storage.images_dir.exists() + + +def test_media_storage_init_custom_subdir(tmp_path): + """Test MediaStorage initialization with custom subdirectory and mode.""" + custom_subdir = "custom_images" + storage = MediaStorage(base_path=tmp_path, images_subdir=custom_subdir, mode=StorageMode.DATAFRAME) + assert storage.images_subdir == custom_subdir + assert storage.images_dir == tmp_path / custom_subdir + assert storage.mode == 
StorageMode.DATAFRAME + # Directory should NOT exist until first save (lazy initialization) + assert not storage.images_dir.exists() + + +def test_save_base64_image_png(media_storage, sample_base64_png): + """Test saving a PNG image from base64.""" + relative_path = media_storage.save_base64_image(sample_base64_png) + + # Check return value format + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + assert relative_path.endswith(".png") + + # Check file exists on disk + full_path = media_storage.base_path / relative_path + assert full_path.exists() + + # Verify file content + saved_bytes = full_path.read_bytes() + expected_bytes = base64.b64decode(sample_base64_png) + assert saved_bytes == expected_bytes + + +def test_save_base64_image_jpg(media_storage, sample_base64_jpg): + """Test saving a JPEG image from base64.""" + relative_path = media_storage.save_base64_image(sample_base64_jpg) + + # Check return value format + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + assert relative_path.endswith(".jpg") + + # Check file exists on disk + full_path = media_storage.base_path / relative_path + assert full_path.exists() + + +def test_save_base64_image_with_data_uri(media_storage, sample_base64_png): + """Test saving image from data URI format.""" + data_uri = f"data:image/png;base64,{sample_base64_png}" + relative_path = media_storage.save_base64_image(data_uri) + + # Should successfully extract base64 and save + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + assert relative_path.endswith(".png") + + # Verify file exists and content is correct + full_path = media_storage.base_path / relative_path + assert full_path.exists() + saved_bytes = full_path.read_bytes() + expected_bytes = base64.b64decode(sample_base64_png) + assert saved_bytes == expected_bytes + + +def test_save_base64_image_invalid_base64_raises_error(media_storage): + """Test that invalid base64 data raises ValueError.""" + with pytest.raises(ValueError, match="Invalid base64"): + 
media_storage.save_base64_image("not-valid-base64!!!") + + +def test_save_base64_image_multiple_images_unique_filenames(media_storage, sample_base64_png): + """Test that multiple images get unique filenames.""" + path1 = media_storage.save_base64_image(sample_base64_png) + path2 = media_storage.save_base64_image(sample_base64_png) + + # Paths should be different (different UUIDs) + assert path1 != path2 + + # Both files should exist + assert (media_storage.base_path / path1).exists() + assert (media_storage.base_path / path2).exists() + + +def test_save_base64_image_disk_mode_validates(tmp_path, sample_base64_png): + """Test that DISK mode validates images.""" + storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DISK) + # Should succeed with valid image + relative_path = storage.save_base64_image(sample_base64_png) + assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + + +def test_save_base64_image_disk_mode_corrupted_image_raises_error(tmp_path): + """Test that DISK mode validates and rejects corrupted images.""" + storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DISK) + + # Create base64 of invalid image data + corrupted_bytes = b"not a valid image" + corrupted_base64 = base64.b64encode(corrupted_bytes).decode() + + with pytest.raises(ValueError, match="Image validation failed"): + storage.save_base64_image(corrupted_base64) + + # Check that no files were left behind (cleanup on validation failure) + assert len(list(storage.images_dir.iterdir())) == 0 + + +def test_save_base64_image_dataframe_mode_returns_base64(tmp_path, sample_base64_png): + """Test that DATAFRAME mode returns base64 directly without disk operations.""" + storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DATAFRAME) + + # Should return the same base64 data + result = storage.save_base64_image(sample_base64_png) + assert result == sample_base64_png + + # Directory should not be created in DATAFRAME mode (lazy initialization) + assert not storage.images_dir.exists() 
+ + +def test_cleanup(media_storage, sample_base64_png): + """Test cleanup removes images directory.""" + # Save an image first + media_storage.save_base64_image(sample_base64_png) + assert media_storage.images_dir.exists() + assert len(list(media_storage.images_dir.iterdir())) > 0 + + # Cleanup should remove directory + media_storage.cleanup() + assert not media_storage.images_dir.exists() diff --git a/packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py b/packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py deleted file mode 100644 index ade76b5a..00000000 --- a/packages/data-designer-engine/tests/engine/storage/test_multimedia_storage.py +++ /dev/null @@ -1,182 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import base64 -import io - -# Explicitly import PIL.Image submodule to make it accessible as PIL.Image -# Python doesn't automatically import submodules when you import a package, -# so `import PIL` alone doesn't give you access to PIL.Image -import PIL.Image # noqa: E402 -import pytest - -from data_designer.engine.storage.multimedia_storage import IMAGES_SUBDIR, MultimediaStorage -from data_designer.lazy_heavy_imports import PIL - - -@pytest.fixture -def multimedia_storage(tmp_path): - """Create a MultimediaStorage instance with a temporary directory.""" - return MultimediaStorage(base_path=tmp_path) - - -@pytest.fixture -def sample_base64_png() -> str: - """Create a valid 1x1 PNG as base64.""" - img = PIL.Image.new("RGB", (1, 1), color="red") - buf = io.BytesIO() - img.save(buf, format="PNG") - png_bytes = buf.getvalue() - return base64.b64encode(png_bytes).decode() - - -@pytest.fixture -def sample_base64_jpg() -> str: - """Create a valid 1x1 JPEG as base64.""" - img = PIL.Image.new("RGB", (1, 1), color="blue") - buf = io.BytesIO() - img.save(buf, format="JPEG") - 
jpg_bytes = buf.getvalue() - return base64.b64encode(jpg_bytes).decode() - - -def test_multimedia_storage_init(tmp_path): - """Test MultimediaStorage initialization.""" - storage = MultimediaStorage(base_path=tmp_path) - assert storage.base_path == tmp_path - assert storage.images_dir == tmp_path / IMAGES_SUBDIR - assert storage.images_subdir == IMAGES_SUBDIR - assert storage.validate_images is True - # Should create images directory on init - assert storage.images_dir.exists() - - -def test_multimedia_storage_init_custom_subdir(tmp_path): - """Test MultimediaStorage initialization with custom subdirectory.""" - custom_subdir = "custom_images" - storage = MultimediaStorage(base_path=tmp_path, images_subdir=custom_subdir, validate_images=False) - assert storage.images_subdir == custom_subdir - assert storage.images_dir == tmp_path / custom_subdir - assert storage.validate_images is False - assert storage.images_dir.exists() - - -def test_save_base64_image_png(multimedia_storage, sample_base64_png): - """Test saving a PNG image from base64.""" - relative_path = multimedia_storage.save_base64_image(sample_base64_png) - - # Check return value format - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") - assert relative_path.endswith(".png") - - # Check file exists on disk - full_path = multimedia_storage.base_path / relative_path - assert full_path.exists() - - # Verify file content - saved_bytes = full_path.read_bytes() - expected_bytes = base64.b64decode(sample_base64_png) - assert saved_bytes == expected_bytes - - -def test_save_base64_image_jpg(multimedia_storage, sample_base64_jpg): - """Test saving a JPEG image from base64.""" - relative_path = multimedia_storage.save_base64_image(sample_base64_jpg) - - # Check return value format - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") - assert relative_path.endswith(".jpg") - - # Check file exists on disk - full_path = multimedia_storage.base_path / relative_path - assert full_path.exists() - - -def 
test_save_base64_image_with_data_uri(multimedia_storage, sample_base64_png): - """Test saving image from data URI format.""" - data_uri = f"data:image/png;base64,{sample_base64_png}" - relative_path = multimedia_storage.save_base64_image(data_uri) - - # Should successfully extract base64 and save - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") - assert relative_path.endswith(".png") - - # Verify file exists and content is correct - full_path = multimedia_storage.base_path / relative_path - assert full_path.exists() - saved_bytes = full_path.read_bytes() - expected_bytes = base64.b64decode(sample_base64_png) - assert saved_bytes == expected_bytes - - -def test_save_base64_image_invalid_base64_raises_error(multimedia_storage): - """Test that invalid base64 data raises ValueError.""" - with pytest.raises(ValueError, match="Invalid base64"): - multimedia_storage.save_base64_image("not-valid-base64!!!") - - -def test_save_base64_image_multiple_images_unique_filenames(multimedia_storage, sample_base64_png): - """Test that multiple images get unique filenames.""" - path1 = multimedia_storage.save_base64_image(sample_base64_png) - path2 = multimedia_storage.save_base64_image(sample_base64_png) - - # Paths should be different (different UUIDs) - assert path1 != path2 - - # Both files should exist - assert (multimedia_storage.base_path / path1).exists() - assert (multimedia_storage.base_path / path2).exists() - - -def test_save_base64_image_validation_enabled(tmp_path, sample_base64_png): - """Test that validation is performed when enabled.""" - storage = MultimediaStorage(base_path=tmp_path, validate_images=True) - # Should succeed with valid image - relative_path = storage.save_base64_image(sample_base64_png) - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") - - -def test_save_base64_image_validation_corrupted_image_raises_error(tmp_path): - """Test that corrupted image fails validation and is cleaned up.""" - storage = MultimediaStorage(base_path=tmp_path, 
validate_images=True) - - # Create base64 of invalid image data - corrupted_bytes = b"not a valid image" - corrupted_base64 = base64.b64encode(corrupted_bytes).decode() - - with pytest.raises(ValueError, match="Image validation failed"): - storage.save_base64_image(corrupted_base64) - - # Check that no files were left behind - assert len(list(storage.images_dir.iterdir())) == 0 - - -def test_save_base64_image_validation_disabled(tmp_path): - """Test that validation can be disabled.""" - storage = MultimediaStorage(base_path=tmp_path, validate_images=False) - - # Create base64 of invalid image data - corrupted_bytes = b"not a valid image" - corrupted_base64 = base64.b64encode(corrupted_bytes).decode() - - # Should succeed without validation - relative_path = storage.save_base64_image(corrupted_base64) - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") - - # File should exist even though it's invalid - full_path = storage.base_path / relative_path - assert full_path.exists() - - -def test_cleanup(multimedia_storage, sample_base64_png): - """Test cleanup removes images directory.""" - # Save an image first - multimedia_storage.save_base64_image(sample_base64_png) - assert multimedia_storage.images_dir.exists() - assert len(list(multimedia_storage.images_dir.iterdir())) > 0 - - # Cleanup should remove directory - multimedia_storage.cleanup() - assert not multimedia_storage.images_dir.exists() diff --git a/packages/data-designer-engine/tests/engine/test_configurable_task.py b/packages/data-designer-engine/tests/engine/test_configurable_task.py index f20936a2..6e3673de 100644 --- a/packages/data-designer-engine/tests/engine/test_configurable_task.py +++ b/packages/data-designer-engine/tests/engine/test_configurable_task.py @@ -25,7 +25,7 @@ def test_configurable_task_generic_type_variables() -> None: assert TaskConfigT.__bound__ == ConfigBase -def test_configurable_task_concrete_implementation() -> None: +def test_configurable_task_concrete_implementation(tmp_path) 
-> None: class TestConfig(ConfigBase): value: str @@ -41,13 +41,8 @@ def _initialize(self) -> None: pass config = TestConfig(value="test") - mock_artifact_storage = Mock(spec=ArtifactStorage) - mock_artifact_storage.dataset_name = "test_dataset" - mock_artifact_storage.final_dataset_folder_name = "final_dataset" - mock_artifact_storage.partial_results_folder_name = "partial_results" - mock_artifact_storage.dropped_columns_folder_name = "dropped_columns" - mock_artifact_storage.processors_outputs_folder_name = "processors_outputs" - resource_provider = ResourceProvider(artifact_storage=mock_artifact_storage) + artifact_storage = ArtifactStorage(artifact_path=tmp_path) + resource_provider = ResourceProvider(artifact_storage=artifact_storage) task = TestTask(config=config, resource_provider=resource_provider) @@ -55,7 +50,7 @@ def _initialize(self) -> None: assert task._resource_provider == resource_provider -def test_configurable_task_config_validation() -> None: +def test_configurable_task_config_validation(tmp_path) -> None: class TestConfig(ConfigBase): value: str @@ -69,13 +64,8 @@ def _validate(self) -> None: raise ValueError("Invalid config") config = TestConfig(value="test") - mock_artifact_storage = Mock(spec=ArtifactStorage) - mock_artifact_storage.dataset_name = "test_dataset" - mock_artifact_storage.final_dataset_folder_name = "final_dataset" - mock_artifact_storage.partial_results_folder_name = "partial_results" - mock_artifact_storage.dropped_columns_folder_name = "dropped_columns" - mock_artifact_storage.processors_outputs_folder_name = "processors_outputs" - resource_provider = ResourceProvider(artifact_storage=mock_artifact_storage) + artifact_storage = ArtifactStorage(artifact_path=tmp_path) + resource_provider = ResourceProvider(artifact_storage=artifact_storage) task = TestTask(config=config, resource_provider=resource_provider) assert task._config.value == "test" @@ -85,7 +75,7 @@ def _validate(self) -> None: TestTask(config=invalid_config, 
resource_provider=resource_provider) -def test_configurable_task_resource_validation() -> None: +def test_configurable_task_resource_validation(tmp_path) -> None: class TestConfig(ConfigBase): value: str @@ -102,14 +92,9 @@ def _initialize(self) -> None: config = TestConfig(value="test") - mock_artifact_storage = Mock(spec=ArtifactStorage) - mock_artifact_storage.dataset_name = "test_dataset" - mock_artifact_storage.final_dataset_folder_name = "final_dataset" - mock_artifact_storage.partial_results_folder_name = "partial_results" - mock_artifact_storage.dropped_columns_folder_name = "dropped_columns" - mock_artifact_storage.processors_outputs_folder_name = "processors_outputs" + artifact_storage = ArtifactStorage(artifact_path=tmp_path) mock_model_registry = Mock(spec=ModelRegistry) - resource_provider = ResourceProvider(artifact_storage=mock_artifact_storage, model_registry=mock_model_registry) + resource_provider = ResourceProvider(artifact_storage=artifact_storage, model_registry=mock_model_registry) task = TestTask(config=config, resource_provider=resource_provider) assert task._resource_provider == resource_provider From 1677f066e5a228c418d558633ece69969cd7d122 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 10:17:32 -0700 Subject: [PATCH 27/64] track images generated in usage --- .../config/utils/image_helpers.py | 25 ++ .../tests/config/utils/test_image_helpers.py | 27 ++ .../src/data_designer/engine/models/facade.py | 403 ++++++++---------- .../data_designer/engine/models/registry.py | 4 + .../src/data_designer/engine/models/usage.py | 23 +- .../tests/engine/models/test_facade.py | 147 +++++++ .../tests/engine/models/test_usage.py | 60 ++- 7 files changed, 457 insertions(+), 232 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 67803aff..2069d9bf 100644 --- 
a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -23,6 +23,31 @@ # WEBP uses RIFF header - handled separately } +# Patterns for detecting diffusion-based image generation models (DALL-E, Stable Diffusion, Imagen, etc.) +_IMAGE_DIFFUSION_MODEL_PATTERNS = ( + "dall-e", + "dalle", + "stable-diffusion", + "sd-", + "sd_", + "imagen", +) + + +def is_image_diffusion_model(model_name: str) -> bool: + """Return True if the model is a diffusion-based image generation model. + + Diffusion models use the image_generation API (e.g. DALL-E, Stable Diffusion, Imagen). + All other image models are assumed to use the chat/completions API. + + Args: + model_name: Model name or identifier (e.g. from provider). + + Returns: + True if the model is detected as diffusion-based, False otherwise. + """ + return any(pattern in model_name.lower() for pattern in _IMAGE_DIFFUSION_MODEL_PATTERNS) + def extract_base64_from_data_uri(data: str) -> str: """Extract base64 from data URI or return as-is. 
diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index e0eb0370..aa1ca451 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -16,6 +16,7 @@ extract_base64_from_data_uri, get_supported_image_extensions, is_base64_image, + is_image_diffusion_model, is_image_path, is_image_url, load_image_path_to_base64, @@ -144,6 +145,32 @@ def test_is_image_url_non_http(): assert is_image_url("ftp://example.com/image.png") is False +# Tests for is_image_diffusion_model + + +def test_is_image_diffusion_model_dall_e(): + assert is_image_diffusion_model("dall-e-3") is True + assert is_image_diffusion_model("DALL-E-2") is True + assert is_image_diffusion_model("openai/dalle-2") is True + + +def test_is_image_diffusion_model_stable_diffusion(): + assert is_image_diffusion_model("stable-diffusion-xl") is True + assert is_image_diffusion_model("sd-2.1") is True + assert is_image_diffusion_model("sd_1.5") is True + + +def test_is_image_diffusion_model_imagen(): + assert is_image_diffusion_model("imagen-3") is True + assert is_image_diffusion_model("google/imagen") is True + + +def test_is_image_diffusion_model_chat_completion_image_models(): + assert is_image_diffusion_model("gemini-3-pro-image-preview") is False + assert is_image_diffusion_model("gpt-5-image") is False + assert is_image_diffusion_model("flux.2-pro") is False + + # Tests for validate_image diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index d13273f4..11f6e9ec 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -9,7 +9,10 @@ from typing import TYPE_CHECKING, Any from 
data_designer.config.models import GenerationType, ModelConfig, ModelProvider -from data_designer.config.utils.image_helpers import extract_base64_from_data_uri +from data_designer.config.utils.image_helpers import ( + extract_base64_from_data_uri, + is_image_diffusion_model, +) from data_designer.engine.mcp.errors import MCPConfigurationError from data_designer.engine.model_provider import ModelProviderRegistry from data_designer.engine.models.errors import ( @@ -20,7 +23,7 @@ ) from data_designer.engine.models.litellm_overrides import CustomRouter, LiteLLMRouterDefaultKwargs from data_designer.engine.models.parsers.errors import ParserException -from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats +from data_designer.engine.models.usage import ImageUsageStats, ModelUsageStats, RequestUsageStats, TokenUsageStats from data_designer.engine.models.utils import ChatMessage, prompt_to_messages from data_designer.engine.secret_resolver import SecretResolver from data_designer.lazy_heavy_imports import litellm @@ -39,16 +42,6 @@ def _identity(x: Any) -> Any: logger = logging.getLogger(__name__) -# Patterns for detecting diffusion-based image generation models -DIFFUSION_MODEL_PATTERNS = [ - "dall-e", - "dalle", - "stable-diffusion", - "sd-", - "sd_", - "imagen", -] - class ModelFacade: def __init__( @@ -117,7 +110,7 @@ def completion( raise e finally: if not skip_usage_tracking and response is not None: - self._track_usage(response) + self._track_token_usage_from_completion(response) def consolidate_kwargs(self, **kwargs) -> dict[str, Any]: # Remove purpose from kwargs to avoid passing it to the model @@ -129,16 +122,153 @@ def consolidate_kwargs(self, **kwargs) -> dict[str, Any]: kwargs["extra_headers"] = self.model_provider.extra_headers return kwargs - def _get_mcp_facade(self, tool_alias: str | None) -> MCPFacade | None: - if tool_alias is None: - return None - if self._mcp_registry is None: - raise 
MCPConfigurationError(f"Tool alias {tool_alias!r} specified but no MCPRegistry configured.") + @catch_llm_exceptions + def generate( + self, + prompt: str, + *, + parser: Callable[[str], Any] = _identity, + system_prompt: str | None = None, + multi_modal_context: list[dict[str, Any]] | None = None, + tool_alias: str | None = None, + max_correction_steps: int = 0, + max_conversation_restarts: int = 0, + skip_usage_tracking: bool = False, + purpose: str | None = None, + **kwargs, + ) -> tuple[Any, list[ChatMessage]]: + """Generate a parsed output with correction steps. - try: - return self._mcp_registry.get_mcp(tool_alias=tool_alias) - except ValueError as exc: - raise MCPConfigurationError(f"Tool alias {tool_alias!r} is not registered.") from exc + This generation call will attempt to generate an output which is + valid according to the specified parser, where "valid" implies + that the parser can process the LLM response without raising + an exception. + + `ParserExceptions` are routed back + to the LLM as new rounds in the conversation, where the LLM is provided its + earlier response along with the "user" role responding with the exception string + (not traceback). This will continue for the number of rounds specified by + `max_correction_steps`. + + Args: + prompt (str): Task prompt. + system_prompt (str, optional): Optional system instructions. If not specified, + no system message is provided and the model should use its default system + prompt. + parser (func(str) -> Any): A function applied to the LLM response which processes + an LLM response into some output object. Default: identity function. + tool_alias (str | None): Optional tool configuration alias. When provided, + the model may call permitted tools from the configured MCP providers. + The alias must reference a ToolConfig registered in the MCPRegistry. + max_correction_steps (int): Maximum number of correction rounds permitted + within a single conversation. 
Note, many rounds can lead to increasing + context size without necessarily improving performance -- small language + models can enter repeated cycles which will not be solved with more steps. + Default: `0` (no correction). + max_conversation_restarts (int): Maximum number of full conversation restarts permitted + if generation fails. Default: `0` (no restarts). + skip_usage_tracking (bool): Whether to skip usage tracking. Default: `False`. + purpose (str): The purpose of the model usage to show as context in the error message. + It is expected to be used by the @catch_llm_exceptions decorator. + **kwargs: Additional arguments to pass to the model. + + Returns: + A tuple containing: + - The parsed output object from the parser. + - The full trace of ChatMessage entries in the conversation, including any tool calls, + corrections, and reasoning traces. Callers can decide whether to store this. + + Raises: + GenerationValidationFailureError: If the maximum number of retries or + correction steps are met and the last response failures on + generation validation. + MCPConfigurationError: If tool_alias is specified but no MCPRegistry is configured. 
+ """ + output_obj = None + tool_schemas = None + tool_call_turns = 0 + total_tool_calls = 0 + curr_num_correction_steps = 0 + curr_num_restarts = 0 + + mcp_facade = self._get_mcp_facade(tool_alias) + + # Checkpoint for restarts - updated after tool calls so we don't repeat them + restart_checkpoint = prompt_to_messages( + user_prompt=prompt, system_prompt=system_prompt, multi_modal_context=multi_modal_context + ) + checkpoint_tool_call_turns = 0 + messages: list[ChatMessage] = deepcopy(restart_checkpoint) + + if mcp_facade is not None: + tool_schemas = mcp_facade.get_tool_schemas() + + while True: + completion_kwargs = dict(kwargs) + if tool_schemas is not None: + completion_kwargs["tools"] = tool_schemas + + completion_response = self.completion( + messages, + skip_usage_tracking=skip_usage_tracking, + **completion_kwargs, + ) + + # Process any tool calls in the response (handles parallel tool calling) + if mcp_facade is not None and mcp_facade.has_tool_calls(completion_response): + tool_call_turns += 1 + total_tool_calls += mcp_facade.tool_call_count(completion_response) + + if tool_call_turns > mcp_facade.max_tool_call_turns: + # Gracefully refuse tool calls when budget is exhausted + messages.extend(mcp_facade.refuse_completion_response(completion_response)) + else: + messages.extend(mcp_facade.process_completion_response(completion_response)) + + # Update checkpoint so restarts don't repeat tool calls + restart_checkpoint = deepcopy(messages) + checkpoint_tool_call_turns = tool_call_turns + + continue # Back to top + + # No tool calls remaining to process + response = completion_response.choices[0].message.content or "" + reasoning_trace = getattr(completion_response.choices[0].message, "reasoning_content", None) + messages.append(ChatMessage.as_assistant(content=response, reasoning_content=reasoning_trace or None)) + curr_num_correction_steps += 1 + + try: + output_obj = parser(response) # type: ignore - if not a string will cause a ParserException below + 
break + except ParserException as exc: + if max_correction_steps == 0 and max_conversation_restarts == 0: + raise GenerationValidationFailureError( + "Unsuccessful generation attempt. No retries were attempted." + ) from exc + + if curr_num_correction_steps <= max_correction_steps: + # Add user message with error for correction + messages.append(ChatMessage.as_user(content=str(get_exception_primary_cause(exc)))) + + elif curr_num_restarts < max_conversation_restarts: + curr_num_correction_steps = 0 + curr_num_restarts += 1 + messages = deepcopy(restart_checkpoint) + tool_call_turns = checkpoint_tool_call_turns + + else: + raise GenerationValidationFailureError( + f"Unsuccessful generation despite {max_correction_steps} correction steps " + f"and {max_conversation_restarts} conversation restarts." + ) from exc + + if not skip_usage_tracking and mcp_facade is not None: + self._usage_stats.tool_usage.extend( + tool_calls=total_tool_calls, + tool_call_turns=tool_call_turns, + ) + + return output_obj, messages @catch_llm_exceptions def generate_text_embeddings( @@ -171,7 +301,7 @@ def generate_text_embeddings( raise e finally: if not skip_usage_tracking and response is not None: - self._track_usage_from_embedding(response) + self._track_token_usage_from_embedding(response) @catch_llm_exceptions def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: @@ -201,22 +331,27 @@ def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwarg ) # Auto-detect API type based on model name - if self._is_diffusion_model(): - return self._generate_image_diffusion(prompt, skip_usage_tracking, **kwargs) + if is_image_diffusion_model(self.model_name): + images = self._generate_image_diffusion(prompt, skip_usage_tracking, **kwargs) else: - return self._generate_image_chat_completion(prompt, skip_usage_tracking, **kwargs) + images = self._generate_image_chat_completion(prompt, skip_usage_tracking, **kwargs) - def 
_is_diffusion_model(self) -> bool: - """Detect if model uses diffusion API based on name patterns. + # Track image usage + if not skip_usage_tracking and len(images) > 0: + self._usage_stats.extend(image_usage=ImageUsageStats(total_images=len(images))) - Diffusion models include DALL-E, Stable Diffusion, and Imagen variants. - All other image models are assumed to use chat completions API. + return images - Returns: - True if model is detected as diffusion-based, False otherwise - """ - model_lower = self.model_name.lower() - return any(pattern in model_lower for pattern in DIFFUSION_MODEL_PATTERNS) + def _get_mcp_facade(self, tool_alias: str | None) -> MCPFacade | None: + if tool_alias is None: + return None + if self._mcp_registry is None: + raise MCPConfigurationError(f"Tool alias {tool_alias!r} specified but no MCPRegistry configured.") + + try: + return self._mcp_registry.get_mcp(tool_alias=tool_alias) + except ValueError as exc: + raise MCPConfigurationError(f"Tool alias {tool_alias!r} is not registered.") from exc def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: """Generate image(s) using autoregressive model via chat completions API. @@ -311,155 +446,7 @@ def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = Fal raise finally: if not skip_usage_tracking and response is not None: - self._track_usage_from_image_diffusion(response) - - @catch_llm_exceptions - def generate( - self, - prompt: str, - *, - parser: Callable[[str], Any] = _identity, - system_prompt: str | None = None, - multi_modal_context: list[dict[str, Any]] | None = None, - tool_alias: str | None = None, - max_correction_steps: int = 0, - max_conversation_restarts: int = 0, - skip_usage_tracking: bool = False, - purpose: str | None = None, - **kwargs, - ) -> tuple[Any, list[ChatMessage]]: - """Generate a parsed output with correction steps. 
- - This generation call will attempt to generate an output which is - valid according to the specified parser, where "valid" implies - that the parser can process the LLM response without raising - an exception. - - `ParserExceptions` are routed back - to the LLM as new rounds in the conversation, where the LLM is provided its - earlier response along with the "user" role responding with the exception string - (not traceback). This will continue for the number of rounds specified by - `max_correction_steps`. - - Args: - prompt (str): Task prompt. - system_prompt (str, optional): Optional system instructions. If not specified, - no system message is provided and the model should use its default system - prompt. - parser (func(str) -> Any): A function applied to the LLM response which processes - an LLM response into some output object. Default: identity function. - tool_alias (str | None): Optional tool configuration alias. When provided, - the model may call permitted tools from the configured MCP providers. - The alias must reference a ToolConfig registered in the MCPRegistry. - max_correction_steps (int): Maximum number of correction rounds permitted - within a single conversation. Note, many rounds can lead to increasing - context size without necessarily improving performance -- small language - models can enter repeated cycles which will not be solved with more steps. - Default: `0` (no correction). - max_conversation_restarts (int): Maximum number of full conversation restarts permitted - if generation fails. Default: `0` (no restarts). - skip_usage_tracking (bool): Whether to skip usage tracking. Default: `False`. - purpose (str): The purpose of the model usage to show as context in the error message. - It is expected to be used by the @catch_llm_exceptions decorator. - **kwargs: Additional arguments to pass to the model. - - Returns: - A tuple containing: - - The parsed output object from the parser. 
- - The full trace of ChatMessage entries in the conversation, including any tool calls, - corrections, and reasoning traces. Callers can decide whether to store this. - - Raises: - GenerationValidationFailureError: If the maximum number of retries or - correction steps are met and the last response failures on - generation validation. - MCPConfigurationError: If tool_alias is specified but no MCPRegistry is configured. - """ - output_obj = None - tool_schemas = None - tool_call_turns = 0 - total_tool_calls = 0 - curr_num_correction_steps = 0 - curr_num_restarts = 0 - - mcp_facade = self._get_mcp_facade(tool_alias) - - # Checkpoint for restarts - updated after tool calls so we don't repeat them - restart_checkpoint = prompt_to_messages( - user_prompt=prompt, system_prompt=system_prompt, multi_modal_context=multi_modal_context - ) - checkpoint_tool_call_turns = 0 - messages: list[ChatMessage] = deepcopy(restart_checkpoint) - - if mcp_facade is not None: - tool_schemas = mcp_facade.get_tool_schemas() - - while True: - completion_kwargs = dict(kwargs) - if tool_schemas is not None: - completion_kwargs["tools"] = tool_schemas - - completion_response = self.completion( - messages, - skip_usage_tracking=skip_usage_tracking, - **completion_kwargs, - ) - - # Process any tool calls in the response (handles parallel tool calling) - if mcp_facade is not None and mcp_facade.has_tool_calls(completion_response): - tool_call_turns += 1 - total_tool_calls += mcp_facade.tool_call_count(completion_response) - - if tool_call_turns > mcp_facade.max_tool_call_turns: - # Gracefully refuse tool calls when budget is exhausted - messages.extend(mcp_facade.refuse_completion_response(completion_response)) - else: - messages.extend(mcp_facade.process_completion_response(completion_response)) - - # Update checkpoint so restarts don't repeat tool calls - restart_checkpoint = deepcopy(messages) - checkpoint_tool_call_turns = tool_call_turns - - continue # Back to top - - # No tool calls 
remaining to process - response = completion_response.choices[0].message.content or "" - reasoning_trace = getattr(completion_response.choices[0].message, "reasoning_content", None) - messages.append(ChatMessage.as_assistant(content=response, reasoning_content=reasoning_trace or None)) - curr_num_correction_steps += 1 - - try: - output_obj = parser(response) # type: ignore - if not a string will cause a ParserException below - break - except ParserException as exc: - if max_correction_steps == 0 and max_conversation_restarts == 0: - raise GenerationValidationFailureError( - "Unsuccessful generation attempt. No retries were attempted." - ) from exc - - if curr_num_correction_steps <= max_correction_steps: - # Add user message with error for correction - messages.append(ChatMessage.as_user(content=str(get_exception_primary_cause(exc)))) - - elif curr_num_restarts < max_conversation_restarts: - curr_num_correction_steps = 0 - curr_num_restarts += 1 - messages = deepcopy(restart_checkpoint) - tool_call_turns = checkpoint_tool_call_turns - - else: - raise GenerationValidationFailureError( - f"Unsuccessful generation despite {max_correction_steps} correction steps " - f"and {max_conversation_restarts} conversation restarts." 
- ) from exc - - if not skip_usage_tracking and mcp_facade is not None: - self._usage_stats.tool_usage.extend( - tool_calls=total_tool_calls, - tool_call_turns=tool_call_turns, - ) - - return output_obj, messages + self._track_token_usage_from_image_diffusion(response) def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict: provider = self._model_provider_registry.get_provider(model_config.provider) @@ -478,7 +465,7 @@ def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.Deployme "litellm_params": litellm_params.model_dump(), } - def _track_usage(self, response: litellm.types.utils.ModelResponse | None) -> None: + def _track_token_usage_from_completion(self, response: litellm.types.utils.ModelResponse | None) -> None: if response is None: self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) return @@ -495,7 +482,7 @@ def _track_usage(self, response: litellm.types.utils.ModelResponse | None) -> No request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) - def _track_usage_from_embedding(self, response: litellm.types.utils.EmbeddingResponse | None) -> None: + def _track_token_usage_from_embedding(self, response: litellm.types.utils.EmbeddingResponse | None) -> None: if response is None: self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) return @@ -508,27 +495,12 @@ def _track_usage_from_embedding(self, response: litellm.types.utils.EmbeddingRes request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) - def _track_usage_from_response(self, response: litellm.types.utils.ResponseResponse | None) -> None: - """Track usage from Responses API response.""" + def _track_token_usage_from_image_diffusion(self, response: litellm.types.utils.ImageResponse | None) -> None: + """Track token usage from image_generation API response.""" if response is None: 
self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) return - if response.usage is not None: - input_tokens = getattr(response.usage, "input_tokens", 0) or 0 - output_tokens = getattr(response.usage, "output_tokens", 0) or 0 - self._usage_stats.extend( - token_usage=TokenUsageStats( - input_tokens=input_tokens, - output_tokens=output_tokens, - ), - request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), - ) - def _track_usage_from_image_diffusion(self, response: litellm.types.utils.ImageResponse | None) -> None: - """Track usage from image_generation API response.""" - if response is None: - self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1)) - return if response.usage is not None and isinstance(response.usage, litellm.types.utils.ImageUsage): self._usage_stats.extend( token_usage=TokenUsageStats( @@ -537,28 +509,3 @@ def _track_usage_from_image_diffusion(self, response: litellm.types.utils.ImageR ), request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) - - def _download_url_to_base64(self, url: str) -> str: - """Download image from URL and convert to base64. 
- - Args: - url: Image URL - - Returns: - Base64-encoded image string - - Raises: - ModelAPIError: If download fails - """ - import base64 - - from data_designer.lazy_heavy_imports import httpx - - try: - with httpx.Client(timeout=30.0) as client: - response = client.get(url) - response.raise_for_status() - image_bytes = response.content - return base64.b64encode(image_bytes).decode("utf-8") - except Exception as e: - raise ModelAPIError(f"Failed to download image from URL {url}: {e}") from e diff --git a/packages/data-designer-engine/src/data_designer/engine/models/registry.py b/packages/data-designer-engine/src/data_designer/engine/models/registry.py index 56945941..2878f64e 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/registry.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/registry.py @@ -120,6 +120,10 @@ def log_model_usage(self, total_time_elapsed: float) -> None: f"turns={tool_usage['total_tool_call_turns']}" ) + if image_usage := stats.get("image_usage"): + total_images = image_usage["total_images"] + logger.info(f"{LOG_INDENT}images: total={total_images}") + if model_index < len(sorted_model_names) - 1: logger.info(LOG_INDENT.rstrip()) diff --git a/packages/data-designer-engine/src/data_designer/engine/models/usage.py b/packages/data-designer-engine/src/data_designer/engine/models/usage.py index f44a31ae..169ef1bb 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/usage.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/usage.py @@ -71,10 +71,23 @@ def merge(self, other: ToolUsageStats) -> ToolUsageStats: return self +class ImageUsageStats(BaseModel): + total_images: int = 0 + + @property + def has_usage(self) -> bool: + return self.total_images > 0 + + def extend(self, *, images: int) -> None: + """Extend stats with generated images count.""" + self.total_images += images + + class ModelUsageStats(BaseModel): token_usage: TokenUsageStats = TokenUsageStats() 
request_usage: RequestUsageStats = RequestUsageStats() tool_usage: ToolUsageStats = ToolUsageStats() + image_usage: ImageUsageStats = ImageUsageStats() @property def has_usage(self) -> bool: @@ -86,6 +99,7 @@ def extend( token_usage: TokenUsageStats | None = None, request_usage: RequestUsageStats | None = None, tool_usage: ToolUsageStats | None = None, + image_usage: ImageUsageStats | None = None, ) -> None: if token_usage is not None: self.token_usage.extend(input_tokens=token_usage.input_tokens, output_tokens=token_usage.output_tokens) @@ -95,9 +109,16 @@ def extend( ) if tool_usage is not None: self.tool_usage.merge(tool_usage) + if image_usage is not None: + self.image_usage.extend(images=image_usage.total_images) def get_usage_stats(self, *, total_time_elapsed: float) -> dict: - exclude = {"tool_usage"} if not self.tool_usage.has_usage else None + exclude = set() + if not self.tool_usage.has_usage: + exclude.add("tool_usage") + if not self.image_usage.has_usage: + exclude.add("image_usage") + exclude = exclude if exclude else None return self.model_dump(exclude=exclude) | { "tokens_per_second": int(self.token_usage.total_tokens / total_time_elapsed) if total_time_elapsed > 0 diff --git a/packages/data-designer-engine/tests/engine/models/test_facade.py b/packages/data-designer-engine/tests/engine/models/test_facade.py index c0ab9cd3..78473d63 100644 --- a/packages/data-designer-engine/tests/engine/models/test_facade.py +++ b/packages/data-designer-engine/tests/engine/models/test_facade.py @@ -989,3 +989,150 @@ def _completion(self: Any, messages: list[ChatMessage], **kwargs: Any) -> StubRe with patch.object(ModelFacade, "completion", new=_completion): with pytest.raises(MCPToolError, match="Invalid tool arguments"): model.generate(prompt="question", parser=lambda x: x, tool_alias="tools") + + +# ============================================================================= +# Image generation tests +# 
============================================================================= + + +@patch("data_designer.engine.models.facade.CustomRouter.image_generation", autospec=True) +def test_generate_image_diffusion_tracks_image_usage( + mock_image_generation: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image tracks image usage for diffusion models.""" + from litellm.types.utils import ImageObject, ImageResponse + + # Mock response with 3 images + mock_response = ImageResponse( + data=[ + ImageObject(b64_json="image1_base64"), + ImageObject(b64_json="image2_base64"), + ImageObject(b64_json="image3_base64"), + ] + ) + mock_image_generation.return_value = mock_response + + # Verify initial state + assert stub_model_facade.usage_stats.image_usage.total_images == 0 + + # Generate images + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=True): + images = stub_model_facade.generate_image(prompt="test prompt", n=3) + + # Verify results + assert len(images) == 3 + assert images == ["image1_base64", "image2_base64", "image3_base64"] + + # Verify image usage was tracked + assert stub_model_facade.usage_stats.image_usage.total_images == 3 + assert stub_model_facade.usage_stats.image_usage.has_usage is True + + +@patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True) +def test_generate_image_chat_completion_tracks_image_usage( + mock_completion: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image tracks image usage for chat completion models.""" + from litellm.types.utils import Choices, ImageURLListItem, Message, ModelResponse + + # Mock response with images attribute (Message requires type and index per ImageURLListItem) + mock_message = Message( + role="assistant", + content="", + images=[ + ImageURLListItem(type="image_url", image_url={"url": "data:image/png;base64,image1"}, index=0), + ImageURLListItem(type="image_url", image_url={"url": 
"data:image/png;base64,image2"}, index=1), + ], + ) + mock_response = ModelResponse(choices=[Choices(message=mock_message)]) + mock_completion.return_value = mock_response + + # Verify initial state + assert stub_model_facade.usage_stats.image_usage.total_images == 0 + + # Generate images + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=False): + images = stub_model_facade.generate_image(prompt="test prompt") + + # Verify results + assert len(images) == 2 + assert images == ["image1", "image2"] + + # Verify image usage was tracked + assert stub_model_facade.usage_stats.image_usage.total_images == 2 + assert stub_model_facade.usage_stats.image_usage.has_usage is True + + +@patch("data_designer.engine.models.facade.CustomRouter.image_generation", autospec=True) +def test_generate_image_skip_usage_tracking( + mock_image_generation: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image respects skip_usage_tracking flag.""" + from litellm.types.utils import ImageObject, ImageResponse + + mock_response = ImageResponse( + data=[ + ImageObject(b64_json="image1_base64"), + ImageObject(b64_json="image2_base64"), + ] + ) + mock_image_generation.return_value = mock_response + + # Verify initial state + assert stub_model_facade.usage_stats.image_usage.total_images == 0 + + # Generate images with skip_usage_tracking=True + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=True): + images = stub_model_facade.generate_image(prompt="test prompt", skip_usage_tracking=True) + + # Verify results + assert len(images) == 2 + + # Verify image usage was NOT tracked + assert stub_model_facade.usage_stats.image_usage.total_images == 0 + assert stub_model_facade.usage_stats.image_usage.has_usage is False + + +@patch("data_designer.engine.models.facade.CustomRouter.image_generation", autospec=True) +def test_generate_image_accumulates_usage( + mock_image_generation: Any, + 
stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image accumulates image usage across multiple calls.""" + from litellm.types.utils import ImageObject, ImageResponse + + # First call - 2 images + mock_response1 = ImageResponse( + data=[ + ImageObject(b64_json="image1"), + ImageObject(b64_json="image2"), + ] + ) + # Second call - 3 images + mock_response2 = ImageResponse( + data=[ + ImageObject(b64_json="image3"), + ImageObject(b64_json="image4"), + ImageObject(b64_json="image5"), + ] + ) + mock_image_generation.side_effect = [mock_response1, mock_response2] + + # Verify initial state + assert stub_model_facade.usage_stats.image_usage.total_images == 0 + + # First generation + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=True): + images1 = stub_model_facade.generate_image(prompt="test1") + assert len(images1) == 2 + assert stub_model_facade.usage_stats.image_usage.total_images == 2 + + # Second generation + images2 = stub_model_facade.generate_image(prompt="test2") + assert len(images2) == 3 + # Usage should accumulate + assert stub_model_facade.usage_stats.image_usage.total_images == 5 diff --git a/packages/data-designer-engine/tests/engine/models/test_usage.py b/packages/data-designer-engine/tests/engine/models/test_usage.py index 8e7adb04..2c4f783f 100644 --- a/packages/data-designer-engine/tests/engine/models/test_usage.py +++ b/packages/data-designer-engine/tests/engine/models/test_usage.py @@ -1,7 +1,13 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats, ToolUsageStats +from data_designer.engine.models.usage import ( + ImageUsageStats, + ModelUsageStats, + RequestUsageStats, + TokenUsageStats, + ToolUsageStats, +) def test_token_usage_stats() -> None: @@ -32,6 +38,20 @@ def test_request_usage_stats() -> None: assert request_usage_stats.has_usage is True +def test_image_usage_stats() -> None: + image_usage_stats = ImageUsageStats() + assert image_usage_stats.total_images == 0 + assert image_usage_stats.has_usage is False + + image_usage_stats.extend(images=5) + assert image_usage_stats.total_images == 5 + assert image_usage_stats.has_usage is True + + image_usage_stats.extend(images=3) + assert image_usage_stats.total_images == 8 + assert image_usage_stats.has_usage is True + + def test_tool_usage_stats_empty_state() -> None: """Test ToolUsageStats initialization with empty state.""" tool_usage = ToolUsageStats() @@ -132,9 +152,10 @@ def test_model_usage_stats() -> None: assert model_usage_stats.token_usage.output_tokens == 0 assert model_usage_stats.request_usage.successful_requests == 0 assert model_usage_stats.request_usage.failed_requests == 0 + assert model_usage_stats.image_usage.total_images == 0 assert model_usage_stats.has_usage is False - # tool_usage is excluded when has_usage is False + # tool_usage and image_usage are excluded when has_usage is False assert model_usage_stats.get_usage_stats(total_time_elapsed=10) == { "token_usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}, "request_usage": {"successful_requests": 0, "failed_requests": 0, "total_requests": 0}, @@ -152,7 +173,7 @@ def test_model_usage_stats() -> None: assert model_usage_stats.request_usage.failed_requests == 1 assert model_usage_stats.has_usage is True - # tool_usage is excluded when has_usage is False + # tool_usage and image_usage are excluded when has_usage is False assert 
model_usage_stats.get_usage_stats(total_time_elapsed=2) == { "token_usage": {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, "request_usage": {"successful_requests": 2, "failed_requests": 1, "total_requests": 3}, @@ -177,3 +198,36 @@ def test_model_usage_stats_extend_with_tool_usage() -> None: assert stats1.tool_usage.total_tool_call_turns == 6 assert stats1.tool_usage.total_generations == 4 assert stats1.tool_usage.generations_with_tools == 3 + + +def test_model_usage_stats_with_image_usage() -> None: + """Test that ModelUsageStats includes image_usage when it has usage.""" + model_usage_stats = ModelUsageStats() + model_usage_stats.extend( + token_usage=TokenUsageStats(input_tokens=10, output_tokens=20), + request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), + image_usage=ImageUsageStats(total_images=5), + ) + + assert model_usage_stats.image_usage.total_images == 5 + assert model_usage_stats.image_usage.has_usage is True + + # image_usage should be included in output + usage_stats = model_usage_stats.get_usage_stats(total_time_elapsed=2) + assert "image_usage" in usage_stats + assert usage_stats["image_usage"] == {"total_images": 5} + + +def test_model_usage_stats_exclude_unused_stats() -> None: + """Test that ModelUsageStats excludes tool_usage and image_usage when they have no usage.""" + model_usage_stats = ModelUsageStats() + model_usage_stats.extend( + token_usage=TokenUsageStats(input_tokens=10, output_tokens=20), + request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), + ) + + usage_stats = model_usage_stats.get_usage_stats(total_time_elapsed=2) + assert "tool_usage" not in usage_stats + assert "image_usage" not in usage_stats + assert "token_usage" in usage_stats + assert "request_usage" in usage_stats From 3b4acf19202db778f6905f0c1bd27ca984cdaffb Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 10:55:49 -0700 Subject: [PATCH 28/64] fix image usage tracking --- 
.../config/utils/image_helpers.py | 9 ++++---- .../src/data_designer/engine/models/usage.py | 2 +- .../tests/engine/models/test_usage.py | 22 +++++++++++++++++++ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 2069d9bf..9fc4e2b0 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -23,8 +23,8 @@ # WEBP uses RIFF header - handled separately } -# Patterns for detecting diffusion-based image generation models (DALL-E, Stable Diffusion, Imagen, etc.) -_IMAGE_DIFFUSION_MODEL_PATTERNS = ( +# Patterns for diffusion-based image models only (use image_generation API). +IMAGE_DIFFUSION_MODEL_PATTERNS = ( "dall-e", "dalle", "stable-diffusion", @@ -37,8 +37,7 @@ def is_image_diffusion_model(model_name: str) -> bool: """Return True if the model is a diffusion-based image generation model. - Diffusion models use the image_generation API (e.g. DALL-E, Stable Diffusion, Imagen). - All other image models are assumed to use the chat/completions API. + Args: model_name: Model name or identifier (e.g. from provider). @@ -46,7 +45,7 @@ def is_image_diffusion_model(model_name: str) -> bool: Returns: True if the model is detected as diffusion-based, False otherwise. 
""" - return any(pattern in model_name.lower() for pattern in _IMAGE_DIFFUSION_MODEL_PATTERNS) + return any(pattern in model_name.lower() for pattern in IMAGE_DIFFUSION_MODEL_PATTERNS) def extract_base64_from_data_uri(data: str) -> str: diff --git a/packages/data-designer-engine/src/data_designer/engine/models/usage.py b/packages/data-designer-engine/src/data_designer/engine/models/usage.py index 169ef1bb..64e82b47 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/usage.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/usage.py @@ -91,7 +91,7 @@ class ModelUsageStats(BaseModel): @property def has_usage(self) -> bool: - return self.token_usage.has_usage and self.request_usage.has_usage + return self.token_usage.has_usage or self.request_usage.has_usage or self.image_usage.has_usage def extend( self, diff --git a/packages/data-designer-engine/tests/engine/models/test_usage.py b/packages/data-designer-engine/tests/engine/models/test_usage.py index 2c4f783f..2bfea4b4 100644 --- a/packages/data-designer-engine/tests/engine/models/test_usage.py +++ b/packages/data-designer-engine/tests/engine/models/test_usage.py @@ -218,6 +218,28 @@ def test_model_usage_stats_with_image_usage() -> None: assert usage_stats["image_usage"] == {"total_images": 5} +def test_model_usage_stats_has_usage_any_of() -> None: + """Test that has_usage is True when any of token, request, or image usage is present.""" + # Only token usage + stats = ModelUsageStats() + stats.extend(token_usage=TokenUsageStats(input_tokens=1, output_tokens=0)) + assert stats.has_usage is True + + # Only request usage (e.g. 
diffusion API without token counts) + stats = ModelUsageStats() + stats.extend(request_usage=RequestUsageStats(successful_requests=1, failed_requests=0)) + assert stats.has_usage is True + + # Only image usage + stats = ModelUsageStats() + stats.extend(image_usage=ImageUsageStats(total_images=2)) + assert stats.has_usage is True + + # None of the three + stats = ModelUsageStats() + assert stats.has_usage is False + + def test_model_usage_stats_exclude_unused_stats() -> None: """Test that ModelUsageStats excludes tool_usage and image_usage when they have no usage.""" model_usage_stats = ModelUsageStats() From 33b4211490519ac6f2e3bd38cd5273d945f58718 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 11:52:15 -0700 Subject: [PATCH 29/64] test clean up --- .../src/data_designer/config/models.py | 28 +------ .../config/utils/image_helpers.py | 2 - .../tests/config/test_models.py | 30 ++++++++ .../data_designer/engine/models/registry.py | 2 +- .../tests/engine/models/test_facade.py | 77 ++++++++++--------- 5 files changed, 76 insertions(+), 63 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index 3dab2d8d..8b16b4bc 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -425,21 +425,7 @@ def generate_kwargs(self) -> dict[str, float | int]: class ImageInferenceParams(BaseInferenceParams): """Configuration for image generation models. - Works for all image generation models. The API type is automatically detected - based on the model name: - - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) 
use image_generation API - - All other models use chat/completions API (default) - - Image storage behavior: - - Create mode: Images saved to disk with UUID filenames, paths stored in dataframe - - Preview mode: Images stored as base64 directly in dataframe - - Common parameters like quality and size are provided as optional fields. - For model-specific parameters (including n for number of images), use the `extra_body` - field inherited from BaseInferenceParams. - - If the API returns multiple images (either from prompt or API parameters), all images - will be stored as a list in the dataframe. + Works for both diffusion and autoregressive image generation models. Use extra_body for model-specific parameters. Attributes: generation_type: Type of generation, always "image" for this class. @@ -454,22 +440,14 @@ class ImageInferenceParams(BaseInferenceParams): size="1024x1024" ) - # Generate multiple images using extra_body - dd.ImageInferenceParams( - quality="hd", - size="1024x1024", - extra_body={"n": 3} # Request 3 images from API - ) - # With model-specific params via extra_body dd.ImageInferenceParams( - quality="hd", - size="1024x1024", + quality="auto", extra_body={ "generationConfig": { "imageConfig": { "aspectRatio": "1:1", - "negativePrompt": "blurry, low quality" + "imageSize": "1024" } } } diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 9fc4e2b0..678d3b80 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -37,8 +37,6 @@ def is_image_diffusion_model(model_name: str) -> bool: """Return True if the model is a diffusion-based image generation model. - - Args: model_name: Model name or identifier (e.g. from provider). 
diff --git a/packages/data-designer-config/tests/config/test_models.py b/packages/data-designer-config/tests/config/test_models.py index 38b8079e..4891c78d 100644 --- a/packages/data-designer-config/tests/config/test_models.py +++ b/packages/data-designer-config/tests/config/test_models.py @@ -17,6 +17,7 @@ GenerationType, ImageContext, ImageFormat, + ImageInferenceParams, ManualDistribution, ManualDistributionParams, ModalityDataType, @@ -412,6 +413,12 @@ def test_model_config_construction(): assert model_config.inference_parameters == embedding_params assert model_config.generation_type == GenerationType.EMBEDDING + # test construction with image inference parameters + image_params = ImageInferenceParams(quality="hd", size="1024x1024") + model_config = ModelConfig(alias="test", model="test", inference_parameters=image_params) + assert model_config.inference_parameters == image_params + assert model_config.generation_type == GenerationType.IMAGE + def test_model_config_generation_type_from_dict(): # Test that generation_type in dict is used to create the right inference params type @@ -435,6 +442,29 @@ def test_model_config_generation_type_from_dict(): assert isinstance(model_config.inference_parameters, ChatCompletionInferenceParams) assert model_config.generation_type == GenerationType.CHAT_COMPLETION + model_config = ModelConfig.model_validate( + { + "alias": "test", + "model": "image-model", + "inference_parameters": {"generation_type": "image", "quality": "hd", "size": "1024x1024"}, + } + ) + assert isinstance(model_config.inference_parameters, ImageInferenceParams) + assert model_config.inference_parameters.quality == "hd" + assert model_config.inference_parameters.size == "1024x1024" + assert model_config.generation_type == GenerationType.IMAGE + + +def test_image_inference_params_generate_kwargs() -> None: + """ImageInferenceParams.generate_kwargs includes quality and size when set.""" + params = ImageInferenceParams() + assert 
params.generate_kwargs.get("quality") is None + assert params.generate_kwargs.get("size") is None + + params = ImageInferenceParams(quality="hd", size="1024x1024") + assert params.generate_kwargs["quality"] == "hd" + assert params.generate_kwargs["size"] == "1024x1024" + def test_chat_completion_params_format_for_display_all_params(): """Test formatting chat completion model with all parameters.""" diff --git a/packages/data-designer-engine/src/data_designer/engine/models/registry.py b/packages/data-designer-engine/src/data_designer/engine/models/registry.py index 2878f64e..c6f2b7c7 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/registry.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/registry.py @@ -187,7 +187,7 @@ def run_health_check(self, model_aliases: list[str]) -> None: skip_usage_tracking=True, purpose="running health checks", ) - elif model.model_generation_type == GenerationType.IMAGE_GENERATION: + elif model.model_generation_type == GenerationType.IMAGE: model.generate_image( prompt="Generate a simple pixel", skip_usage_tracking=True, diff --git a/packages/data-designer-engine/tests/engine/models/test_facade.py b/packages/data-designer-engine/tests/engine/models/test_facade.py index 78473d63..0323ce98 100644 --- a/packages/data-designer-engine/tests/engine/models/test_facade.py +++ b/packages/data-designer-engine/tests/engine/models/test_facade.py @@ -1,11 +1,12 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING, Any from unittest.mock import patch import pytest -from litellm.types.utils import Choices, EmbeddingResponse, Message, ModelResponse from data_designer.engine.mcp.errors import MCPConfigurationError, MCPToolError from data_designer.engine.models.errors import ModelGenerationValidationFailureError @@ -13,6 +14,10 @@ from data_designer.engine.models.parsers.errors import ParserException from data_designer.engine.models.utils import ChatMessage from data_designer.engine.testing import StubMCPFacade, StubMCPRegistry, StubMessage, StubResponse +from data_designer.lazy_heavy_imports import litellm + +if TYPE_CHECKING: + import litellm def mock_oai_response_object(response_text: str) -> StubResponse: @@ -35,12 +40,14 @@ def stub_completion_messages() -> list[ChatMessage]: @pytest.fixture def stub_expected_completion_response(): - return ModelResponse(choices=Choices(message=Message(content="Test response"))) + return litellm.types.utils.ModelResponse( + choices=litellm.types.utils.Choices(message=litellm.types.utils.Message(content="Test response")) + ) @pytest.fixture def stub_expected_embedding_response(): - return EmbeddingResponse(data=[{"embedding": [0.1, 0.2, 0.3]}] * 2) + return litellm.types.utils.EmbeddingResponse(data=[{"embedding": [0.1, 0.2, 0.3]}] * 2) @pytest.mark.parametrize( @@ -106,9 +113,11 @@ def test_generate_with_system_prompt( # Capture messages at call time since they get mutated after the call captured_messages = [] - def capture_and_return(*args: Any, **kwargs: Any) -> ModelResponse: + def capture_and_return(*args: Any, **kwargs: Any) -> litellm.types.utils.ModelResponse: captured_messages.append(list(args[1])) # Copy the messages list - return ModelResponse(choices=Choices(message=Message(content="Hello!"))) + return litellm.types.utils.ModelResponse( + 
choices=litellm.types.utils.Choices(message=litellm.types.utils.Message(content="Hello!")) + ) mock_completion.side_effect = capture_and_return @@ -166,7 +175,7 @@ def test_completion_success( stub_completion_messages: list[ChatMessage], stub_model_configs: Any, stub_model_facade: ModelFacade, - stub_expected_completion_response: ModelResponse, + stub_expected_completion_response: litellm.types.utils.ModelResponse, skip_usage_tracking: bool, ) -> None: mock_router_completion.side_effect = lambda self, model, messages, **kwargs: stub_expected_completion_response @@ -199,11 +208,13 @@ def test_completion_with_kwargs( stub_completion_messages: list[ChatMessage], stub_model_configs: Any, stub_model_facade: ModelFacade, - stub_expected_completion_response: ModelResponse, + stub_expected_completion_response: litellm.types.utils.ModelResponse, ) -> None: captured_kwargs = {} - def mock_completion(self: Any, model: str, messages: list[dict[str, Any]], **kwargs: Any) -> ModelResponse: + def mock_completion( + self: Any, model: str, messages: list[dict[str, Any]], **kwargs: Any + ) -> litellm.types.utils.ModelResponse: captured_kwargs.update(kwargs) return stub_expected_completion_response @@ -1002,14 +1013,12 @@ def test_generate_image_diffusion_tracks_image_usage( stub_model_facade: ModelFacade, ) -> None: """Test that generate_image tracks image usage for diffusion models.""" - from litellm.types.utils import ImageObject, ImageResponse - # Mock response with 3 images - mock_response = ImageResponse( + mock_response = litellm.types.utils.ImageResponse( data=[ - ImageObject(b64_json="image1_base64"), - ImageObject(b64_json="image2_base64"), - ImageObject(b64_json="image3_base64"), + litellm.types.utils.ImageObject(b64_json="image1_base64"), + litellm.types.utils.ImageObject(b64_json="image2_base64"), + litellm.types.utils.ImageObject(b64_json="image3_base64"), ] ) mock_image_generation.return_value = mock_response @@ -1036,18 +1045,20 @@ def 
test_generate_image_chat_completion_tracks_image_usage( stub_model_facade: ModelFacade, ) -> None: """Test that generate_image tracks image usage for chat completion models.""" - from litellm.types.utils import Choices, ImageURLListItem, Message, ModelResponse - # Mock response with images attribute (Message requires type and index per ImageURLListItem) - mock_message = Message( + mock_message = litellm.types.utils.Message( role="assistant", content="", images=[ - ImageURLListItem(type="image_url", image_url={"url": "data:image/png;base64,image1"}, index=0), - ImageURLListItem(type="image_url", image_url={"url": "data:image/png;base64,image2"}, index=1), + litellm.types.utils.ImageURLListItem( + type="image_url", image_url={"url": "data:image/png;base64,image1"}, index=0 + ), + litellm.types.utils.ImageURLListItem( + type="image_url", image_url={"url": "data:image/png;base64,image2"}, index=1 + ), ], ) - mock_response = ModelResponse(choices=[Choices(message=mock_message)]) + mock_response = litellm.types.utils.ModelResponse(choices=[litellm.types.utils.Choices(message=mock_message)]) mock_completion.return_value = mock_response # Verify initial state @@ -1072,12 +1083,10 @@ def test_generate_image_skip_usage_tracking( stub_model_facade: ModelFacade, ) -> None: """Test that generate_image respects skip_usage_tracking flag.""" - from litellm.types.utils import ImageObject, ImageResponse - - mock_response = ImageResponse( + mock_response = litellm.types.utils.ImageResponse( data=[ - ImageObject(b64_json="image1_base64"), - ImageObject(b64_json="image2_base64"), + litellm.types.utils.ImageObject(b64_json="image1_base64"), + litellm.types.utils.ImageObject(b64_json="image2_base64"), ] ) mock_image_generation.return_value = mock_response @@ -1103,21 +1112,19 @@ def test_generate_image_accumulates_usage( stub_model_facade: ModelFacade, ) -> None: """Test that generate_image accumulates image usage across multiple calls.""" - from litellm.types.utils import ImageObject, 
ImageResponse - # First call - 2 images - mock_response1 = ImageResponse( + mock_response1 = litellm.types.utils.ImageResponse( data=[ - ImageObject(b64_json="image1"), - ImageObject(b64_json="image2"), + litellm.types.utils.ImageObject(b64_json="image1"), + litellm.types.utils.ImageObject(b64_json="image2"), ] ) # Second call - 3 images - mock_response2 = ImageResponse( + mock_response2 = litellm.types.utils.ImageResponse( data=[ - ImageObject(b64_json="image3"), - ImageObject(b64_json="image4"), - ImageObject(b64_json="image5"), + litellm.types.utils.ImageObject(b64_json="image3"), + litellm.types.utils.ImageObject(b64_json="image4"), + litellm.types.utils.ImageObject(b64_json="image5"), ] ) mock_image_generation.side_effect = [mock_response1, mock_response2] From fad791ee9c9073e590dd74b4febb7f8cc72b2064 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 12:16:31 -0700 Subject: [PATCH 30/64] Small refactor for simplicity --- .../src/data_designer/config/__init__.py | 4 ++-- .../src/data_designer/config/column_configs.py | 14 +++----------- .../src/data_designer/config/column_types.py | 8 ++++---- .../data_designer/config/utils/visualization.py | 2 +- .../tests/config/test_columns.py | 2 +- .../engine/column_generators/generators/image.py | 16 ++++------------ .../engine/column_generators/registry.py | 4 ++-- .../utils/generator_classification.py | 2 ++ .../dataset_builders/column_wise_builder.py | 2 +- .../column_generators/generators/test_image.py | 14 ++++++-------- .../utils/test_generator_classification.py | 2 ++ 11 files changed, 28 insertions(+), 42 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/__init__.py b/packages/data-designer-config/src/data_designer/config/__init__.py index 5686b506..42afae81 100644 --- a/packages/data-designer-config/src/data_designer/config/__init__.py +++ b/packages/data-designer-config/src/data_designer/config/__init__.py @@ -17,7 +17,7 @@ EmbeddingColumnConfig, 
ExpressionColumnConfig, GenerationStrategy, - ImageGenerationColumnConfig, + ImageColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, LLMStructuredColumnConfig, @@ -123,7 +123,7 @@ "CustomColumnConfig": (_MOD_COLUMN_CONFIGS, "CustomColumnConfig"), "EmbeddingColumnConfig": (_MOD_COLUMN_CONFIGS, "EmbeddingColumnConfig"), "ExpressionColumnConfig": (_MOD_COLUMN_CONFIGS, "ExpressionColumnConfig"), - "ImageGenerationColumnConfig": (_MOD_COLUMN_CONFIGS, "ImageGenerationColumnConfig"), + "ImageColumnConfig": (_MOD_COLUMN_CONFIGS, "ImageColumnConfig"), "GenerationStrategy": (_MOD_COLUMN_CONFIGS, "GenerationStrategy"), "LLMCodeColumnConfig": (_MOD_COLUMN_CONFIGS, "LLMCodeColumnConfig"), "LLMJudgeColumnConfig": (_MOD_COLUMN_CONFIGS, "LLMJudgeColumnConfig"), diff --git a/packages/data-designer-config/src/data_designer/config/column_configs.py b/packages/data-designer-config/src/data_designer/config/column_configs.py index facbc4cf..e9d89f4e 100644 --- a/packages/data-designer-config/src/data_designer/config/column_configs.py +++ b/packages/data-designer-config/src/data_designer/config/column_configs.py @@ -485,22 +485,14 @@ def side_effect_columns(self) -> list[str]: return [] -class ImageGenerationColumnConfig(SingleColumnConfig): +class ImageColumnConfig(SingleColumnConfig): """Configuration for image generation columns. Image columns generate images using either autoregressive or diffusion models. The API used is automatically determined based on the model name: - - **Diffusion models** (DALL-E, Stable Diffusion, Imagen, etc.) → image_generation API - - **All other models** → chat/completions API (default) - - Image storage behavior: - - **Create mode**: Images saved to disk with UUID filenames in `images/` folder, - dataframe stores relative paths (e.g., "images/abc123.png") - - **Preview mode**: Images stored as base64 directly in dataframe - Attributes: - column_type: Discriminator field, always "image-generation" for this configuration type.
+ column_type: Discriminator field, always "image" for this configuration type. prompt: Prompt template for image generation. Supports Jinja2 templating to reference other columns (e.g., "Generate an image of a {{ character_name }}"). Must be a valid Jinja2 template. @@ -509,7 +501,7 @@ class ImageGenerationColumnConfig(SingleColumnConfig): prompt: str model_alias: str - column_type: Literal["image-generation"] = "image-generation" + column_type: Literal["image"] = "image" @staticmethod def get_column_emoji() -> str: diff --git a/packages/data-designer-config/src/data_designer/config/column_types.py b/packages/data-designer-config/src/data_designer/config/column_types.py index 9b01e7d7..baba25dd 100644 --- a/packages/data-designer-config/src/data_designer/config/column_types.py +++ b/packages/data-designer-config/src/data_designer/config/column_types.py @@ -9,7 +9,7 @@ CustomColumnConfig, EmbeddingColumnConfig, ExpressionColumnConfig, - ImageGenerationColumnConfig, + ImageColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, LLMStructuredColumnConfig, @@ -40,7 +40,7 @@ | SeedDatasetColumnConfig | ValidationColumnConfig | EmbeddingColumnConfig - | ImageGenerationColumnConfig + | ImageColumnConfig ) ColumnConfigT = plugin_manager.inject_into_column_config_type_union(ColumnConfigT) @@ -89,7 +89,7 @@ def get_column_display_order() -> list[DataDesignerColumnType]: DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, DataDesignerColumnType.EMBEDDING, - DataDesignerColumnType.IMAGE_GENERATION, + DataDesignerColumnType.IMAGE, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EXPRESSION, DataDesignerColumnType.CUSTOM, @@ -145,5 +145,5 @@ def _resolve_sampler_kwargs(name: str, kwargs: dict) -> dict: DataDesignerColumnType.SAMPLER: SamplerColumnConfig, DataDesignerColumnType.SEED_DATASET: SeedDatasetColumnConfig, DataDesignerColumnType.EMBEDDING: EmbeddingColumnConfig, - DataDesignerColumnType.IMAGE_GENERATION: ImageGenerationColumnConfig, + 
DataDesignerColumnType.IMAGE: ImageColumnConfig, } diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index 6a9e8ee5..910bc467 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -290,7 +290,7 @@ def display_sample_record( render_list.append(pad_console_element(table)) # Collect image generation columns (will be displayed at the end) - image_columns = config_builder.get_columns_of_type(DataDesignerColumnType.IMAGE_GENERATION) + image_columns = config_builder.get_columns_of_type(DataDesignerColumnType.IMAGE) images_to_display_later = [] if len(image_columns) > 0: # Check if we're in a notebook to decide display style diff --git a/packages/data-designer-config/tests/config/test_columns.py b/packages/data-designer-config/tests/config/test_columns.py index 56bb912d..e633518d 100644 --- a/packages/data-designer-config/tests/config/test_columns.py +++ b/packages/data-designer-config/tests/config/test_columns.py @@ -53,7 +53,7 @@ def test_data_designer_column_type_get_display_order(): DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, DataDesignerColumnType.EMBEDDING, - DataDesignerColumnType.IMAGE_GENERATION, + DataDesignerColumnType.IMAGE, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EXPRESSION, DataDesignerColumnType.CUSTOM, diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index 41586e4b..c8396e24 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -5,7 +5,7 @@ from typing import 
TYPE_CHECKING -from data_designer.config.column_configs import ImageGenerationColumnConfig +from data_designer.config.column_configs import ImageColumnConfig from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering from data_designer.engine.processing.utils import deserialize_json_values @@ -14,18 +14,12 @@ from data_designer.engine.storage.media_storage import MediaStorage -class ImageCellGenerator(WithJinja2UserTemplateRendering, ColumnGeneratorWithModel[ImageGenerationColumnConfig]): +class ImageCellGenerator(WithJinja2UserTemplateRendering, ColumnGeneratorWithModel[ImageColumnConfig]): """Generator for image columns with disk or dataframe persistence. Media storage always exists and determines behavior via its mode: - - DISK mode (create): Saves images to disk and stores relative paths in dataframe - - DATAFRAME mode (preview): Stores base64 directly in dataframe - - API is automatically detected based on the model name: - - Diffusion models (DALL-E, Stable Diffusion, Imagen, etc.) 
→ image_generation API - - All other models → chat/completions API (default) - - Storage is accessed via ResourceProvider.artifact_storage.media_storage + - DISK mode: Saves images to disk and stores relative paths in dataframe + - DATAFRAME mode: Stores base64 directly in dataframe """ @property @@ -69,8 +63,6 @@ def generate(self, data: dict) -> dict: base64_images = self.model.generate_image(prompt=prompt) # Store via media storage (mode determines disk vs dataframe storage) - # TODO: MediaStorage will check its mode (DISK/DATAFRAME) and act accordingly - # For now, always saves to disk - need to implement mode system results = [self.media_storage.save_base64_image(base64_image) for base64_image in base64_images] data[self.config.name] = results diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/registry.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/registry.py index a4538ad6..f4fc27b9 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/registry.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/registry.py @@ -8,7 +8,7 @@ CustomColumnConfig, EmbeddingColumnConfig, ExpressionColumnConfig, - ImageGenerationColumnConfig, + ImageColumnConfig, LLMCodeColumnConfig, LLMJudgeColumnConfig, LLMStructuredColumnConfig, @@ -54,7 +54,7 @@ def create_default_column_generator_registry(with_plugins: bool = True) -> Colum registry.register(DataDesignerColumnType.SEED_DATASET, SeedDatasetColumnGenerator, SeedDatasetMultiColumnConfig) registry.register(DataDesignerColumnType.VALIDATION, ValidationColumnGenerator, ValidationColumnConfig) registry.register(DataDesignerColumnType.LLM_STRUCTURED, LLMStructuredCellGenerator, LLMStructuredColumnConfig) - registry.register(DataDesignerColumnType.IMAGE_GENERATION, ImageCellGenerator, ImageGenerationColumnConfig) + registry.register(DataDesignerColumnType.IMAGE, ImageCellGenerator, ImageColumnConfig) if
with_plugins: for plugin in PluginRegistry().get_plugins(PluginType.COLUMN_GENERATOR): registry.register( diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/utils/generator_classification.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/utils/generator_classification.py index 2e082779..7a45fc71 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/utils/generator_classification.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/utils/generator_classification.py @@ -22,6 +22,7 @@ def column_type_used_in_execution_dag(column_type: str | DataDesignerColumnType) DataDesignerColumnType.LLM_TEXT, DataDesignerColumnType.VALIDATION, DataDesignerColumnType.EMBEDDING, + DataDesignerColumnType.IMAGE, } dag_column_types.update(plugin_manager.get_plugin_column_types(DataDesignerColumnType)) return column_type in dag_column_types @@ -36,6 +37,7 @@ def column_type_is_model_generated(column_type: str | DataDesignerColumnType) -> DataDesignerColumnType.LLM_STRUCTURED, DataDesignerColumnType.LLM_JUDGE, DataDesignerColumnType.EMBEDDING, + DataDesignerColumnType.IMAGE, } for plugin in plugin_manager.get_column_generator_plugins(): if issubclass(plugin.impl_cls, ColumnGeneratorWithModelRegistry): diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py index 6802f805..ad9e265b 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -172,7 +172,7 @@ def _has_image_columns(self) -> bool: """Check if config has any image generation columns.""" from data_designer.config.column_types import DataDesignerColumnType - return any(col.column_type == 
DataDesignerColumnType.IMAGE_GENERATION for col in self.single_column_configs) + return any(col.column_type == DataDesignerColumnType.IMAGE for col in self.single_column_configs) def _initialize_generators(self) -> list[ColumnGenerator]: """Initialize column generators. diff --git a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py index e7055d67..80523ff5 100644 --- a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py +++ b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py @@ -5,7 +5,7 @@ import pytest -from data_designer.config.column_configs import ImageGenerationColumnConfig +from data_designer.config.column_configs import ImageColumnConfig from data_designer.engine.column_generators.generators.base import GenerationStrategy from data_designer.engine.column_generators.generators.image import ImageCellGenerator from data_designer.engine.processing.ginja.exceptions import UserTemplateError @@ -13,9 +13,7 @@ @pytest.fixture def stub_image_column_config(): - return ImageGenerationColumnConfig( - name="test_image", prompt="A {{ style }} image of {{ subject }}", model_alias="test_model" - ) + return ImageColumnConfig(name="test_image", prompt="A {{ style }} image of {{ subject }}", model_alias="test_model") @pytest.fixture @@ -24,14 +22,14 @@ def stub_base64_images() -> list[str]: def test_image_cell_generator_generation_strategy( - stub_image_column_config: ImageGenerationColumnConfig, stub_resource_provider: None + stub_image_column_config: ImageColumnConfig, stub_resource_provider: None ) -> None: generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) assert generator.get_generation_strategy() == GenerationStrategy.CELL_BY_CELL def test_image_cell_generator_media_storage_property( - stub_image_column_config: 
ImageGenerationColumnConfig, stub_resource_provider: None + stub_image_column_config: ImageColumnConfig, stub_resource_provider: None ) -> None: generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) # Should return media_storage from artifact_storage (always exists) @@ -105,7 +103,7 @@ def test_image_cell_generator_missing_columns_error(stub_image_column_config, st def test_image_cell_generator_empty_prompt_error(stub_resource_provider): """Test that empty rendered prompt raises UserTemplateError.""" # Create config with template that renders to empty string - config = ImageGenerationColumnConfig(name="test_image", prompt="{{ empty }}", model_alias="test_model") + config = ImageColumnConfig(name="test_image", prompt="{{ empty }}", model_alias="test_model") generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) @@ -115,7 +113,7 @@ def test_image_cell_generator_empty_prompt_error(stub_resource_provider): def test_image_cell_generator_whitespace_only_prompt_error(stub_resource_provider): """Test that whitespace-only rendered prompt raises ValueError.""" - config = ImageGenerationColumnConfig(name="test_image", prompt="{{ spaces }}", model_alias="test_model") + config = ImageColumnConfig(name="test_image", prompt="{{ spaces }}", model_alias="test_model") generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) diff --git a/packages/data-designer-engine/tests/engine/column_generators/utils/test_generator_classification.py b/packages/data-designer-engine/tests/engine/column_generators/utils/test_generator_classification.py index bdf15e5d..0be26b11 100644 --- a/packages/data-designer-engine/tests/engine/column_generators/utils/test_generator_classification.py +++ b/packages/data-designer-engine/tests/engine/column_generators/utils/test_generator_classification.py @@ -14,6 +14,7 @@ def test_column_type_is_model_generated() -> None: assert 
column_type_is_model_generated(DataDesignerColumnType.LLM_STRUCTURED) assert column_type_is_model_generated(DataDesignerColumnType.LLM_JUDGE) assert column_type_is_model_generated(DataDesignerColumnType.EMBEDDING) + assert column_type_is_model_generated(DataDesignerColumnType.IMAGE) assert not column_type_is_model_generated(DataDesignerColumnType.SAMPLER) assert not column_type_is_model_generated(DataDesignerColumnType.VALIDATION) assert not column_type_is_model_generated(DataDesignerColumnType.EXPRESSION) @@ -28,5 +29,6 @@ def test_column_type_used_in_execution_dag() -> None: assert column_type_used_in_execution_dag(DataDesignerColumnType.LLM_TEXT) assert column_type_used_in_execution_dag(DataDesignerColumnType.VALIDATION) assert column_type_used_in_execution_dag(DataDesignerColumnType.EMBEDDING) + assert column_type_used_in_execution_dag(DataDesignerColumnType.IMAGE) assert not column_type_used_in_execution_dag(DataDesignerColumnType.SAMPLER) assert not column_type_used_in_execution_dag(DataDesignerColumnType.SEED_DATASET) From 54ebcc80cb2a9b62fdc98804631193205faa2414 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 12:59:43 -0700 Subject: [PATCH 31/64] update ImageInferenceParams --- .../src/data_designer/config/models.py | 21 +++++-------------- .../tests/config/test_models.py | 21 ++++++++++--------- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index 8b16b4bc..0542a8b8 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -425,24 +425,20 @@ def generate_kwargs(self) -> dict[str, float | int]: class ImageInferenceParams(BaseInferenceParams): """Configuration for image generation models. - Works for both diffusion and autoregressive image generation models. 
Use extra_body for model-specific parameters. + Works for both diffusion and autoregressive image generation models. Pass all model-specific image options via `extra_body`. Attributes: generation_type: Type of generation, always "image" for this class. - quality: Image quality setting (e.g., "standard", "hd"). Optional and model-specific. - size: Image size specification (e.g., "1024x1024", "1792x1024"). Optional and model-specific. Example: ```python - # Standard usage with common params + # OpenAI-style (DALL·E): quality and size in extra_body or as top-level kwargs dd.ImageInferenceParams( - quality="hd", - size="1024x1024" + extra_body={"size": "1024x1024", "quality": "hd"} ) - # With model-specific params via extra_body + # Gemini-style: generationConfig.imageConfig dd.ImageInferenceParams( - quality="auto", extra_body={ "generationConfig": { "imageConfig": { @@ -456,17 +452,10 @@ class ImageInferenceParams(BaseInferenceParams): """ generation_type: Literal[GenerationType.IMAGE] = GenerationType.IMAGE - quality: str | None = None - size: str | None = None @property def generate_kwargs(self) -> dict[str, Any]: - result = super().generate_kwargs - if self.quality is not None: - result["quality"] = self.quality - if self.size is not None: - result["size"] = self.size - return result + return super().generate_kwargs InferenceParamsT: TypeAlias = Annotated[ diff --git a/packages/data-designer-config/tests/config/test_models.py b/packages/data-designer-config/tests/config/test_models.py index 4891c78d..564b235c 100644 --- a/packages/data-designer-config/tests/config/test_models.py +++ b/packages/data-designer-config/tests/config/test_models.py @@ -414,7 +414,7 @@ def test_model_config_construction(): assert model_config.generation_type == GenerationType.EMBEDDING # test construction with image inference parameters - image_params = ImageInferenceParams(quality="hd", size="1024x1024") + image_params = ImageInferenceParams(extra_body={"size": "1024x1024", "quality": 
"hd"}) model_config = ModelConfig(alias="test", model="test", inference_parameters=image_params) assert model_config.inference_parameters == image_params assert model_config.generation_type == GenerationType.IMAGE @@ -446,24 +446,25 @@ def test_model_config_generation_type_from_dict(): { "alias": "test", "model": "image-model", - "inference_parameters": {"generation_type": "image", "quality": "hd", "size": "1024x1024"}, + "inference_parameters": { + "generation_type": "image", + "extra_body": {"size": "1024x1024", "quality": "hd"}, + }, } ) assert isinstance(model_config.inference_parameters, ImageInferenceParams) - assert model_config.inference_parameters.quality == "hd" - assert model_config.inference_parameters.size == "1024x1024" + assert model_config.inference_parameters.extra_body == {"size": "1024x1024", "quality": "hd"} assert model_config.generation_type == GenerationType.IMAGE def test_image_inference_params_generate_kwargs() -> None: - """ImageInferenceParams.generate_kwargs includes quality and size when set.""" + """ImageInferenceParams.generate_kwargs delegates to base; image params go via extra_body.""" params = ImageInferenceParams() - assert params.generate_kwargs.get("quality") is None - assert params.generate_kwargs.get("size") is None + assert "quality" not in params.generate_kwargs + assert "size" not in params.generate_kwargs - params = ImageInferenceParams(quality="hd", size="1024x1024") - assert params.generate_kwargs["quality"] == "hd" - assert params.generate_kwargs["size"] == "1024x1024" + params = ImageInferenceParams(extra_body={"size": "1024x1024", "quality": "hd"}) + assert params.generate_kwargs.get("extra_body") == {"size": "1024x1024", "quality": "hd"} def test_chat_completion_params_format_for_display_all_params(): From 3aad6081dca8582d88f06b5e464c4b0f0ac79bf2 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 13:01:38 -0700 Subject: [PATCH 32/64] add example tutorial for image generation --- 
docs/notebook_source/1-the-basics.py | 2 + ...tructured-outputs-and-jinja-expressions.py | 2 + .../3-seeding-with-a-dataset.py | 2 + .../4-providing-images-as-context.py | 2 + docs/notebook_source/5-generating-images.py | 212 ++++++++++++++++++ docs/notebook_source/_README.md | 9 + 6 files changed, 229 insertions(+) create mode 100644 docs/notebook_source/5-generating-images.py diff --git a/docs/notebook_source/1-the-basics.py b/docs/notebook_source/1-the-basics.py index 392efb34..8735d582 100644 --- a/docs/notebook_source/1-the-basics.py +++ b/docs/notebook_source/1-the-basics.py @@ -330,3 +330,5 @@ # # - [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/) # +# - [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) +# diff --git a/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py b/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py index 66b3773f..df581612 100644 --- a/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +++ b/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py @@ -372,3 +372,5 @@ class ProductReview(BaseModel): # # - [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/) # +# - [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) +# diff --git a/docs/notebook_source/3-seeding-with-a-dataset.py b/docs/notebook_source/3-seeding-with-a-dataset.py index c9d694a8..e4f9218e 100644 --- a/docs/notebook_source/3-seeding-with-a-dataset.py +++ b/docs/notebook_source/3-seeding-with-a-dataset.py @@ -274,3 +274,5 @@ # # - [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/) # +# - [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) +# diff --git 
a/docs/notebook_source/4-providing-images-as-context.py b/docs/notebook_source/4-providing-images-as-context.py index a11880ba..1fd68dac 100644 --- a/docs/notebook_source/4-providing-images-as-context.py +++ b/docs/notebook_source/4-providing-images-as-context.py @@ -299,3 +299,5 @@ def convert_image_to_chat_format(record, height: int) -> dict: # - Combine vision-based summaries with other column types for multi-modal workflows # - Apply this pattern to other vision tasks like image captioning, OCR validation, or visual question answering # +# - [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) with Data Designer +# diff --git a/docs/notebook_source/5-generating-images.py b/docs/notebook_source/5-generating-images.py new file mode 100644 index 00000000..aee5a0c1 --- /dev/null +++ b/docs/notebook_source/5-generating-images.py @@ -0,0 +1,212 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.18.1 +# kernelspec: +# display_name: .venv +# language: python +# name: python3 +# --- + +# %% [markdown] +# # 🎨 Data Designer Tutorial: Generating Images +# +# #### 📚 What you'll learn +# +# This notebook shows how to generate synthetic image data with Data Designer using image-generation models. +# +# - 🖼️ **Image generation columns**: Add columns that produce images from text prompts +# - 📝 **Jinja2 prompts**: Drive diversity by referencing other columns in your prompt template +# - 💾 **Preview vs create**: Preview stores base64 in the dataframe; create saves images to disk and stores paths +# +# Data Designer supports both **diffusion** (e.g. DALL·E, Stable Diffusion, Imagen) and **autoregressive** (e.g. Gemini image, GPT image) models; the API is chosen automatically from the model name.
+# +# If this is your first time using Data Designer, we recommend starting with the [first notebook](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/1-the-basics/) in this tutorial series. +# + +# %% [markdown] +# ### 📦 Import Data Designer +# +# - `data_designer.config` provides the configuration API. +# - `DataDesigner` is the main interface for generation. +# + +# %% +from IPython.display import Image as IPImage +from IPython.display import display + +import data_designer.config as dd +from data_designer.interface import DataDesigner + +# %% [markdown] +# ### ⚙️ Initialize the Data Designer interface +# +# When initialized without arguments, [default model providers](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/default-model-settings/) are used. This tutorial uses [OpenRouter](https://openrouter.ai) with the Flux 2 Pro image model; set `OPENROUTER_API_KEY` in your environment. +# + +# %% +data_designer = DataDesigner() + +# %% [markdown] +# ### 🎛️ Define an image-generation model +# +# - Use `ImageInferenceParams` so Data Designer treats this model as an image generator. +# - Image options (size, quality, aspect ratio, etc.) are model-specific; pass them via `extra_body`. +# + +# %% +MODEL_PROVIDER = "openrouter" +MODEL_ID = "black-forest-labs/flux.2-pro" +MODEL_ALIAS = "image-model" + +model_configs = [ + dd.ModelConfig( + alias=MODEL_ALIAS, + model=MODEL_ID, + provider=MODEL_PROVIDER, + inference_parameters=dd.ImageInferenceParams( + extra_body={"size": "1024x1024"}, + ), + ) +] + +# %% [markdown] +# ### 🏗️ Build the config: samplers + image column +# +# We'll generate diverse **dog portrait** images: sampler columns drive subject (breed), age, style, look direction, and emotion. The image-generation column uses a Jinja2 prompt that references all of them.
+# + +# %% +config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="subject", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=[ + "a Golden Retriever", + "a German Shepherd", + "a Labrador Retriever", + "a Bulldog", + "a Beagle", + "a Poodle", + "a Corgi", + "a Siberian Husky", + "a Dalmatian", + ], + ), + ) +) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="age", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["1-3", "3-6", "6-9", "9-12", "12-15"], + ), + ) +) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="style", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=[ + "photorealistic", + "oil painting", + "watercolor", + "digital art", + "sketch", + "anime", + ], + ), + ) +) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="look_direction", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["left", "right", "front", "up", "down"], + ), + ) +) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="emotion", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["happy", "curious", "serious", "sleepy", "excited"], + ), + ) +) + +config_builder.add_column( + dd.ImageColumnConfig( + name="generated_image", + prompt=( + "A {{ style }} portrait of {{ subject }} {{ age }} years old looking {{ look_direction }} " + "towards a crowd of the same kind with an {{ emotion }} expression." + ), + model_alias=MODEL_ALIAS, + ) +) + +data_designer.validate(config_builder) + +# %% [markdown] +# ### 🔍 Preview: images as base64 +# +# In **preview** mode, generated images are stored as base64 strings in the dataframe. Run the next cell to step through each record (images are shown in the sample record display, but only in a notebook environment).
+# + +# %% +preview = data_designer.preview(config_builder, num_records=2) + +# %% +for i in range(len(preview.dataset)): + preview.display_sample_record() + +# %% +preview.dataset + +# %% [markdown] +# ### 🆙 Create: images saved to disk +# +# In **create** mode, images are written to an `images/` folder with UUID filenames; the dataframe stores relative paths (e.g. `images/1d16b6e2-562f-4f51-91e5-baaa999ea916.png`). +# + +# %% +results = data_designer.create(config_builder, num_records=5, dataset_name="tutorial-5-images") + +# %% +dataset = results.load_dataset() +dataset.head() + +# %% +# Display all image from the created dataset. Paths are relative to the artifact output directory. +for index, row in dataset.iterrows(): + path_or_list = row.get("generated_image") + if path_or_list is not None: + for path in path_or_list: + base = results.artifact_storage.base_dataset_path + full_path = base / path + display(IPImage(data=full_path)) + +# %% [markdown] +# ## ⏭️ Next steps +# +# - [The basics](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/1-the-basics/): samplers and LLM text columns +# - [Structured outputs and Jinja](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/2-structured-outputs-and-jinja-expressions/) +# - [Seeding with a dataset](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/3-seeding-with-a-dataset/) +# - [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/) +# diff --git a/docs/notebook_source/_README.md b/docs/notebook_source/_README.md index 09053c22..7bcd77d1 100644 --- a/docs/notebook_source/_README.md +++ b/docs/notebook_source/_README.md @@ -97,6 +97,15 @@ Learn how to use vision-language models to generate text descriptions from image - Generating detailed summaries from document images - Inspecting and validating vision-based generation results +### [5.
Generating Images](5-generating-images.ipynb) + +Generate synthetic image data with Data Designer: + +- Configuring image-generation models with `ImageInferenceParams` +- Adding image columns with Jinja2 prompts and sampler-driven diversity +- Preview (base64 in dataframe) vs create (images saved to disk, paths in dataframe) +- Displaying generated images in the notebook + ## 📖 Important Documentation Sections Before diving into the tutorials, familiarize yourself with these key documentation sections: From f252c376917bcb791113318a84ee4e84d005d793 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 18:15:14 -0700 Subject: [PATCH 33/64] support multi-modal context in ImageColumnConfig --- .../data_designer/config/column_configs.py | 4 + .../column_generators/generators/image.py | 9 +- .../src/data_designer/engine/models/facade.py | 28 +++++- .../data_designer/engine/models/registry.py | 2 +- .../generators/test_image.py | 90 ++++++++++++++++++- 5 files changed, 125 insertions(+), 8 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/column_configs.py b/packages/data-designer-config/src/data_designer/config/column_configs.py index e9d89f4e..e3ea013d 100644 --- a/packages/data-designer-config/src/data_designer/config/column_configs.py +++ b/packages/data-designer-config/src/data_designer/config/column_configs.py @@ -497,10 +497,14 @@ class ImageColumnConfig(SingleColumnConfig): reference other columns (e.g., "Generate an image of a {{ character_name }}"). Must be a valid Jinja2 template. model_alias: The model to use for image generation. + multi_modal_context: Optional list of image contexts for multi-modal generation. + Enables autoregressive multi-modal models to generate images based on image inputs. + Only works with autoregressive models that support image-to-image generation.
""" prompt: str model_alias: str + multi_modal_context: list[ImageContext] | None = None column_type: Literal["image"] = "image" @staticmethod diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index c8396e24..11bc732c 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -59,8 +59,15 @@ def generate(self, data: dict) -> dict: if not prompt or not prompt.strip(): raise ValueError(f"Rendered prompt for column {self.config.name!r} is empty") + # Process multi-modal context if provided + multi_modal_context = None + if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0: + multi_modal_context = [] + for context in self.config.multi_modal_context: + multi_modal_context.extend(context.get_contexts(deserialized_record)) + # Generate images (returns list of base64 strings) - base64_images = self.model.generate_image(prompt=prompt) + base64_images = self.model.generate_image(prompt=prompt, multi_modal_context=multi_modal_context) # Store via media storage (mode determines disk vs dataframe storage) results = [self.media_storage.save_base64_image(base64_image) for base64_image in base64_images] diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index 11f6e9ec..51940e99 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -304,7 +304,13 @@ def generate_text_embeddings( self._track_token_usage_from_embedding(response) @catch_llm_exceptions - def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) 
-> list[str]: + def generate_image( + self, + prompt: str, + multi_modal_context: list[dict[str, Any]] | None = None, + skip_usage_tracking: bool = False, + **kwargs, + ) -> list[str]: """Generate image(s) and return base64-encoded data. Automatically detects the appropriate API based on model name: @@ -316,6 +322,8 @@ def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwarg Args: prompt: The prompt for image generation + multi_modal_context: Optional list of image contexts for multi-modal generation. + Only used with autoregressive models via chat completions API. skip_usage_tracking: Whether to skip usage tracking **kwargs: Additional arguments to pass to the model (including n=number of images) @@ -334,7 +342,7 @@ def generate_image(self, prompt: str, skip_usage_tracking: bool = False, **kwarg if is_image_diffusion_model(self.model_name): images = self._generate_image_diffusion(prompt, skip_usage_tracking, **kwargs) else: - images = self._generate_image_chat_completion(prompt, skip_usage_tracking, **kwargs) + images = self._generate_image_chat_completion(prompt, multi_modal_context, skip_usage_tracking, **kwargs) # Track image usage if not skip_usage_tracking and len(images) > 0: @@ -353,14 +361,26 @@ def _get_mcp_facade(self, tool_alias: str | None) -> MCPFacade | None: except ValueError as exc: raise MCPConfigurationError(f"Tool alias {tool_alias!r} is not registered.") from exc - def _generate_image_chat_completion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: + def _generate_image_chat_completion( + self, + prompt: str, + multi_modal_context: list[dict[str, Any]] | None = None, + skip_usage_tracking: bool = False, + **kwargs, + ) -> list[str]: """Generate image(s) using autoregressive model via chat completions API. 
+ Args: + prompt: The prompt for image generation + multi_modal_context: Optional list of image contexts for multi-modal generation + skip_usage_tracking: Whether to skip usage tracking + **kwargs: Additional arguments to pass to the model + Returns: List of base64-encoded image strings """ kwargs = self.consolidate_kwargs(**kwargs) - messages = [ChatMessage.as_user(content=prompt)] + messages = prompt_to_messages(user_prompt=prompt, multi_modal_context=multi_modal_context) response = None try: diff --git a/packages/data-designer-engine/src/data_designer/engine/models/registry.py b/packages/data-designer-engine/src/data_designer/engine/models/registry.py index c6f2b7c7..0b103e76 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/registry.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/registry.py @@ -189,7 +189,7 @@ def run_health_check(self, model_aliases: list[str]) -> None: ) elif model.model_generation_type == GenerationType.IMAGE: model.generate_image( - prompt="Generate a simple pixel", + prompt="Generate a simple illustration of a thumbs up sign.", skip_usage_tracking=True, purpose="running health checks", ) diff --git a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py index 80523ff5..b433bc55 100644 --- a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py +++ b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py @@ -6,6 +6,7 @@ import pytest from data_designer.config.column_configs import ImageColumnConfig +from data_designer.config.models import ImageContext, ImageFormat, ModalityDataType from data_designer.engine.column_generators.generators.base import GenerationStrategy from data_designer.engine.column_generators.generators.image import ImageCellGenerator from data_designer.engine.processing.ginja.exceptions import 
UserTemplateError @@ -58,7 +59,7 @@ def test_image_cell_generator_generate_with_storage( assert data[stub_image_column_config.name] == ["images/uuid1.png", "images/uuid2.png"] # Verify model was called with rendered prompt - mock_generate.assert_called_once_with(prompt="A photorealistic image of cat") + mock_generate.assert_called_once_with(prompt="A photorealistic image of cat", multi_modal_context=None) # Verify storage was called for each image assert mock_storage.save_base64_image.call_count == 2 @@ -88,7 +89,7 @@ def test_image_cell_generator_generate_in_dataframe_mode( assert data[stub_image_column_config.name] == stub_base64_images # Verify model was called with rendered prompt - mock_generate.assert_called_once_with(prompt="A watercolor image of dog") + mock_generate.assert_called_once_with(prompt="A watercolor image of dog", multi_modal_context=None) def test_image_cell_generator_missing_columns_error(stub_image_column_config, stub_resource_provider): @@ -119,3 +120,88 @@ def test_image_cell_generator_whitespace_only_prompt_error(stub_resource_provide with pytest.raises(ValueError, match="empty"): generator.generate(data={"spaces": " "}) + + +def test_image_cell_generator_with_multi_modal_context(stub_resource_provider): + """Test generate with multi-modal context for autoregressive models.""" + # Create image context that references a column with URL + image_context = ImageContext(column_name="reference_image", data_type=ModalityDataType.URL) + + config = ImageColumnConfig( + name="test_image", + prompt="Generate a similar image to the reference", + model_alias="test_model", + multi_modal_context=[image_context], + ) + + # Setup mock media storage + mock_storage = Mock() + mock_storage.save_base64_image.return_value = "images/generated.png" + stub_resource_provider.artifact_storage.media_storage = mock_storage + + stub_base64_images = ["base64_generated_image"] + + with patch.object( + stub_resource_provider.model_registry.get_model.return_value, + 
"generate_image", + return_value=stub_base64_images, + ) as mock_generate: + generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) + data = generator.generate(data={"reference_image": "https://example.com/image.png"}) + + # Check that column was added + assert config.name in data + assert data[config.name] == ["images/generated.png"] + + # Verify model was called with prompt and multi_modal_context + mock_generate.assert_called_once() + call_args = mock_generate.call_args + assert call_args.kwargs["prompt"] == "Generate a similar image to the reference" + assert call_args.kwargs["multi_modal_context"] is not None + assert len(call_args.kwargs["multi_modal_context"]) == 1 + assert call_args.kwargs["multi_modal_context"][0]["type"] == "image_url" + assert call_args.kwargs["multi_modal_context"][0]["image_url"] == "https://example.com/image.png" + + +def test_image_cell_generator_with_base64_multi_modal_context(stub_resource_provider): + """Test generate with base64 multi-modal context.""" + # Create image context that references a column with base64 data + image_context = ImageContext( + column_name="reference_image", data_type=ModalityDataType.BASE64, image_format=ImageFormat.PNG + ) + + config = ImageColumnConfig( + name="test_image", + prompt="Generate a variation of this image", + model_alias="test_model", + multi_modal_context=[image_context], + ) + + # Setup mock media storage + mock_storage = Mock() + mock_storage.save_base64_image.return_value = "images/generated.png" + stub_resource_provider.artifact_storage.media_storage = mock_storage + + stub_base64_images = ["base64_generated_image"] + + with patch.object( + stub_resource_provider.model_registry.get_model.return_value, + "generate_image", + return_value=stub_base64_images, + ) as mock_generate: + generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) + data = generator.generate(data={"reference_image": "iVBORw0KGgoAAAANS"}) + + # Check 
that column was added + assert config.name in data + assert data[config.name] == ["images/generated.png"] + + # Verify model was called with prompt and multi_modal_context + mock_generate.assert_called_once() + call_args = mock_generate.call_args + assert call_args.kwargs["prompt"] == "Generate a variation of this image" + assert call_args.kwargs["multi_modal_context"] is not None + assert len(call_args.kwargs["multi_modal_context"]) == 1 + assert call_args.kwargs["multi_modal_context"][0]["type"] == "image_url" + # Should be formatted as data URI + assert "data:image/png;base64," in call_args.kwargs["multi_modal_context"][0]["image_url"]["url"] From d6a0f2fcb8b0acb664e6c101a14ae1910095fb62 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 18:39:53 -0700 Subject: [PATCH 34/64] updated tutorial notebook --- docs/notebook_source/5-generating-images.py | 116 +++++++++++++++++--- 1 file changed, 100 insertions(+), 16 deletions(-) diff --git a/docs/notebook_source/5-generating-images.py b/docs/notebook_source/5-generating-images.py index aee5a0c1..28638ff9 100644 --- a/docs/notebook_source/5-generating-images.py +++ b/docs/notebook_source/5-generating-images.py @@ -69,7 +69,7 @@ model=MODEL_ID, provider=MODEL_PROVIDER, inference_parameters=dd.ImageInferenceParams( - extra_body={"size": "1024x1024"}, + extra_body={"height": 512, "width": 512}, ), ) ] @@ -85,7 +85,24 @@ config_builder.add_column( dd.SamplerColumnConfig( - name="subject", + name="style", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=[ + "photorealistic", + "oil painting", + "watercolor", + "digital art", + "sketch", + "anime", + ], + ), + ) +) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="dog_breed", sampler_type=dd.SamplerType.CATEGORY, params=dd.CategorySamplerParams( values=[ @@ -98,6 +115,58 @@ "a Corgi", "a Siberian Husky", "a Dalmatian", + "a Yorkshire Terrier", + "a Boxer", + "a Dachshund", + "a Doberman Pinscher", + "a Shih 
Tzu", + "a Chihuahua", + "a Border Collie", + "an Australian Shepherd", + "a Cocker Spaniel", + "a Maltese", + "a Pomeranian", + "a Saint Bernard", + "a Great Dane", + "an Akita", + "a Samoyed", + "a Boston Terrier", + ], + ), + ) +) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="cat_breed", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=[ + "a Persian", + "a Maine Coon", + "a Siamese", + "a Ragdoll", + "a Bengal", + "an Abyssinian", + "a British Shorthair", + "a Sphynx", + "a Scottish Fold", + "a Russian Blue", + "a Birman", + "an Oriental Shorthair", + "a Norwegian Forest Cat", + "a Devon Rex", + "a Burmese", + "an Egyptian Mau", + "a Tonkinese", + "a Himalayan", + "a Savannah", + "a Chartreux", + "a Somali", + "a Manx", + "a Turkish Angora", + "a Balinese", + "an American Shorthair", ], ), ) @@ -105,7 +174,7 @@ config_builder.add_column( dd.SamplerColumnConfig( - name="age", + name="dog_age", sampler_type=dd.SamplerType.CATEGORY, params=dd.CategorySamplerParams( values=["1-3", "3-6", "6-9", "9-12", "12-15"], @@ -115,24 +184,27 @@ config_builder.add_column( dd.SamplerColumnConfig( - name="style", + name="cat_age", sampler_type=dd.SamplerType.CATEGORY, params=dd.CategorySamplerParams( - values=[ - "photorealistic", - "oil painting", - "watercolor", - "digital art", - "sketch", - "anime", - ], + values=["1-3", "3-6", "6-9", "9-12", "12-18"], + ), + ) +) + +config_builder.add_column( + dd.SamplerColumnConfig( + name="dog_look_direction", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["left", "right", "front", "up", "down"], ), ) ) config_builder.add_column( dd.SamplerColumnConfig( - name="look_direction", + name="cat_look_direction", sampler_type=dd.SamplerType.CATEGORY, params=dd.CategorySamplerParams( values=["left", "right", "front", "up", "down"], @@ -142,7 +214,7 @@ config_builder.add_column( dd.SamplerColumnConfig( - name="emotion", + name="dog_emotion", 
sampler_type=dd.SamplerType.CATEGORY, params=dd.CategorySamplerParams( values=["happy", "curious", "serious", "sleepy", "excited"], @@ -150,12 +222,24 @@ ) ) +config_builder.add_column( + dd.SamplerColumnConfig( + name="cat_emotion", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["aloof", "curious", "content", "sleepy", "playful"], + ), + ) +) + config_builder.add_column( dd.ImageColumnConfig( name="generated_image", prompt=( - "A {{ style }} portrait of {{ subject }} {{ age }} years old looking {{ look_direction }} " - "towards a crowd of the same kind with an {{ emotion }} expression." + """ +A {{ style }} family pet portrait of a {{ dog_breed }} dog of {{ dog_age }} years old looking {{dog_look_direction}} with an {{ dog_emotion }} expression and +{{ cat_breed }} cat of {{ cat_age }} years old looking {{ cat_look_direction }} with an {{ cat_emotion }} expression in the background. Both subjects should be in focus. + """ ), model_alias=MODEL_ALIAS, ) From f5c6cf9418bd432d4b121d6ce82e7f30586e43df Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 18:57:09 -0700 Subject: [PATCH 35/64] organize image artifacts by column name --- .../column_generators/generators/image.py | 6 +- .../engine/storage/media_storage.py | 18 ++-- .../generators/test_image.py | 23 +++-- .../engine/storage/test_media_storage.py | 92 +++++++++++++++---- 4 files changed, 107 insertions(+), 32 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index 11bc732c..55721916 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -70,7 +70,11 @@ def generate(self, data: dict) -> dict: base64_images = 
self.model.generate_image(prompt=prompt, multi_modal_context=multi_modal_context) # Store via media storage (mode determines disk vs dataframe storage) - results = [self.media_storage.save_base64_image(base64_image) for base64_image in base64_images] + # Use column name as subfolder to organize images + results = [ + self.media_storage.save_base64_image(base64_image, subfolder_name=self.config.name) + for base64_image in base64_images + ] data[self.config.name] = results return data diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py index ddac3459..df83e331 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py @@ -66,14 +66,15 @@ def _ensure_images_directory(self) -> None: """Create images directory if it doesn't exist (lazy initialization).""" self.images_dir.mkdir(parents=True, exist_ok=True) - def save_base64_image(self, base64_data: str) -> str: + def save_base64_image(self, base64_data: str, subfolder_name: str) -> str: """Save or return base64 image based on storage mode. 
Args: base64_data: Base64 encoded image string (with or without data URI prefix) + subfolder_name: Subfolder name to organize images (e.g., "images//") Returns: - DISK mode: Relative path to saved image (e.g., "images/f47ac10b-58cc.png") + DISK mode: Relative path to saved image (e.g., "images/subfolder_name/f47ac10b-58cc.png") DATAFRAME mode: Original base64 data string Raises: @@ -85,8 +86,11 @@ def save_base64_image(self, base64_data: str) -> str: return base64_data # DISK mode: save to disk, validate, and return relative path - # Ensure images directory exists (lazy initialization) - self._ensure_images_directory() + # Determine the target directory (organized by subfolder) + target_dir = self.images_dir / subfolder_name + + # Ensure target directory exists (lazy initialization) + target_dir.mkdir(parents=True, exist_ok=True) # Decode base64 to bytes image_bytes = decode_base64_image(base64_data) @@ -97,8 +101,10 @@ def save_base64_image(self, base64_data: str) -> str: # Generate unique filename image_id = uuid.uuid4() filename = f"{image_id}.{image_format.value}" - full_path = self.images_dir / filename - relative_path = f"{self.images_subdir}/{filename}" + full_path = target_dir / filename + + # Build relative path + relative_path = f"{self.images_subdir}/{subfolder_name}/{filename}" # Write to disk with open(full_path, "wb") as f: diff --git a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py index b433bc55..ca5cbfae 100644 --- a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py +++ b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py @@ -43,7 +43,10 @@ def test_image_cell_generator_generate_with_storage( """Test generate with media storage (create mode) - saves to disk.""" # Setup mock media storage mock_storage = Mock() - mock_storage.save_base64_image.side_effect 
= ["images/uuid1.png", "images/uuid2.png"] + mock_storage.save_base64_image.side_effect = [ + "images/test_image/uuid1.png", + "images/test_image/uuid2.png", + ] stub_resource_provider.artifact_storage.media_storage = mock_storage with patch.object( @@ -54,17 +57,20 @@ def test_image_cell_generator_generate_with_storage( generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) data = generator.generate(data={"style": "photorealistic", "subject": "cat"}) - # Check that column was added with relative paths + # Check that column was added with relative paths (organized in subfolder) assert stub_image_column_config.name in data - assert data[stub_image_column_config.name] == ["images/uuid1.png", "images/uuid2.png"] + assert data[stub_image_column_config.name] == [ + "images/test_image/uuid1.png", + "images/test_image/uuid2.png", + ] # Verify model was called with rendered prompt mock_generate.assert_called_once_with(prompt="A photorealistic image of cat", multi_modal_context=None) - # Verify storage was called for each image + # Verify storage was called for each image with subfolder name assert mock_storage.save_base64_image.call_count == 2 - mock_storage.save_base64_image.assert_any_call("base64_image_1") - mock_storage.save_base64_image.assert_any_call("base64_image_2") + mock_storage.save_base64_image.assert_any_call("base64_image_1", subfolder_name="test_image") + mock_storage.save_base64_image.assert_any_call("base64_image_2", subfolder_name="test_image") def test_image_cell_generator_generate_in_dataframe_mode( @@ -91,6 +97,11 @@ def test_image_cell_generator_generate_in_dataframe_mode( # Verify model was called with rendered prompt mock_generate.assert_called_once_with(prompt="A watercolor image of dog", multi_modal_context=None) + # Verify storage was called for each image with subfolder name (even in DATAFRAME mode) + assert mock_storage.save_base64_image.call_count == 2 + 
mock_storage.save_base64_image.assert_any_call("base64_image_1", subfolder_name="test_image") + mock_storage.save_base64_image.assert_any_call("base64_image_2", subfolder_name="test_image") + def test_image_cell_generator_missing_columns_error(stub_image_column_config, stub_resource_provider): """Test that missing required columns raises ValueError.""" diff --git a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py index abd17afe..105348d2 100644 --- a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py +++ b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py @@ -62,10 +62,10 @@ def test_media_storage_init_custom_subdir(tmp_path): def test_save_base64_image_png(media_storage, sample_base64_png): """Test saving a PNG image from base64.""" - relative_path = media_storage.save_base64_image(sample_base64_png) + relative_path = media_storage.save_base64_image(sample_base64_png, subfolder_name="test_column") - # Check return value format - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + # Check return value format (organized by column name) + assert relative_path.startswith(f"{IMAGES_SUBDIR}/test_column/") assert relative_path.endswith(".png") # Check file exists on disk @@ -80,10 +80,10 @@ def test_save_base64_image_png(media_storage, sample_base64_png): def test_save_base64_image_jpg(media_storage, sample_base64_jpg): """Test saving a JPEG image from base64.""" - relative_path = media_storage.save_base64_image(sample_base64_jpg) + relative_path = media_storage.save_base64_image(sample_base64_jpg, subfolder_name="test_column") - # Check return value format - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + # Check return value format (organized by column name) + assert relative_path.startswith(f"{IMAGES_SUBDIR}/test_column/") assert relative_path.endswith(".jpg") # Check file exists on disk @@ -94,10 +94,10 @@ def 
test_save_base64_image_jpg(media_storage, sample_base64_jpg): def test_save_base64_image_with_data_uri(media_storage, sample_base64_png): """Test saving image from data URI format.""" data_uri = f"data:image/png;base64,{sample_base64_png}" - relative_path = media_storage.save_base64_image(data_uri) + relative_path = media_storage.save_base64_image(data_uri, subfolder_name="test_column") - # Should successfully extract base64 and save - assert relative_path.startswith(f"{IMAGES_SUBDIR}/") + # Should successfully extract base64 and save (organized by column name) + assert relative_path.startswith(f"{IMAGES_SUBDIR}/test_column/") assert relative_path.endswith(".png") # Verify file exists and content is correct @@ -111,13 +111,13 @@ def test_save_base64_image_with_data_uri(media_storage, sample_base64_png): def test_save_base64_image_invalid_base64_raises_error(media_storage): """Test that invalid base64 data raises ValueError.""" with pytest.raises(ValueError, match="Invalid base64"): - media_storage.save_base64_image("not-valid-base64!!!") + media_storage.save_base64_image("not-valid-base64!!!", subfolder_name="test_column") def test_save_base64_image_multiple_images_unique_filenames(media_storage, sample_base64_png): """Test that multiple images get unique filenames.""" - path1 = media_storage.save_base64_image(sample_base64_png) - path2 = media_storage.save_base64_image(sample_base64_png) + path1 = media_storage.save_base64_image(sample_base64_png, subfolder_name="test_column") + path2 = media_storage.save_base64_image(sample_base64_png, subfolder_name="test_column") # Paths should be different (different UUIDs) assert path1 != path2 @@ -131,8 +131,8 @@ def test_save_base64_image_disk_mode_validates(tmp_path, sample_base64_png): """Test that DISK mode validates images.""" storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DISK) # Should succeed with valid image - relative_path = storage.save_base64_image(sample_base64_png) - assert 
relative_path.startswith(f"{IMAGES_SUBDIR}/") + relative_path = storage.save_base64_image(sample_base64_png, subfolder_name="test_column") + assert relative_path.startswith(f"{IMAGES_SUBDIR}/test_column/") def test_save_base64_image_disk_mode_corrupted_image_raises_error(tmp_path): @@ -144,18 +144,20 @@ def test_save_base64_image_disk_mode_corrupted_image_raises_error(tmp_path): corrupted_base64 = base64.b64encode(corrupted_bytes).decode() with pytest.raises(ValueError, match="Image validation failed"): - storage.save_base64_image(corrupted_base64) + storage.save_base64_image(corrupted_base64, subfolder_name="test_column") # Check that no files were left behind (cleanup on validation failure) - assert len(list(storage.images_dir.iterdir())) == 0 + column_dir = storage.images_dir / "test_column" + if column_dir.exists(): + assert len(list(column_dir.iterdir())) == 0 def test_save_base64_image_dataframe_mode_returns_base64(tmp_path, sample_base64_png): """Test that DATAFRAME mode returns base64 directly without disk operations.""" storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DATAFRAME) - # Should return the same base64 data - result = storage.save_base64_image(sample_base64_png) + # Should return the same base64 data (column_name is ignored in DATAFRAME mode) + result = storage.save_base64_image(sample_base64_png, subfolder_name="test_column") assert result == sample_base64_png # Directory should not be created in DATAFRAME mode (lazy initialization) @@ -165,10 +167,62 @@ def test_save_base64_image_dataframe_mode_returns_base64(tmp_path, sample_base64 def test_cleanup(media_storage, sample_base64_png): """Test cleanup removes images directory.""" # Save an image first - media_storage.save_base64_image(sample_base64_png) + media_storage.save_base64_image(sample_base64_png, subfolder_name="test_column") assert media_storage.images_dir.exists() assert len(list(media_storage.images_dir.iterdir())) > 0 # Cleanup should remove directory 
media_storage.cleanup() assert not media_storage.images_dir.exists() + + +def test_save_base64_image_with_subfolder_name(media_storage, sample_base64_png): + """Test saving image with subfolder name organizes into subdirectory.""" + subfolder = "test_subfolder" + relative_path = media_storage.save_base64_image(sample_base64_png, subfolder_name=subfolder) + + # Check return value format includes subfolder + assert relative_path.startswith(f"{IMAGES_SUBDIR}/{subfolder}/") + assert relative_path.endswith(".png") + + # Check file exists in correct subdirectory + full_path = media_storage.base_path / relative_path + assert full_path.exists() + assert full_path.parent.name == subfolder + + # Verify file content + saved_bytes = full_path.read_bytes() + expected_bytes = base64.b64decode(sample_base64_png) + assert saved_bytes == expected_bytes + + +def test_save_base64_image_with_different_subfolder_names(media_storage, sample_base64_png, sample_base64_jpg): + """Test that images with different subfolder names are stored in separate subdirectories.""" + path1 = media_storage.save_base64_image(sample_base64_png, subfolder_name="subfolder_a") + path2 = media_storage.save_base64_image(sample_base64_jpg, subfolder_name="subfolder_b") + + # Check paths are in different subdirectories + assert "subfolder_a" in path1 + assert "subfolder_b" in path2 + + # Check both directories exist + subfolder_a_dir = media_storage.images_dir / "subfolder_a" + subfolder_b_dir = media_storage.images_dir / "subfolder_b" + assert subfolder_a_dir.exists() + assert subfolder_b_dir.exists() + + # Check files exist in their respective directories + assert (media_storage.base_path / path1).exists() + assert (media_storage.base_path / path2).exists() + + +def test_save_base64_image_dataframe_mode_with_subfolder_name(tmp_path, sample_base64_png): + """Test that DATAFRAME mode returns base64 directly even with subfolder name.""" + storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DATAFRAME) + + # 
Should return the same base64 data regardless of subfolder name + result = storage.save_base64_image(sample_base64_png, subfolder_name="test_subfolder") + assert result == sample_base64_png + + # Directory should not be created in DATAFRAME mode + assert not storage.images_dir.exists() From 71e2bac46a4e952980498da69845d515cc024635 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 19:33:56 -0700 Subject: [PATCH 36/64] address pr comments --- .../config/utils/visualization.py | 10 ++++--- .../src/data_designer/engine/models/facade.py | 26 ++++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index 910bc467..9d65cca5 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -3,6 +3,7 @@ from __future__ import annotations +import html import json import os from collections import OrderedDict @@ -81,14 +82,17 @@ def _display_image_if_in_notebook(image_data: str, col_name: str, base_path: str # Use the base64 data directly without resizing img_base64 = base64_data + # Escape column name to prevent HTML injection + escaped_col_name = html.escape(col_name) + # Create HTML with caption and image in left-aligned container - html = f""" + html_content = f"""
-
🖼️ {col_name}
+
🖼️ {escaped_col_name}
""" - display(HTML(html)) + display(HTML(html_content)) return True except (ImportError, NameError): # Not in a notebook environment diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index 51940e99..a14231ab 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -11,6 +11,7 @@ from data_designer.config.models import GenerationType, ModelConfig, ModelProvider from data_designer.config.utils.image_helpers import ( extract_base64_from_data_uri, + is_base64_image, is_image_diffusion_model, ) from data_designer.engine.mcp.errors import MCPConfigurationError @@ -40,6 +41,14 @@ def _identity(x: Any) -> Any: return x +def _try_extract_base64(data: str) -> str | None: + """Try to extract base64 image data from a data URI, returning None on failure.""" + try: + return extract_base64_from_data_uri(data) + except ValueError: + return None + + logger = logging.getLogger(__name__) @@ -410,19 +419,22 @@ def _generate_image_chat_completion( image_url = image["image_url"] if isinstance(image_url, dict) and "url" in image_url: - url = image_url["url"] - images.append(extract_base64_from_data_uri(url)) + if (b64 := _try_extract_base64(image_url["url"])) is not None: + images.append(b64) elif isinstance(image_url, str): - images.append(extract_base64_from_data_uri(image_url)) + if (b64 := _try_extract_base64(image_url)) is not None: + images.append(b64) # Fallback: treat as base64 string elif isinstance(image, str): - images.append(extract_base64_from_data_uri(image)) + if (b64 := _try_extract_base64(image)) is not None: + images.append(b64) - # Fallback: check content field + # Fallback: check content field if it looks like image data if not images: content = message.content or "" - if content: - images.append(extract_base64_from_data_uri(content)) + if content and 
(content.startswith("data:image/") or is_base64_image(content)): + if (b64 := _try_extract_base64(content)) is not None: + images.append(b64) if not images: raise ModelAPIError("No image data found in response") From 46138d81c3917a6abb967ae321b2216770fdc6fa Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 19:34:30 -0700 Subject: [PATCH 37/64] fix license headers --- .../data_designer/engine/column_generators/generators/image.py | 2 +- .../src/data_designer/engine/storage/__init__.py | 2 +- .../src/data_designer/engine/storage/media_storage.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index 55721916..730e73bb 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py index 34c776d5..9d416c65 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 from data_designer.engine.storage.media_storage import MediaStorage, StorageMode diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py index df83e331..9adefc89 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations From deb5fc2bdd7f05373ca5559a558441644581c3b3 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 19:37:19 -0700 Subject: [PATCH 38/64] generate collab notebooks --- docs/colab_notebooks/1-the-basics.ipynb | 66 +-- ...ctured-outputs-and-jinja-expressions.ipynb | 62 +-- .../3-seeding-with-a-dataset.ipynb | 58 +-- .../4-providing-images-as-context.ipynb | 70 +-- .../colab_notebooks/5-generating-images.ipynb | 437 ++++++++++++++++++ 5 files changed, 569 insertions(+), 124 deletions(-) create mode 100644 docs/colab_notebooks/5-generating-images.ipynb diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb index ec2c5a99..ed8942df 100644 --- a/docs/colab_notebooks/1-the-basics.ipynb +++ b/docs/colab_notebooks/1-the-basics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "c79eea7a", + "id": "945eebf8", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: The Basics\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "2476f160", + "id": "8e8f2e22", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -26,7 +26,7 @@ }, { "cell_type": "markdown", - "id": "3646f62e", + "id": "92d91bf1", "metadata": 
{}, "source": [ "### ⚑ Colab Setup\n", @@ -37,7 +37,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3348e5c8", + "id": "0b9b4427", "metadata": {}, "outputs": [], "source": [ @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19cd9249", + "id": "8878d172", "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5a6d13a9", + "id": "4c92bfb3", "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "markdown", - "id": "d445af5b", + "id": "4e39eed1", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -89,7 +89,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4df0031d", + "id": "70c96cfb", "metadata": {}, "outputs": [], "source": [ @@ -98,7 +98,7 @@ }, { "cell_type": "markdown", - "id": "0f69b576", + "id": "99d975c9", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -115,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "65d9be99", + "id": "851228c8", "metadata": {}, "outputs": [], "source": [ @@ -145,7 +145,7 @@ }, { "cell_type": "markdown", - "id": "72582d09", + "id": "fefb639d", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -160,7 +160,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8d7992b4", + "id": "0ba52672", "metadata": {}, "outputs": [], "source": [ @@ -169,7 +169,7 @@ }, { "cell_type": "markdown", - "id": "741a15a0", + "id": "7cc2aefc", "metadata": {}, "source": [ "## 🎲 Getting started with sampler columns\n", @@ -186,7 +186,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3879c70", + "id": "a5a34b1a", "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "markdown", - "id": "1575ef81", + "id": "ee4d1b6a", "metadata": {}, "source": [ "Let's start designing our product review dataset by adding product category and subcategory columns.\n" @@ -204,7 
+204,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87a88d7b", + "id": "7782d790", "metadata": {}, "outputs": [], "source": [ @@ -285,7 +285,7 @@ }, { "cell_type": "markdown", - "id": "8c74b738", + "id": "f88e8b18", "metadata": {}, "source": [ "Next, let's add samplers to generate data related to the customer and their review.\n" @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4eb1da1f", + "id": "19174a73", "metadata": {}, "outputs": [], "source": [ @@ -331,7 +331,7 @@ }, { "cell_type": "markdown", - "id": "4324d869", + "id": "01438115", "metadata": {}, "source": [ "## 🦜 LLM-generated columns\n", @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1302a503", + "id": "9c8f1275", "metadata": {}, "outputs": [], "source": [ @@ -382,7 +382,7 @@ }, { "cell_type": "markdown", - "id": "7cf8241b", + "id": "f61e3771", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -399,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6fc6cf39", + "id": "7f8dc56e", "metadata": {}, "outputs": [], "source": [ @@ -409,7 +409,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c929e068", + "id": "5b66172a", "metadata": {}, "outputs": [], "source": [ @@ -420,7 +420,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dfb04e2a", + "id": "b0eaa931", "metadata": {}, "outputs": [], "source": [ @@ -430,7 +430,7 @@ }, { "cell_type": "markdown", - "id": "adb879da", + "id": "122d099d", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -443,7 +443,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ff58dd9f", + "id": "f40f7ba0", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +453,7 @@ }, { "cell_type": "markdown", - "id": "57c7355d", + "id": "597c41ec", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -466,7 +466,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df49db99", + "id": "acf8caa3", "metadata": {}, 
"outputs": [], "source": [ @@ -476,7 +476,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2bbc48dd", + "id": "697e9090", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +489,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dc0673fa", + "id": "18f34e66", "metadata": {}, "outputs": [], "source": [ @@ -501,7 +501,7 @@ }, { "cell_type": "markdown", - "id": "7688217b", + "id": "4c498f62", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", @@ -512,7 +512,9 @@ "\n", "- [Seeding synthetic data generation with an external dataset](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/3-seeding-with-a-dataset/)\n", "\n", - "- [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/)\n" + "- [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/)\n", + "\n", + "- [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/)\n" ] } ], diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb index c813ea50..49be6edb 100644 --- a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "258752cd", + "id": "bd333de9", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Structured Outputs and Jinja Expressions\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "fc4217c3", + "id": "28fb2ee3", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "2b831130", + "id": "fbeb3b2d", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fa1eda43", + "id": "6ef3d2ae", "metadata": {}, "outputs": 
[], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5f014571", + "id": "07546806", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f7409282", + "id": "81b00725", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "8234dd4b", + "id": "a5cf694f", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21633aed", + "id": "8320e2b0", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "9b215265", + "id": "348e2c5a", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "76260638", + "id": "21019fc5", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "e6bfd93d", + "id": "7bf9d9af", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a0fbd497", + "id": "88abb685", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "7faae40e", + "id": "d8e790c6", "metadata": {}, "source": [ "### πŸ§‘β€πŸŽ¨ Designing our data\n", @@ -198,7 +198,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f2f94909", + "id": "64465ab1", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ }, { "cell_type": "markdown", - "id": "696f19f4", + "id": "cfbad124", "metadata": {}, "source": [ "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n" @@ -235,7 +235,7 @@ { "cell_type": "code", "execution_count": null, - "id": "312b50cd", + "id": "aa93a4c9", "metadata": {}, "outputs": [], "source": [ @@ -344,7 +344,7 @@ }, { 
"cell_type": "markdown", - "id": "ecd971ca", + "id": "74aa72fc", "metadata": {}, "source": [ "Next, we will use more advanced Jinja expressions to create new columns.\n", @@ -361,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bda01ffc", + "id": "9ae978cc", "metadata": {}, "outputs": [], "source": [ @@ -414,7 +414,7 @@ }, { "cell_type": "markdown", - "id": "059613e1", + "id": "ec850f14", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -431,7 +431,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23c9b839", + "id": "cb18575e", "metadata": {}, "outputs": [], "source": [ @@ -441,7 +441,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e5adcdbd", + "id": "eee46dc6", "metadata": {}, "outputs": [], "source": [ @@ -452,7 +452,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1cc39cae", + "id": "082d0fc4", "metadata": {}, "outputs": [], "source": [ @@ -462,7 +462,7 @@ }, { "cell_type": "markdown", - "id": "bcca3f06", + "id": "e8d80b94", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -475,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6e1957ca", + "id": "4b0a7299", "metadata": {}, "outputs": [], "source": [ @@ -485,7 +485,7 @@ }, { "cell_type": "markdown", - "id": "9db283d3", + "id": "d7e0c925", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -498,7 +498,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30826883", + "id": "b599d759", "metadata": {}, "outputs": [], "source": [ @@ -508,7 +508,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88d4d3bd", + "id": "07a7c0da", "metadata": {}, "outputs": [], "source": [ @@ -521,7 +521,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8762a2bb", + "id": "7760dffa", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ }, { "cell_type": "markdown", - "id": "0375fcd2", + "id": "6d19000a", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", @@ 
-542,7 +542,9 @@ "\n", "- [Seeding synthetic data generation with an external dataset](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/3-seeding-with-a-dataset/)\n", "\n", - "- [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/)\n" + "- [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/)\n", + "\n", + "- [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/)\n" ] } ], diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb index c5d427d0..468aa795 100644 --- a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "b2a3e544", + "id": "573c3e7b", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "d57c4f0a", + "id": "63f6c36d", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "f7da8723", + "id": "02cc81c7", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", "execution_count": null, - "id": "90a12556", + "id": "18d51631", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8fcdfde5", + "id": "67c55f6b", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5899e85c", + "id": "cfe2ff62", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "6c093c90", + "id": "bdbc5b03", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "6a2066fe", + "id": "55d9caf1", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "f5e81142", + "id": "aa1623bc", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "880012ea", + "id": "9d1310cf", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "4b77a92c", + "id": "e64ce3b7", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f4ab6628", + "id": "dafd6155", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "26fb0a63", + "id": "7c01f11c", "metadata": {}, "source": [ "## πŸ₯ Prepare a seed dataset\n", @@ -196,7 +196,7 @@ { "cell_type": "code", "execution_count": null, - "id": "84908e88", + "id": "7941073f", "metadata": {}, "outputs": [], "source": [ @@ -214,7 +214,7 @@ }, { "cell_type": "markdown", - "id": "1947e70a", + "id": "a68c7d55", "metadata": {}, "source": [ "## 🎨 Designing our synthetic patient notes dataset\n", @@ -227,7 +227,7 @@ { "cell_type": "code", "execution_count": null, - "id": "be2fbad1", + "id": "f1b3d4d4", "metadata": {}, "outputs": [], "source": [ @@ -308,7 +308,7 @@ }, { "cell_type": "markdown", - "id": "8fcce5dc", + "id": "eff1bf9f", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -325,7 +325,7 @@ { "cell_type": "code", "execution_count": null, - "id": "82dc02f8", + "id": "b5955230", "metadata": {}, "outputs": [], "source": [ @@ -335,7 +335,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1f2d1583", + "id": "062a7294", "metadata": {}, "outputs": [], "source": [ @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "62a9173b", + "id": "6378e1be", "metadata": {}, "outputs": [], 
"source": [ @@ -356,7 +356,7 @@ }, { "cell_type": "markdown", - "id": "5263e705", + "id": "51e5175e", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -369,7 +369,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5295320f", + "id": "891b6860", "metadata": {}, "outputs": [], "source": [ @@ -379,7 +379,7 @@ }, { "cell_type": "markdown", - "id": "3ecc195f", + "id": "0f52668f", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -392,7 +392,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3865fb59", + "id": "ed083bd8", "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a7acf2b0", + "id": "039c42e4", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +415,7 @@ { "cell_type": "code", "execution_count": null, - "id": "81a6e999", + "id": "623ca205", "metadata": {}, "outputs": [], "source": [ @@ -427,14 +427,16 @@ }, { "cell_type": "markdown", - "id": "4503b1cf", + "id": "0a7e7d42", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", "\n", "Check out the following notebook to learn more about:\n", "\n", - "- [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/)\n" + "- [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/)\n", + "\n", + "- [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/)\n" ] } ], diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb index cd175537..62ac63e8 100644 --- a/docs/colab_notebooks/4-providing-images-as-context.ipynb +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "90dda708", + "id": "731384ed", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based 
Data Generation" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "52ccb1e5", + "id": "bc66dd23", "metadata": {}, "source": [ "#### πŸ“š What you'll learn\n", @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "9627c4eb", + "id": "4539a931", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -37,7 +37,7 @@ }, { "cell_type": "markdown", - "id": "1817171a", + "id": "f88809bf", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1f15a669", + "id": "3628d4c4", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1201c93b", + "id": "7fcf0f75", "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f814b76c", + "id": "6654714a", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "ac423d57", + "id": "22488cb7", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -113,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3c655c2d", + "id": "39913ca0", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +122,7 @@ }, { "cell_type": "markdown", - "id": "7d41e922", + "id": "fba112ab", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -139,7 +139,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a8b5f4bf", + "id": "70fd86dd", "metadata": {}, "outputs": [], "source": [ @@ -162,7 +162,7 @@ }, { "cell_type": "markdown", - "id": "6455fc58", + "id": "810c7457", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -177,7 +177,7 @@ { "cell_type": "code", "execution_count": null, - "id": "462c2e01", + "id": "9b2204d0", "metadata": {}, "outputs": [], "source": [ @@ -186,7 +186,7 @@ }, { "cell_type": "markdown", - "id": "31369d10", + "id": "29e3dae5", "metadata": {}, "source": [ "### 🌱 
Seed Dataset Creation\n", @@ -203,7 +203,7 @@ { "cell_type": "code", "execution_count": null, - "id": "55d9432a", + "id": "e2cc3506", "metadata": {}, "outputs": [], "source": [ @@ -218,7 +218,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8614c4e9", + "id": "7a821067", "metadata": {}, "outputs": [], "source": [ @@ -266,7 +266,7 @@ { "cell_type": "code", "execution_count": null, - "id": "80550e46", + "id": "359d144b", "metadata": {}, "outputs": [], "source": [ @@ -284,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "65ced9bb", + "id": "985cd308", "metadata": {}, "outputs": [], "source": [ @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34b210e8", + "id": "6a8cb414", "metadata": {}, "outputs": [], "source": [ @@ -306,7 +306,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d506903d", + "id": "a57e1b73", "metadata": {}, "outputs": [], "source": [ @@ -335,7 +335,7 @@ }, { "cell_type": "markdown", - "id": "b91032a2", + "id": "7518100a", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -352,7 +352,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4bd947de", + "id": "4c1fe540", "metadata": {}, "outputs": [], "source": [ @@ -362,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d0ff4c07", + "id": "bceafe91", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +373,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e97e4dfe", + "id": "20f4ace5", "metadata": {}, "outputs": [], "source": [ @@ -383,7 +383,7 @@ }, { "cell_type": "markdown", - "id": "0a284c12", + "id": "16a86d56", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -396,7 +396,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2570e7fd", + "id": "c1bbae97", "metadata": {}, "outputs": [], "source": [ @@ -406,7 +406,7 @@ }, { "cell_type": "markdown", - "id": "28b8eb5a", + "id": "d8d7604f", "metadata": {}, "source": [ "### πŸ”Ž 
Visual Inspection\n", @@ -417,7 +417,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5d0d9336", + "id": "27c0636c", "metadata": { "lines_to_next_cell": 2 }, @@ -441,7 +441,7 @@ }, { "cell_type": "markdown", - "id": "1c257a81", + "id": "f6b99539", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -454,7 +454,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e6d840e9", + "id": "e5d53787", "metadata": {}, "outputs": [], "source": [ @@ -464,7 +464,7 @@ { "cell_type": "code", "execution_count": null, - "id": "909e6f3f", + "id": "1f859e49", "metadata": {}, "outputs": [], "source": [ @@ -477,7 +477,7 @@ { "cell_type": "code", "execution_count": null, - "id": "adbb4cae", + "id": "6688e3c5", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +489,7 @@ }, { "cell_type": "markdown", - "id": "d085584c", + "id": "28635b09", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", @@ -499,7 +499,9 @@ "- Experiment with different vision models for specific document types\n", "- Try different prompt variations to generate specialized descriptions (e.g., technical details, key findings)\n", "- Combine vision-based summaries with other column types for multi-modal workflows\n", - "- Apply this pattern to other vision tasks like image captioning, OCR validation, or visual question answering\n" + "- Apply this pattern to other vision tasks like image captioning, OCR validation, or visual question answering\n", + "\n", + "- [Generating images](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) with Data Designer\n" ] } ], diff --git a/docs/colab_notebooks/5-generating-images.ipynb b/docs/colab_notebooks/5-generating-images.ipynb new file mode 100644 index 00000000..485fe258 --- /dev/null +++ b/docs/colab_notebooks/5-generating-images.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0ee289e6", + "metadata": {}, + "source": [ + "# 🎨 Data Designer Tutorial: Generating Images\n", + "\n", + "#### πŸ“š 
What you'll learn\n", + "\n", + "This notebook shows how to generate synthetic image data with Data Designer using image-generation models.\n", + "\n", + "- 🖼️ **Image generation columns**: Add columns that produce images from text prompts\n", + "- 📝 **Jinja2 prompts**: Drive diversity by referencing other columns in your prompt template\n", + "- 💾 **Preview vs create**: Preview stores base64 in the dataframe; create saves images to disk and stores paths\n", + "\n", + "Data Designer supports both **diffusion** (e.g. DALL·E, Stable Diffusion, Imagen) and **autoregressive** (e.g. Gemini image, GPT image) models; the API is chosen automatically from the model name.\n", + "\n", + "If this is your first time using Data Designer, we recommend starting with the [first notebook](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/1-the-basics/) in this tutorial series.\n" + ] + }, + { + "cell_type": "markdown", + "id": "86f748c1", + "metadata": {}, + "source": [ + "### 📦 Import Data Designer\n", + "\n", + "- `data_designer.config` provides the configuration API.\n", + "- `DataDesigner` is the main interface for generation.\n" + ] + }, + { + "cell_type": "markdown", + "id": "c610ee22", + "metadata": {}, + "source": [ + "### ⚡ Colab Setup\n", + "\n", + "Run the cells below to install the dependencies and set up the API key. 
If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "818ca495", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install -U data-designer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f165bb15", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "from google.colab import userdata\n", + "\n", + "try:\n", + " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n", + "except userdata.SecretNotFoundError:\n", + " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5decfc83", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image as IPImage\n", + "from IPython.display import display\n", + "\n", + "import data_designer.config as dd\n", + "from data_designer.interface import DataDesigner" + ] + }, + { + "cell_type": "markdown", + "id": "929f35d6", + "metadata": {}, + "source": [ + "### ⚙️ Initialize the Data Designer interface\n", + "\n", + "When initialized without arguments, [default model providers](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/default-model-settings/) are used. This tutorial uses [OpenRouter](https://openrouter.ai) with the Flux 2 Pro image model; set `OPENROUTER_API_KEY` in your environment before running the cells below (the Colab setup cell above only sets `NVIDIA_API_KEY`).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4c8b7d7", + "metadata": {}, + "outputs": [], + "source": [ + "data_designer = DataDesigner()" + ] + }, + { + "cell_type": "markdown", + "id": "8ed7b0b6", + "metadata": {}, + "source": [ + "### 🎛️ Define an image-generation model\n", + "\n", + "- Use `ImageInferenceParams` so Data Designer treats this model as an image generator.\n", + "- Image options (size, quality, aspect ratio, etc.) 
are model-specific; pass them via `extra_body`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6b1ca66", + "metadata": {}, + "outputs": [], + "source": [ + "MODEL_PROVIDER = \"openrouter\"\n", + "MODEL_ID = \"black-forest-labs/flux.2-pro\"\n", + "MODEL_ALIAS = \"image-model\"\n", + "\n", + "model_configs = [\n", + " dd.ModelConfig(\n", + " alias=MODEL_ALIAS,\n", + " model=MODEL_ID,\n", + " provider=MODEL_PROVIDER,\n", + " inference_parameters=dd.ImageInferenceParams(\n", + " extra_body={\"height\": 512, \"width\": 512},\n", + " ),\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "498cfecf", + "metadata": {}, + "source": [ + "### 🏗️ Build the config: samplers + image column\n", + "\n", + "We'll generate diverse **family pet portrait** images of a dog with a cat in the background: sampler columns drive the style and each animal's breed, age, look direction, and emotion. The image-generation column uses a Jinja2 prompt that references all of them.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e74fc7ab", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs)\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"style\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\n", + " \"photorealistic\",\n", + " \"oil painting\",\n", + " \"watercolor\",\n", + " \"digital art\",\n", + " \"sketch\",\n", + " \"anime\",\n", + " ],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"dog_breed\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\n", + " \"a Golden Retriever\",\n", + " \"a German Shepherd\",\n", + " \"a Labrador Retriever\",\n", + " \"a Bulldog\",\n", + " \"a Beagle\",\n", + " \"a Poodle\",\n", + " \"a Corgi\",\n", + " \"a Siberian Husky\",\n", + " \"a 
Dalmatian\",\n", + " \"a Yorkshire Terrier\",\n", + " \"a Boxer\",\n", + " \"a Dachshund\",\n", + " \"a Doberman Pinscher\",\n", + " \"a Shih Tzu\",\n", + " \"a Chihuahua\",\n", + " \"a Border Collie\",\n", + " \"an Australian Shepherd\",\n", + " \"a Cocker Spaniel\",\n", + " \"a Maltese\",\n", + " \"a Pomeranian\",\n", + " \"a Saint Bernard\",\n", + " \"a Great Dane\",\n", + " \"an Akita\",\n", + " \"a Samoyed\",\n", + " \"a Boston Terrier\",\n", + " ],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"cat_breed\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\n", + " \"a Persian\",\n", + " \"a Maine Coon\",\n", + " \"a Siamese\",\n", + " \"a Ragdoll\",\n", + " \"a Bengal\",\n", + " \"an Abyssinian\",\n", + " \"a British Shorthair\",\n", + " \"a Sphynx\",\n", + " \"a Scottish Fold\",\n", + " \"a Russian Blue\",\n", + " \"a Birman\",\n", + " \"an Oriental Shorthair\",\n", + " \"a Norwegian Forest Cat\",\n", + " \"a Devon Rex\",\n", + " \"a Burmese\",\n", + " \"an Egyptian Mau\",\n", + " \"a Tonkinese\",\n", + " \"a Himalayan\",\n", + " \"a Savannah\",\n", + " \"a Chartreux\",\n", + " \"a Somali\",\n", + " \"a Manx\",\n", + " \"a Turkish Angora\",\n", + " \"a Balinese\",\n", + " \"an American Shorthair\",\n", + " ],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"dog_age\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\"1-3\", \"3-6\", \"6-9\", \"9-12\", \"12-15\"],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"cat_age\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\"1-3\", \"3-6\", \"6-9\", \"9-12\", \"12-18\"],\n", + " ),\n", + " )\n", + ")\n", + "\n", + 
"config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"dog_look_direction\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\"left\", \"right\", \"front\", \"up\", \"down\"],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"cat_look_direction\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\"left\", \"right\", \"front\", \"up\", \"down\"],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"dog_emotion\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\"happy\", \"curious\", \"serious\", \"sleepy\", \"excited\"],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"cat_emotion\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\"aloof\", \"curious\", \"content\", \"sleepy\", \"playful\"],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " dd.ImageColumnConfig(\n", + " name=\"generated_image\",\n", + " prompt=(\n", + " \"\"\"\n", + "A {{ style }} family pet portrait of a {{ dog_breed }} dog of {{ dog_age }} years old looking {{dog_look_direction}} with an {{ dog_emotion }} expression and\n", + "{{ cat_breed }} cat of {{ cat_age }} years old looking {{ cat_look_direction }} with an {{ cat_emotion }} expression in the background. 
Both subjects should be in focus.\n", + " \"\"\"\n", + " ),\n", + " model_alias=MODEL_ALIAS,\n", + " )\n", + ")\n", + "\n", + "data_designer.validate(config_builder)" + ] + }, + { + "cell_type": "markdown", + "id": "c592c820", + "metadata": {}, + "source": [ + "### πŸ” Preview: images as base64\n", + "\n", + "In **preview** mode, generated images are stored as base64 strings in the dataframe. Run the next cell to step through each record (images are shown in the sample record display, but only in a notebook environment).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eee17bb1", + "metadata": {}, + "outputs": [], + "source": [ + "preview = data_designer.preview(config_builder, num_records=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cd320cc", + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(len(preview.dataset)):\n", + " preview.display_sample_record()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffb5e188", + "metadata": {}, + "outputs": [], + "source": [ + "preview.dataset" + ] + }, + { + "cell_type": "markdown", + "id": "87b83328", + "metadata": {}, + "source": [ + "### πŸ†™ Create: images saved to disk\n", + "\n", + "In **create** mode, images are written to an `images/` folder with UUID filenames; the dataframe stores relative paths (e.g. 
`images/1d16b6e2-562f-4f51-91e5-baaa999ea916.png`).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8f9cc41", + "metadata": {}, + "outputs": [], + "source": [ + "results = data_designer.create(config_builder, num_records=5, dataset_name=\"tutorial-5-images\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d4453e5", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = results.load_dataset()\n", + "dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "198301ab", + "metadata": {}, + "outputs": [], + "source": [ + "# Display all images from the created dataset. Paths are relative to the artifact output directory.\n", + "for index, row in dataset.iterrows():\n", + " path_or_list = row.get(\"generated_image\")\n", + " if path_or_list is not None:\n", + " for path in path_or_list:\n", + " base = results.artifact_storage.base_dataset_path\n", + " full_path = base / path\n", + " display(IPImage(data=full_path))" + ] + }, + { + "cell_type": "markdown", + "id": "2bdcef2b", + "metadata": {}, + "source": [ + "## ⏭️ Next steps\n", + "\n", + "- [The basics](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/1-the-basics/): samplers and LLM text columns\n", + "- [Structured outputs and Jinja](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/2-structured-outputs-and-jinja-expressions/)\n", + "- [Seeding with a dataset](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/3-seeding-with-a-dataset/)\n", + "- [Providing images as context](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/4-providing-images-as-context/)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d11d049aad3474476a03a099c701f596c24c8ca2 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 19:39:10 -0700 Subject: [PATCH 39/64] move pillow to
lib dep from notebook --- packages/data-designer-config/pyproject.toml | 1 + uv.lock | 2 ++ 2 files changed, 3 insertions(+) diff --git a/packages/data-designer-config/pyproject.toml b/packages/data-designer-config/pyproject.toml index 04af4adc..569c8fe0 100644 --- a/packages/data-designer-config/pyproject.toml +++ b/packages/data-designer-config/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "jinja2>=3.1.6,<4", "numpy>=1.23.5,<3", "pandas>=2.3.3,<3", + "pillow>=12.0.0,<13", "pyarrow>=19.0.1,<20", # Required for parquet I/O operations "pydantic[email]>=2.9.2,<3", "pygments>=2.19.2,<3", diff --git a/uv.lock b/uv.lock index b26a9385..6a4a8432 100644 --- a/uv.lock +++ b/uv.lock @@ -965,6 +965,7 @@ dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pandas" }, + { name = "pillow" }, { name = "pyarrow" }, { name = "pydantic", extra = ["email"] }, { name = "pygments" }, @@ -978,6 +979,7 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6,<4" }, { name = "numpy", specifier = ">=1.23.5,<3" }, { name = "pandas", specifier = ">=2.3.3,<3" }, + { name = "pillow", specifier = ">=12.0.0,<13" }, { name = "pyarrow", specifier = ">=19.0.1,<20" }, { name = "pydantic", extras = ["email"], specifier = ">=2.9.2,<3" }, { name = "pygments", specifier = ">=2.19.2,<3" }, From 511e1f26a3180579227d01598bdf7e90b2e07b26 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 19:41:49 -0700 Subject: [PATCH 40/64] update uv lock" --- pyproject.toml | 7 +++---- uv.lock | 37 ++++++++++++------------------------- 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7b71536..988e6f04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,11 +42,11 @@ dev-dependencies = [ [dependency-groups] 
dev = [ "jsonpath-ng>=1.5.3,<2", - "pytest>=9.0.2,<10", - "pytest-asyncio>=1.3.0,<2", + "pytest>=8.3.3,<9", + "pytest-asyncio>=0.24.0,<1", "pytest-cov>=7.0.0,<8", "pytest-env>=1.2.0,<2", - "pytest-httpx>=0.36.0,<1", + "pytest-httpx>=0.35.0,<1", "pre-commit>=4.0.0,<5", ] docs = [ @@ -63,7 +63,6 @@ notebooks = [ "datasets>=4.0.0,<5", "ipykernel>=6.29.0,<7", "jupyter>=1.0.0,<2", - "pillow>=12.0.0,<13", ] recipes = [ "bm25s>=0.2.0,<1", diff --git a/uv.lock b/uv.lock index 6a4a8432..9a111de5 100644 --- a/uv.lock +++ b/uv.lock @@ -308,15 +308,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" }, ] -[[package]] -name = "backports-asyncio-runner" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8e/ff/70dca7d7cb1cbc0edb2c6cc0c38b65cba36cccc491eca64cabd5fe7f8670/backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162", size = 69893, upload-time = "2025-07-02T02:27:15.685Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" }, -] - [[package]] name = "backrefs" version = "6.1" @@ -1071,7 +1062,6 @@ notebooks = [ { name = "datasets" }, { name = "ipykernel" }, { name = "jupyter" }, - { name = "pillow" }, ] recipes = [ { name = "bm25s" }, @@ -1085,11 +1075,11 @@ requires-dist = [{ name = "matplotlib", specifier = ">=3.10.8" }] dev = [ { name = "jsonpath-ng", specifier = ">=1.5.3,<2" }, { name = "pre-commit", specifier = 
">=4.0.0,<5" }, - { name = "pytest", specifier = ">=9.0.2,<10" }, - { name = "pytest-asyncio", specifier = ">=1.3.0,<2" }, + { name = "pytest", specifier = ">=8.3.3,<9" }, + { name = "pytest-asyncio", specifier = ">=0.24.0,<1" }, { name = "pytest-cov", specifier = ">=7.0.0,<8" }, { name = "pytest-env", specifier = ">=1.2.0,<2" }, - { name = "pytest-httpx", specifier = ">=0.36.0,<1" }, + { name = "pytest-httpx", specifier = ">=0.35.0,<1" }, { name = "ruff", specifier = ">=0.14.10,<1" }, ] docs = [ @@ -1106,7 +1096,6 @@ notebooks = [ { name = "datasets", specifier = ">=4.0.0,<5" }, { name = "ipykernel", specifier = ">=6.29.0,<7" }, { name = "jupyter", specifier = ">=1.0.0,<2" }, - { name = "pillow", specifier = ">=12.0.0,<13" }, ] recipes = [ { name = "bm25s", specifier = ">=0.2.0,<1" }, @@ -4407,7 +4396,7 @@ wheels = [ [[package]] name = "pytest" -version = "9.0.2" +version = "8.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -4418,23 +4407,21 @@ dependencies = [ { name = "pygments" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = 
"2025-12-06T21:30:49.154Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, ] [[package]] name = "pytest-asyncio" -version = "1.3.0" +version = "0.26.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" }, { name = "pytest" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/c4/453c52c659521066969523e87d85d54139bbd17b78f09532fb8eb8cdb58e/pytest_asyncio-0.26.0.tar.gz", hash = "sha256:c4df2a697648241ff39e7f0e4a73050b03f123f760673956cf0d72a4990e312f", size = 54156, upload-time = "2025-03-25T06:22:28.883Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, + { url = "https://files.pythonhosted.org/packages/20/7f/338843f449ace853647ace35870874f69a764d251872ed1b4de9f234822c/pytest_asyncio-0.26.0-py3-none-any.whl", hash = "sha256:7b51ed894f4fbea1340262bdae5135797ebbe21d8638978e35d31c6d19f72fb0", size = 19694, upload-time = "2025-03-25T06:22:27.807Z" }, ] [[package]] @@ -4466,15 +4453,15 @@ wheels = [ [[package]] name = "pytest-httpx" -version = "0.36.0" +version = "0.35.0" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/5574834da9499066fa1a5ea9c336f94dba2eae02298d36dab192fcf95c86/pytest_httpx-0.36.0.tar.gz", hash = "sha256:9edb66a5fd4388ce3c343189bc67e7e1cb50b07c2e3fc83b97d511975e8a831b", size = 56793, upload-time = "2025-12-02T16:34:57.414Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1f/89/5b12b7b29e3d0af3a4b9c071ee92fa25a9017453731a38f08ba01c280f4c/pytest_httpx-0.35.0.tar.gz", hash = "sha256:d619ad5d2e67734abfbb224c3d9025d64795d4b8711116b1a13f72a251ae511f", size = 54146, upload-time = "2024-11-28T19:16:54.237Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/d2/1eb1ea9c84f0d2033eb0b49675afdc71aa4ea801b74615f00f3c33b725e3/pytest_httpx-0.36.0-py3-none-any.whl", hash = "sha256:bd4c120bb80e142df856e825ec9f17981effb84d159f9fa29ed97e2357c3a9c8", size = 20229, upload-time = "2025-12-02T16:34:56.45Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ed/026d467c1853dd83102411a78126b4842618e86c895f93528b0528c7a620/pytest_httpx-0.35.0-py3-none-any.whl", hash = "sha256:ee11a00ffcea94a5cbff47af2114d34c5b231c326902458deed73f9c459fd744", size = 19442, upload-time = "2024-11-28T19:16:52.787Z" }, ] [[package]] From 2b22df8517fd225a462608b19b568a7561968e92 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:00:24 -0700 Subject: [PATCH 41/64] remove legacy flag from display_sample_record --- .../data_designer/config/utils/visualization.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index 9d65cca5..2132b83b 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -47,13 +47,12 
@@ console = Console() -def _display_image_if_in_notebook(image_data: str, col_name: str, base_path: str | None = None) -> bool: +def _display_image_if_in_notebook(image_data: str, col_name: str) -> bool: """Display image with caption in Jupyter notebook if available. Args: image_data: Base64-encoded image data, data URI, or file path. col_name: Name of the column (used for caption). - base_path: Optional base path to resolve relative image paths. Returns: True if image was displayed, False otherwise. @@ -66,7 +65,7 @@ def _display_image_if_in_notebook(image_data: str, col_name: str, base_path: str # Check if it's a file path and load it if is_image_path(image_data) and not image_data.startswith("data:image/"): - loaded_base64 = load_image_path_to_base64(image_data, base_path) + loaded_base64 = load_image_path_to_base64(image_data) if loaded_base64 is None: console.print( f"[yellow]⚠️ Could not load image from path '{image_data}' for column '{col_name}'[/yellow]" @@ -191,11 +190,6 @@ def display_sample_record( None if hide_seed_columns or self.dataset_metadata is None else self.dataset_metadata.seed_column_names ) - # Try to get base path from artifact storage if available - base_path = None - if hasattr(self, "artifact_storage") and self.artifact_storage is not None: - base_path = str(self.artifact_storage.base_dataset_path) - display_sample_record( record=record, processor_data_to_display=processor_data_to_display, @@ -204,7 +198,6 @@ def display_sample_record( syntax_highlighting_theme=syntax_highlighting_theme, record_index=i, seed_column_names=seed_column_names, - base_path=base_path, ) if index is None: self._display_cycle_index = (self._display_cycle_index + 1) % num_records @@ -238,7 +231,6 @@ def display_sample_record( syntax_highlighting_theme: str = "dracula", record_index: int | None = None, seed_column_names: list[str] | None = None, - base_path: str | None = None, ): if isinstance(record, (dict, pd.Series)): record = pd.DataFrame([record]).iloc[0] @@ 
-420,7 +412,7 @@ def display_sample_record( # Display images at the bottom with captions (only in notebook) if len(images_to_display_later) > 0: for col_name, image_data in images_to_display_later: - _display_image_if_in_notebook(image_data, col_name, base_path=base_path) + _display_image_if_in_notebook(image_data, col_name) def get_truncated_list_as_string(long_list: list[Any], max_items: int = 2) -> str: From 92395447905b869370caa23c9c7732ac2823c6f2 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:02:40 -0700 Subject: [PATCH 42/64] remove unnecessary override of generate kwargs --- .../data-designer-config/src/data_designer/config/models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index 0542a8b8..51954a74 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -453,10 +453,6 @@ class ImageInferenceParams(BaseInferenceParams): generation_type: Literal[GenerationType.IMAGE] = GenerationType.IMAGE - @property - def generate_kwargs(self) -> dict[str, Any]: - return super().generate_kwargs - InferenceParamsT: TypeAlias = Annotated[ ChatCompletionInferenceParams | EmbeddingInferenceParams | ImageInferenceParams, From 3a779aaabc8ae4cc75fba1c4db310c6ce8c60042 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:10:23 -0700 Subject: [PATCH 43/64] Restore some changes not needed --- .../dataset_builders/column_wise_builder.py | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py index 7bd578d0..9077e807 100644 --- 
a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Callable from data_designer.config.column_configs import CustomColumnConfig -from data_designer.config.column_types import ColumnConfigT +from data_designer.config.column_types import ColumnConfigT, DataDesignerColumnType from data_designer.config.config_builder import BuilderConfig from data_designer.config.data_designer_config import DataDesignerConfig from data_designer.config.dataset_builders import BuildStage @@ -170,21 +170,15 @@ def process_preview(self, dataset: pd.DataFrame) -> pd.DataFrame: def _has_image_columns(self) -> bool: """Check if config has any image generation columns.""" - from data_designer.config.column_types import DataDesignerColumnType - return any(col.column_type == DataDesignerColumnType.IMAGE for col in self.single_column_configs) def _initialize_generators(self) -> list[ColumnGenerator]: - """Initialize column generators. 
- - Generators access media storage via ResourceProvider.artifact_storage.media_storage - """ - generators = [] - for config in self._column_configs: - generator_cls = self._registry.column_generators.get_for_config_type(type(config)) - generator = generator_cls(config=config, resource_provider=self._resource_provider) - generators.append(generator) - return generators + return [ + self._registry.column_generators.get_for_config_type(type(config))( + config=config, resource_provider=self._resource_provider + ) + for config in self._column_configs + ] def _write_builder_config(self) -> None: self.artifact_storage.mkdir_if_needed(self.artifact_storage.base_dataset_path) From 33b6cd9bf942ec3b6b9fcf365a4297b3f5ea1138 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:16:02 -0700 Subject: [PATCH 44/64] use a specific image generation exception instead of generic ModelAPIError --- .../src/data_designer/engine/models/errors.py | 3 ++ .../src/data_designer/engine/models/facade.py | 10 ++--- .../tests/engine/models/test_facade.py | 45 ++++++++++++++++++- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/models/errors.py b/packages/data-designer-engine/src/data_designer/engine/models/errors.py index 3e1ddf01..8ca1ebfd 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/errors.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/errors.py @@ -83,6 +83,9 @@ class ModelStructuredOutputError(DataDesignerError): ... class ModelGenerationValidationFailureError(DataDesignerError): ... +class ImageGenerationError(DataDesignerError): ... 
+ + class FormattedLLMErrorMessage(BaseModel): cause: str solution: str diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index a14231ab..447ad87b 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -18,7 +18,7 @@ from data_designer.engine.model_provider import ModelProviderRegistry from data_designer.engine.models.errors import ( GenerationValidationFailureError, - ModelAPIError, + ImageGenerationError, catch_llm_exceptions, get_exception_primary_cause, ) @@ -340,7 +340,7 @@ def generate_image( List of base64-encoded image strings (without data URI prefix) Raises: - ModelAPIError: If image generation fails or returns invalid data + ImageGenerationError: If image generation fails or returns invalid data """ logger.debug( f"Generating image with model {self.model_name!r}...", @@ -406,7 +406,7 @@ def _generate_image_chat_completion( # Validate response structure if not response.choices or len(response.choices) == 0: - raise ModelAPIError("Response missing choices") + raise ImageGenerationError("Image generation response missing choices") message = response.choices[0].message images = [] @@ -437,7 +437,7 @@ def _generate_image_chat_completion( images.append(b64) if not images: - raise ModelAPIError("No image data found in response") + raise ImageGenerationError("No image data found in response") return images @@ -469,7 +469,7 @@ def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = Fal # Validate response if not response.data or len(response.data) == 0: - raise ModelAPIError("Image generation returned no data") + raise ImageGenerationError("Image generation returned no data") # Return all images as list return [img.b64_json for img in response.data] diff --git a/packages/data-designer-engine/tests/engine/models/test_facade.py 
b/packages/data-designer-engine/tests/engine/models/test_facade.py index 0323ce98..b80a22e7 100644 --- a/packages/data-designer-engine/tests/engine/models/test_facade.py +++ b/packages/data-designer-engine/tests/engine/models/test_facade.py @@ -9,7 +9,7 @@ import pytest from data_designer.engine.mcp.errors import MCPConfigurationError, MCPToolError -from data_designer.engine.models.errors import ModelGenerationValidationFailureError +from data_designer.engine.models.errors import ImageGenerationError, ModelGenerationValidationFailureError from data_designer.engine.models.facade import ModelFacade from data_designer.engine.models.parsers.errors import ParserException from data_designer.engine.models.utils import ChatMessage @@ -1106,6 +1106,49 @@ def test_generate_image_skip_usage_tracking( assert stub_model_facade.usage_stats.image_usage.has_usage is False +@patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True) +def test_generate_image_chat_completion_no_choices( + mock_completion: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image raises ImageGenerationError when response has no choices.""" + mock_response = litellm.types.utils.ModelResponse(choices=[]) + mock_completion.return_value = mock_response + + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=False): + with pytest.raises(ImageGenerationError, match="Image generation response missing choices"): + stub_model_facade.generate_image(prompt="test prompt") + + +@patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True) +def test_generate_image_chat_completion_no_image_data( + mock_completion: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image raises ImageGenerationError when no image data in response.""" + mock_message = litellm.types.utils.Message(role="assistant", content="just text, no image") + mock_response = 
litellm.types.utils.ModelResponse(choices=[litellm.types.utils.Choices(message=mock_message)]) + mock_completion.return_value = mock_response + + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=False): + with pytest.raises(ImageGenerationError, match="No image data found in response"): + stub_model_facade.generate_image(prompt="test prompt") + + +@patch("data_designer.engine.models.facade.CustomRouter.image_generation", autospec=True) +def test_generate_image_diffusion_no_data( + mock_image_generation: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image raises ImageGenerationError when diffusion API returns no data.""" + mock_response = litellm.types.utils.ImageResponse(data=[]) + mock_image_generation.return_value = mock_response + + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=True): + with pytest.raises(ImageGenerationError, match="Image generation returned no data"): + stub_model_facade.generate_image(prompt="test prompt") + + @patch("data_designer.engine.models.facade.CustomRouter.image_generation", autospec=True) def test_generate_image_accumulates_usage( mock_image_generation: Any, From 3a98cafc0e9e08b1dd310d167054b062f20a7802 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:28:17 -0700 Subject: [PATCH 45/64] more cleanup --- .../src/data_designer/engine/models/facade.py | 2 +- .../engine/storage/media_storage.py | 10 - .../tests/engine/models/test_facade.py | 2 +- .../engine/storage/test_media_storage.py | 12 - .../integrations/huggingface/client.py | 4 +- pyproject.toml | 3 - uv.lock | 410 ------------------ 7 files changed, 4 insertions(+), 439 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index 447ad87b..e637d9f4 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ 
b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -437,7 +437,7 @@ def _generate_image_chat_completion( images.append(b64) if not images: - raise ImageGenerationError("No image data found in response") + raise ImageGenerationError("No image data found in image generation response") return images diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py index 9adefc89..3726b7f7 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py @@ -3,7 +3,6 @@ from __future__ import annotations -import shutil import uuid from pathlib import Path @@ -30,10 +29,6 @@ class MediaStorage: Currently supports: - Images (PNG, JPG, WEBP) - Future support planned for: - - Audio - - Video - Storage modes: - DISK: Save media to disk and return relative paths (for dataset creation) - DATAFRAME: Return base64 data directly (for preview mode) @@ -130,8 +125,3 @@ def _validate_image(self, image_path: Path) -> None: # Clean up invalid file image_path.unlink(missing_ok=True) raise - - def cleanup(self) -> None: - """Clean up image directory (for preview mode).""" - if self.images_dir.exists(): - shutil.rmtree(self.images_dir) diff --git a/packages/data-designer-engine/tests/engine/models/test_facade.py b/packages/data-designer-engine/tests/engine/models/test_facade.py index b80a22e7..65c66896 100644 --- a/packages/data-designer-engine/tests/engine/models/test_facade.py +++ b/packages/data-designer-engine/tests/engine/models/test_facade.py @@ -1131,7 +1131,7 @@ def test_generate_image_chat_completion_no_image_data( mock_completion.return_value = mock_response with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=False): - with pytest.raises(ImageGenerationError, match="No image data found in 
response"): + with pytest.raises(ImageGenerationError, match="No image data found in image generation response"): stub_model_facade.generate_image(prompt="test prompt") diff --git a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py index 105348d2..3648486d 100644 --- a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py +++ b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py @@ -164,18 +164,6 @@ def test_save_base64_image_dataframe_mode_returns_base64(tmp_path, sample_base64 assert not storage.images_dir.exists() -def test_cleanup(media_storage, sample_base64_png): - """Test cleanup removes images directory.""" - # Save an image first - media_storage.save_base64_image(sample_base64_png, subfolder_name="test_column") - assert media_storage.images_dir.exists() - assert len(list(media_storage.images_dir.iterdir())) > 0 - - # Cleanup should remove directory - media_storage.cleanup() - assert not media_storage.images_dir.exists() - - def test_save_base64_image_with_subfolder_name(media_storage, sample_base64_png): """Test saving image with subfolder name organizes into subdirectory.""" subfolder = "test_subfolder" diff --git a/packages/data-designer/src/data_designer/integrations/huggingface/client.py b/packages/data-designer/src/data_designer/integrations/huggingface/client.py index 2e84ee3c..1d0a0f0e 100644 --- a/packages/data-designer/src/data_designer/integrations/huggingface/client.py +++ b/packages/data-designer/src/data_designer/integrations/huggingface/client.py @@ -193,11 +193,11 @@ def _upload_images_folder(self, repo_id: str, images_folder: Path) -> None: if not images_folder.exists(): return - image_files = list(images_folder.glob("*")) + image_files = list(images_folder.rglob("*.*")) if not image_files: return - logger.info(f" |-- {RandomEmoji.loading()} Uploading {len(image_files)} images...") + logger.info(f" |-- 
{RandomEmoji.loading()} Uploading {len(image_files)} image files...") try: self._api.upload_folder( diff --git a/pyproject.toml b/pyproject.toml index 988e6f04..35566648 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,9 +14,6 @@ name = "data-designer-workspace" version = "0.0.0" # Placeholder, never used since package = false description = "DataDesigner monorepo workspace" requires-python = ">=3.10" -dependencies = [ - "matplotlib>=3.10.8", -] [build-system] requires = ["hatchling"] diff --git a/uv.lock b/uv.lock index 9a111de5..17306f0e 100644 --- a/uv.lock +++ b/uv.lock @@ -595,163 +595,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl", hash = "sha256:c615d91d75f7f04f095b30d1c1711babd43bdc6419c1be9886a85f2f4e489417", size = 7294, upload-time = "2025-07-25T14:02:02.896Z" }, ] -[[package]] -name = "contourpy" -version = "1.3.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/12/a3/da4153ec8fe25d263aa48c1a4cbde7f49b59af86f0b6f7862788c60da737/contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934", size = 268551, upload-time = "2025-04-15T17:34:46.581Z" }, - { url = 
"https://files.pythonhosted.org/packages/2f/6c/330de89ae1087eb622bfca0177d32a7ece50c3ef07b28002de4757d9d875/contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989", size = 253399, upload-time = "2025-04-15T17:34:51.427Z" }, - { url = "https://files.pythonhosted.org/packages/c1/bd/20c6726b1b7f81a8bee5271bed5c165f0a8e1f572578a9d27e2ccb763cb2/contourpy-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9be002b31c558d1ddf1b9b415b162c603405414bacd6932d031c5b5a8b757f0d", size = 312061, upload-time = "2025-04-15T17:34:55.961Z" }, - { url = "https://files.pythonhosted.org/packages/22/fc/a9665c88f8a2473f823cf1ec601de9e5375050f1958cbb356cdf06ef1ab6/contourpy-1.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2e74acbcba3bfdb6d9d8384cdc4f9260cae86ed9beee8bd5f54fee49a430b9", size = 351956, upload-time = "2025-04-15T17:35:00.992Z" }, - { url = "https://files.pythonhosted.org/packages/25/eb/9f0a0238f305ad8fb7ef42481020d6e20cf15e46be99a1fcf939546a177e/contourpy-1.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e259bced5549ac64410162adc973c5e2fb77f04df4a439d00b478e57a0e65512", size = 320872, upload-time = "2025-04-15T17:35:06.177Z" }, - { url = "https://files.pythonhosted.org/packages/32/5c/1ee32d1c7956923202f00cf8d2a14a62ed7517bdc0ee1e55301227fc273c/contourpy-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad687a04bc802cbe8b9c399c07162a3c35e227e2daccf1668eb1f278cb698631", size = 325027, upload-time = "2025-04-15T17:35:11.244Z" }, - { url = "https://files.pythonhosted.org/packages/83/bf/9baed89785ba743ef329c2b07fd0611d12bfecbedbdd3eeecf929d8d3b52/contourpy-1.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cdd22595308f53ef2f891040ab2b93d79192513ffccbd7fe19be7aa773a5e09f", size = 1306641, upload-time = "2025-04-15T17:35:26.701Z" }, - { url = 
"https://files.pythonhosted.org/packages/d4/cc/74e5e83d1e35de2d28bd97033426b450bc4fd96e092a1f7a63dc7369b55d/contourpy-1.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4f54d6a2defe9f257327b0f243612dd051cc43825587520b1bf74a31e2f6ef2", size = 1374075, upload-time = "2025-04-15T17:35:43.204Z" }, - { url = "https://files.pythonhosted.org/packages/0c/42/17f3b798fd5e033b46a16f8d9fcb39f1aba051307f5ebf441bad1ecf78f8/contourpy-1.3.2-cp310-cp310-win32.whl", hash = "sha256:f939a054192ddc596e031e50bb13b657ce318cf13d264f095ce9db7dc6ae81c0", size = 177534, upload-time = "2025-04-15T17:35:46.554Z" }, - { url = "https://files.pythonhosted.org/packages/54/ec/5162b8582f2c994721018d0c9ece9dc6ff769d298a8ac6b6a652c307e7df/contourpy-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c440093bbc8fc21c637c03bafcbef95ccd963bc6e0514ad887932c18ca2a759a", size = 221188, upload-time = "2025-04-15T17:35:50.064Z" }, - { url = "https://files.pythonhosted.org/packages/b3/b9/ede788a0b56fc5b071639d06c33cb893f68b1178938f3425debebe2dab78/contourpy-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a37a2fb93d4df3fc4c0e363ea4d16f83195fc09c891bc8ce072b9d084853445", size = 269636, upload-time = "2025-04-15T17:35:54.473Z" }, - { url = "https://files.pythonhosted.org/packages/e6/75/3469f011d64b8bbfa04f709bfc23e1dd71be54d05b1b083be9f5b22750d1/contourpy-1.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7cd50c38f500bbcc9b6a46643a40e0913673f869315d8e70de0438817cb7773", size = 254636, upload-time = "2025-04-15T17:35:58.283Z" }, - { url = "https://files.pythonhosted.org/packages/8d/2f/95adb8dae08ce0ebca4fd8e7ad653159565d9739128b2d5977806656fcd2/contourpy-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6658ccc7251a4433eebd89ed2672c2ed96fba367fd25ca9512aa92a4b46c4f1", size = 313053, upload-time = "2025-04-15T17:36:03.235Z" }, - { url = 
"https://files.pythonhosted.org/packages/c3/a6/8ccf97a50f31adfa36917707fe39c9a0cbc24b3bbb58185577f119736cc9/contourpy-1.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:70771a461aaeb335df14deb6c97439973d253ae70660ca085eec25241137ef43", size = 352985, upload-time = "2025-04-15T17:36:08.275Z" }, - { url = "https://files.pythonhosted.org/packages/1d/b6/7925ab9b77386143f39d9c3243fdd101621b4532eb126743201160ffa7e6/contourpy-1.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65a887a6e8c4cd0897507d814b14c54a8c2e2aa4ac9f7686292f9769fcf9a6ab", size = 323750, upload-time = "2025-04-15T17:36:13.29Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f3/20c5d1ef4f4748e52d60771b8560cf00b69d5c6368b5c2e9311bcfa2a08b/contourpy-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3859783aefa2b8355697f16642695a5b9792e7a46ab86da1118a4a23a51a33d7", size = 326246, upload-time = "2025-04-15T17:36:18.329Z" }, - { url = "https://files.pythonhosted.org/packages/8c/e5/9dae809e7e0b2d9d70c52b3d24cba134dd3dad979eb3e5e71f5df22ed1f5/contourpy-1.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eab0f6db315fa4d70f1d8ab514e527f0366ec021ff853d7ed6a2d33605cf4b83", size = 1308728, upload-time = "2025-04-15T17:36:33.878Z" }, - { url = "https://files.pythonhosted.org/packages/e2/4a/0058ba34aeea35c0b442ae61a4f4d4ca84d6df8f91309bc2d43bb8dd248f/contourpy-1.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d91a3ccc7fea94ca0acab82ceb77f396d50a1f67412efe4c526f5d20264e6ecd", size = 1375762, upload-time = "2025-04-15T17:36:51.295Z" }, - { url = "https://files.pythonhosted.org/packages/09/33/7174bdfc8b7767ef2c08ed81244762d93d5c579336fc0b51ca57b33d1b80/contourpy-1.3.2-cp311-cp311-win32.whl", hash = "sha256:1c48188778d4d2f3d48e4643fb15d8608b1d01e4b4d6b0548d9b336c28fc9b6f", size = 178196, upload-time = "2025-04-15T17:36:55.002Z" }, - { url = 
"https://files.pythonhosted.org/packages/5e/fe/4029038b4e1c4485cef18e480b0e2cd2d755448bb071eb9977caac80b77b/contourpy-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:5ebac872ba09cb8f2131c46b8739a7ff71de28a24c869bcad554477eb089a878", size = 222017, upload-time = "2025-04-15T17:36:58.576Z" }, - { url = "https://files.pythonhosted.org/packages/34/f7/44785876384eff370c251d58fd65f6ad7f39adce4a093c934d4a67a7c6b6/contourpy-1.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4caf2bcd2969402bf77edc4cb6034c7dd7c0803213b3523f111eb7460a51b8d2", size = 271580, upload-time = "2025-04-15T17:37:03.105Z" }, - { url = "https://files.pythonhosted.org/packages/93/3b/0004767622a9826ea3d95f0e9d98cd8729015768075d61f9fea8eeca42a8/contourpy-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82199cb78276249796419fe36b7386bd8d2cc3f28b3bc19fe2454fe2e26c4c15", size = 255530, upload-time = "2025-04-15T17:37:07.026Z" }, - { url = "https://files.pythonhosted.org/packages/e7/bb/7bd49e1f4fa805772d9fd130e0d375554ebc771ed7172f48dfcd4ca61549/contourpy-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:106fab697af11456fcba3e352ad50effe493a90f893fca6c2ca5c033820cea92", size = 307688, upload-time = "2025-04-15T17:37:11.481Z" }, - { url = "https://files.pythonhosted.org/packages/fc/97/e1d5dbbfa170725ef78357a9a0edc996b09ae4af170927ba8ce977e60a5f/contourpy-1.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d14f12932a8d620e307f715857107b1d1845cc44fdb5da2bc8e850f5ceba9f87", size = 347331, upload-time = "2025-04-15T17:37:18.212Z" }, - { url = "https://files.pythonhosted.org/packages/6f/66/e69e6e904f5ecf6901be3dd16e7e54d41b6ec6ae3405a535286d4418ffb4/contourpy-1.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:532fd26e715560721bb0d5fc7610fce279b3699b018600ab999d1be895b09415", size = 318963, upload-time = "2025-04-15T17:37:22.76Z" }, - { url = 
"https://files.pythonhosted.org/packages/a8/32/b8a1c8965e4f72482ff2d1ac2cd670ce0b542f203c8e1d34e7c3e6925da7/contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b383144cf2d2c29f01a1e8170f50dacf0eac02d64139dcd709a8ac4eb3cfe", size = 323681, upload-time = "2025-04-15T17:37:33.001Z" }, - { url = "https://files.pythonhosted.org/packages/30/c6/12a7e6811d08757c7162a541ca4c5c6a34c0f4e98ef2b338791093518e40/contourpy-1.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c49f73e61f1f774650a55d221803b101d966ca0c5a2d6d5e4320ec3997489441", size = 1308674, upload-time = "2025-04-15T17:37:48.64Z" }, - { url = "https://files.pythonhosted.org/packages/2a/8a/bebe5a3f68b484d3a2b8ffaf84704b3e343ef1addea528132ef148e22b3b/contourpy-1.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d80b2c0300583228ac98d0a927a1ba6a2ba6b8a742463c564f1d419ee5b211e", size = 1380480, upload-time = "2025-04-15T17:38:06.7Z" }, - { url = "https://files.pythonhosted.org/packages/34/db/fcd325f19b5978fb509a7d55e06d99f5f856294c1991097534360b307cf1/contourpy-1.3.2-cp312-cp312-win32.whl", hash = "sha256:90df94c89a91b7362e1142cbee7568f86514412ab8a2c0d0fca72d7e91b62912", size = 178489, upload-time = "2025-04-15T17:38:10.338Z" }, - { url = "https://files.pythonhosted.org/packages/01/c8/fadd0b92ffa7b5eb5949bf340a63a4a496a6930a6c37a7ba0f12acb076d6/contourpy-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c942a01d9163e2e5cfb05cb66110121b8d07ad438a17f9e766317bcb62abf73", size = 223042, upload-time = "2025-04-15T17:38:14.239Z" }, - { url = "https://files.pythonhosted.org/packages/2e/61/5673f7e364b31e4e7ef6f61a4b5121c5f170f941895912f773d95270f3a2/contourpy-1.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:de39db2604ae755316cb5967728f4bea92685884b1e767b7c24e983ef5f771cb", size = 271630, upload-time = "2025-04-15T17:38:19.142Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/66/a40badddd1223822c95798c55292844b7e871e50f6bfd9f158cb25e0bd39/contourpy-1.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f9e896f447c5c8618f1edb2bafa9a4030f22a575ec418ad70611450720b5b08", size = 255670, upload-time = "2025-04-15T17:38:23.688Z" }, - { url = "https://files.pythonhosted.org/packages/1e/c7/cf9fdee8200805c9bc3b148f49cb9482a4e3ea2719e772602a425c9b09f8/contourpy-1.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71e2bd4a1c4188f5c2b8d274da78faab884b59df20df63c34f74aa1813c4427c", size = 306694, upload-time = "2025-04-15T17:38:28.238Z" }, - { url = "https://files.pythonhosted.org/packages/dd/e7/ccb9bec80e1ba121efbffad7f38021021cda5be87532ec16fd96533bb2e0/contourpy-1.3.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de425af81b6cea33101ae95ece1f696af39446db9682a0b56daaa48cfc29f38f", size = 345986, upload-time = "2025-04-15T17:38:33.502Z" }, - { url = "https://files.pythonhosted.org/packages/dc/49/ca13bb2da90391fa4219fdb23b078d6065ada886658ac7818e5441448b78/contourpy-1.3.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:977e98a0e0480d3fe292246417239d2d45435904afd6d7332d8455981c408b85", size = 318060, upload-time = "2025-04-15T17:38:38.672Z" }, - { url = "https://files.pythonhosted.org/packages/c8/65/5245ce8c548a8422236c13ffcdcdada6a2a812c361e9e0c70548bb40b661/contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:434f0adf84911c924519d2b08fc10491dd282b20bdd3fa8f60fd816ea0b48841", size = 322747, upload-time = "2025-04-15T17:38:43.712Z" }, - { url = "https://files.pythonhosted.org/packages/72/30/669b8eb48e0a01c660ead3752a25b44fdb2e5ebc13a55782f639170772f9/contourpy-1.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c66c4906cdbc50e9cba65978823e6e00b45682eb09adbb78c9775b74eb222422", size = 1308895, upload-time = "2025-04-15T17:39:00.224Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/5a/b569f4250decee6e8d54498be7bdf29021a4c256e77fe8138c8319ef8eb3/contourpy-1.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b7fc0cd78ba2f4695fd0a6ad81a19e7e3ab825c31b577f384aa9d7817dc3bef", size = 1379098, upload-time = "2025-04-15T17:43:29.649Z" }, - { url = "https://files.pythonhosted.org/packages/19/ba/b227c3886d120e60e41b28740ac3617b2f2b971b9f601c835661194579f1/contourpy-1.3.2-cp313-cp313-win32.whl", hash = "sha256:15ce6ab60957ca74cff444fe66d9045c1fd3e92c8936894ebd1f3eef2fff075f", size = 178535, upload-time = "2025-04-15T17:44:44.532Z" }, - { url = "https://files.pythonhosted.org/packages/12/6e/2fed56cd47ca739b43e892707ae9a13790a486a3173be063681ca67d2262/contourpy-1.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:e1578f7eafce927b168752ed7e22646dad6cd9bca673c60bff55889fa236ebf9", size = 223096, upload-time = "2025-04-15T17:44:48.194Z" }, - { url = "https://files.pythonhosted.org/packages/54/4c/e76fe2a03014a7c767d79ea35c86a747e9325537a8b7627e0e5b3ba266b4/contourpy-1.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0475b1f6604896bc7c53bb070e355e9321e1bc0d381735421a2d2068ec56531f", size = 285090, upload-time = "2025-04-15T17:43:34.084Z" }, - { url = "https://files.pythonhosted.org/packages/7b/e2/5aba47debd55d668e00baf9651b721e7733975dc9fc27264a62b0dd26eb8/contourpy-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c85bb486e9be652314bb5b9e2e3b0d1b2e643d5eec4992c0fbe8ac71775da739", size = 268643, upload-time = "2025-04-15T17:43:38.626Z" }, - { url = "https://files.pythonhosted.org/packages/a1/37/cd45f1f051fe6230f751cc5cdd2728bb3a203f5619510ef11e732109593c/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:745b57db7758f3ffc05a10254edd3182a2a83402a89c00957a8e8a22f5582823", size = 310443, upload-time = "2025-04-15T17:43:44.522Z" }, - { url = 
"https://files.pythonhosted.org/packages/8b/a2/36ea6140c306c9ff6dd38e3bcec80b3b018474ef4d17eb68ceecd26675f4/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:970e9173dbd7eba9b4e01aab19215a48ee5dd3f43cef736eebde064a171f89a5", size = 349865, upload-time = "2025-04-15T17:43:49.545Z" }, - { url = "https://files.pythonhosted.org/packages/95/b7/2fc76bc539693180488f7b6cc518da7acbbb9e3b931fd9280504128bf956/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6c4639a9c22230276b7bffb6a850dfc8258a2521305e1faefe804d006b2e532", size = 321162, upload-time = "2025-04-15T17:43:54.203Z" }, - { url = "https://files.pythonhosted.org/packages/f4/10/76d4f778458b0aa83f96e59d65ece72a060bacb20cfbee46cf6cd5ceba41/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc829960f34ba36aad4302e78eabf3ef16a3a100863f0d4eeddf30e8a485a03b", size = 327355, upload-time = "2025-04-15T17:44:01.025Z" }, - { url = "https://files.pythonhosted.org/packages/43/a3/10cf483ea683f9f8ab096c24bad3cce20e0d1dd9a4baa0e2093c1c962d9d/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d32530b534e986374fc19eaa77fcb87e8a99e5431499949b828312bdcd20ac52", size = 1307935, upload-time = "2025-04-15T17:44:17.322Z" }, - { url = "https://files.pythonhosted.org/packages/78/73/69dd9a024444489e22d86108e7b913f3528f56cfc312b5c5727a44188471/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e298e7e70cf4eb179cc1077be1c725b5fd131ebc81181bf0c03525c8abc297fd", size = 1372168, upload-time = "2025-04-15T17:44:33.43Z" }, - { url = "https://files.pythonhosted.org/packages/0f/1b/96d586ccf1b1a9d2004dd519b25fbf104a11589abfd05484ff12199cca21/contourpy-1.3.2-cp313-cp313t-win32.whl", hash = "sha256:d0e589ae0d55204991450bb5c23f571c64fe43adaa53f93fc902a84c96f52fe1", size = 189550, upload-time = "2025-04-15T17:44:37.092Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/e6/6000d0094e8a5e32ad62591c8609e269febb6e4db83a1c75ff8868b42731/contourpy-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:78e9253c3de756b3f6a5174d024c4835acd59eb3f8e2ca13e775dbffe1558f69", size = 238214, upload-time = "2025-04-15T17:44:40.827Z" }, - { url = "https://files.pythonhosted.org/packages/33/05/b26e3c6ecc05f349ee0013f0bb850a761016d89cec528a98193a48c34033/contourpy-1.3.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fd93cc7f3139b6dd7aab2f26a90dde0aa9fc264dbf70f6740d498a70b860b82c", size = 265681, upload-time = "2025-04-15T17:44:59.314Z" }, - { url = "https://files.pythonhosted.org/packages/2b/25/ac07d6ad12affa7d1ffed11b77417d0a6308170f44ff20fa1d5aa6333f03/contourpy-1.3.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:107ba8a6a7eec58bb475329e6d3b95deba9440667c4d62b9b6063942b61d7f16", size = 315101, upload-time = "2025-04-15T17:45:04.165Z" }, - { url = "https://files.pythonhosted.org/packages/8f/4d/5bb3192bbe9d3f27e3061a6a8e7733c9120e203cb8515767d30973f71030/contourpy-1.3.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ded1706ed0c1049224531b81128efbd5084598f18d8a2d9efae833edbd2b40ad", size = 220599, upload-time = "2025-04-15T17:45:08.456Z" }, - { url = "https://files.pythonhosted.org/packages/ff/c0/91f1215d0d9f9f343e4773ba6c9b89e8c0cc7a64a6263f21139da639d848/contourpy-1.3.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5f5964cdad279256c084b69c3f412b7801e15356b16efa9d78aa974041903da0", size = 266807, upload-time = "2025-04-15T17:45:15.535Z" }, - { url = "https://files.pythonhosted.org/packages/d4/79/6be7e90c955c0487e7712660d6cead01fa17bff98e0ea275737cc2bc8e71/contourpy-1.3.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49b65a95d642d4efa8f64ba12558fcb83407e58a2dfba9d796d77b63ccfcaff5", size = 318729, upload-time = "2025-04-15T17:45:20.166Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/68/7f46fb537958e87427d98a4074bcde4b67a70b04900cfc5ce29bc2f556c1/contourpy-1.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8c5acb8dddb0752bf252e01a3035b21443158910ac16a3b0d20e7fed7d534ce5", size = 221791, upload-time = "2025-04-15T17:45:24.794Z" }, -] - -[[package]] -name = "contourpy" -version = "1.3.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14'", - "python_full_version >= '3.12' and python_full_version < '3.14'", - "python_full_version == '3.11.*'", -] -dependencies = [ - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/2e/c4390a31919d8a78b90e8ecf87cd4b4c4f05a5b48d05ec17db8e5404c6f4/contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1", size = 288773, upload-time = "2025-07-26T12:01:02.277Z" }, - { url = "https://files.pythonhosted.org/packages/0d/44/c4b0b6095fef4dc9c420e041799591e3b63e9619e3044f7f4f6c21c0ab24/contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381", size = 270149, upload-time = "2025-07-26T12:01:04.072Z" }, - { url = "https://files.pythonhosted.org/packages/30/2e/dd4ced42fefac8470661d7cb7e264808425e6c5d56d175291e93890cce09/contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7", size = 329222, upload-time = "2025-07-26T12:01:05.688Z" }, - { 
url = "https://files.pythonhosted.org/packages/f2/74/cc6ec2548e3d276c71389ea4802a774b7aa3558223b7bade3f25787fafc2/contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1", size = 377234, upload-time = "2025-07-26T12:01:07.054Z" }, - { url = "https://files.pythonhosted.org/packages/03/b3/64ef723029f917410f75c09da54254c5f9ea90ef89b143ccadb09df14c15/contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a", size = 380555, upload-time = "2025-07-26T12:01:08.801Z" }, - { url = "https://files.pythonhosted.org/packages/5f/4b/6157f24ca425b89fe2eb7e7be642375711ab671135be21e6faa100f7448c/contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db", size = 355238, upload-time = "2025-07-26T12:01:10.319Z" }, - { url = "https://files.pythonhosted.org/packages/98/56/f914f0dd678480708a04cfd2206e7c382533249bc5001eb9f58aa693e200/contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620", size = 1326218, upload-time = "2025-07-26T12:01:12.659Z" }, - { url = "https://files.pythonhosted.org/packages/fb/d7/4a972334a0c971acd5172389671113ae82aa7527073980c38d5868ff1161/contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f", size = 1392867, upload-time = "2025-07-26T12:01:15.533Z" }, - { url = "https://files.pythonhosted.org/packages/75/3e/f2cc6cd56dc8cff46b1a56232eabc6feea52720083ea71ab15523daab796/contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff", size = 183677, upload-time = "2025-07-26T12:01:17.088Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/4b/9bd370b004b5c9d8045c6c33cf65bae018b27aca550a3f657cdc99acdbd8/contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42", size = 225234, upload-time = "2025-07-26T12:01:18.256Z" }, - { url = "https://files.pythonhosted.org/packages/d9/b6/71771e02c2e004450c12b1120a5f488cad2e4d5b590b1af8bad060360fe4/contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470", size = 193123, upload-time = "2025-07-26T12:01:19.848Z" }, - { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" }, - { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" }, - { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" }, - { url = "https://files.pythonhosted.org/packages/63/12/897aeebfb475b7748ea67b61e045accdfcf0d971f8a588b67108ed7f5512/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8", size = 379536, upload-time = "2025-07-26T12:01:25.91Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/8a/a8c584b82deb248930ce069e71576fc09bd7174bbd35183b7943fb1064fd/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea", size = 384397, upload-time = "2025-07-26T12:01:27.152Z" }, - { url = "https://files.pythonhosted.org/packages/cc/8f/ec6289987824b29529d0dfda0d74a07cec60e54b9c92f3c9da4c0ac732de/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1", size = 362601, upload-time = "2025-07-26T12:01:28.808Z" }, - { url = "https://files.pythonhosted.org/packages/05/0a/a3fe3be3ee2dceb3e615ebb4df97ae6f3828aa915d3e10549ce016302bd1/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7", size = 1331288, upload-time = "2025-07-26T12:01:31.198Z" }, - { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" }, - { url = "https://files.pythonhosted.org/packages/cf/8f/5847f44a7fddf859704217a99a23a4f6417b10e5ab1256a179264561540e/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69", size = 185018, upload-time = "2025-07-26T12:01:35.64Z" }, - { url = "https://files.pythonhosted.org/packages/19/e8/6026ed58a64563186a9ee3f29f41261fd1828f527dd93d33b60feca63352/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b", size = 226567, upload-time = "2025-07-26T12:01:36.804Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" }, - { url = "https://files.pythonhosted.org/packages/68/35/0167aad910bbdb9599272bd96d01a9ec6852f36b9455cf2ca67bd4cc2d23/contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5", size = 293257, upload-time = "2025-07-26T12:01:39.367Z" }, - { url = "https://files.pythonhosted.org/packages/96/e4/7adcd9c8362745b2210728f209bfbcf7d91ba868a2c5f40d8b58f54c509b/contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1", size = 274034, upload-time = "2025-07-26T12:01:40.645Z" }, - { url = "https://files.pythonhosted.org/packages/73/23/90e31ceeed1de63058a02cb04b12f2de4b40e3bef5e082a7c18d9c8ae281/contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286", size = 334672, upload-time = "2025-07-26T12:01:41.942Z" }, - { url = "https://files.pythonhosted.org/packages/ed/93/b43d8acbe67392e659e1d984700e79eb67e2acb2bd7f62012b583a7f1b55/contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5", size = 381234, upload-time = "2025-07-26T12:01:43.499Z" }, - { url = "https://files.pythonhosted.org/packages/46/3b/bec82a3ea06f66711520f75a40c8fc0b113b2a75edb36aa633eb11c4f50f/contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67", size = 385169, upload-time = "2025-07-26T12:01:45.219Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9", size = 362859, upload-time = "2025-07-26T12:01:46.519Z" }, - { url = "https://files.pythonhosted.org/packages/33/71/e2a7945b7de4e58af42d708a219f3b2f4cff7386e6b6ab0a0fa0033c49a9/contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659", size = 1332062, upload-time = "2025-07-26T12:01:48.964Z" }, - { url = "https://files.pythonhosted.org/packages/12/fc/4e87ac754220ccc0e807284f88e943d6d43b43843614f0a8afa469801db0/contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7", size = 1403932, upload-time = "2025-07-26T12:01:51.979Z" }, - { url = "https://files.pythonhosted.org/packages/a6/2e/adc197a37443f934594112222ac1aa7dc9a98faf9c3842884df9a9d8751d/contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d", size = 185024, upload-time = "2025-07-26T12:01:53.245Z" }, - { url = "https://files.pythonhosted.org/packages/18/0b/0098c214843213759692cc638fce7de5c289200a830e5035d1791d7a2338/contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263", size = 226578, upload-time = "2025-07-26T12:01:54.422Z" }, - { url = "https://files.pythonhosted.org/packages/8a/9a/2f6024a0c5995243cd63afdeb3651c984f0d2bc727fd98066d40e141ad73/contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9", size = 193524, upload-time = "2025-07-26T12:01:55.73Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/b3/f8a1a86bd3298513f500e5b1f5fd92b69896449f6cab6a146a5d52715479/contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d", size = 306730, upload-time = "2025-07-26T12:01:57.051Z" }, - { url = "https://files.pythonhosted.org/packages/3f/11/4780db94ae62fc0c2053909b65dc3246bd7cecfc4f8a20d957ad43aa4ad8/contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216", size = 287897, upload-time = "2025-07-26T12:01:58.663Z" }, - { url = "https://files.pythonhosted.org/packages/ae/15/e59f5f3ffdd6f3d4daa3e47114c53daabcb18574a26c21f03dc9e4e42ff0/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae", size = 326751, upload-time = "2025-07-26T12:02:00.343Z" }, - { url = "https://files.pythonhosted.org/packages/0f/81/03b45cfad088e4770b1dcf72ea78d3802d04200009fb364d18a493857210/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20", size = 375486, upload-time = "2025-07-26T12:02:02.128Z" }, - { url = "https://files.pythonhosted.org/packages/0c/ba/49923366492ffbdd4486e970d421b289a670ae8cf539c1ea9a09822b371a/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99", size = 388106, upload-time = "2025-07-26T12:02:03.615Z" }, - { url = "https://files.pythonhosted.org/packages/9f/52/5b00ea89525f8f143651f9f03a0df371d3cbd2fccd21ca9b768c7a6500c2/contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b", size = 352548, upload-time = "2025-07-26T12:02:05.165Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/1d/a209ec1a3a3452d490f6b14dd92e72280c99ae3d1e73da74f8277d4ee08f/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a", size = 1322297, upload-time = "2025-07-26T12:02:07.379Z" }, - { url = "https://files.pythonhosted.org/packages/bc/9e/46f0e8ebdd884ca0e8877e46a3f4e633f6c9c8c4f3f6e72be3fe075994aa/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e", size = 1391023, upload-time = "2025-07-26T12:02:10.171Z" }, - { url = "https://files.pythonhosted.org/packages/b9/70/f308384a3ae9cd2209e0849f33c913f658d3326900d0ff5d378d6a1422d2/contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3", size = 196157, upload-time = "2025-07-26T12:02:11.488Z" }, - { url = "https://files.pythonhosted.org/packages/b2/dd/880f890a6663b84d9e34a6f88cded89d78f0091e0045a284427cb6b18521/contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8", size = 240570, upload-time = "2025-07-26T12:02:12.754Z" }, - { url = "https://files.pythonhosted.org/packages/80/99/2adc7d8ffead633234817ef8e9a87115c8a11927a94478f6bb3d3f4d4f7d/contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301", size = 199713, upload-time = "2025-07-26T12:02:14.4Z" }, - { url = "https://files.pythonhosted.org/packages/72/8b/4546f3ab60f78c514ffb7d01a0bd743f90de36f0019d1be84d0a708a580a/contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a", size = 292189, upload-time = "2025-07-26T12:02:16.095Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/e1/3542a9cb596cadd76fcef413f19c79216e002623158befe6daa03dbfa88c/contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77", size = 273251, upload-time = "2025-07-26T12:02:17.524Z" }, - { url = "https://files.pythonhosted.org/packages/b1/71/f93e1e9471d189f79d0ce2497007731c1e6bf9ef6d1d61b911430c3db4e5/contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5", size = 335810, upload-time = "2025-07-26T12:02:18.9Z" }, - { url = "https://files.pythonhosted.org/packages/91/f9/e35f4c1c93f9275d4e38681a80506b5510e9327350c51f8d4a5a724d178c/contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4", size = 382871, upload-time = "2025-07-26T12:02:20.418Z" }, - { url = "https://files.pythonhosted.org/packages/b5/71/47b512f936f66a0a900d81c396a7e60d73419868fba959c61efed7a8ab46/contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36", size = 386264, upload-time = "2025-07-26T12:02:21.916Z" }, - { url = "https://files.pythonhosted.org/packages/04/5f/9ff93450ba96b09c7c2b3f81c94de31c89f92292f1380261bd7195bea4ea/contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3", size = 363819, upload-time = "2025-07-26T12:02:23.759Z" }, - { url = "https://files.pythonhosted.org/packages/3e/a6/0b185d4cc480ee494945cde102cb0149ae830b5fa17bf855b95f2e70ad13/contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b", size = 1333650, upload-time = "2025-07-26T12:02:26.181Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/d7/afdc95580ca56f30fbcd3060250f66cedbde69b4547028863abd8aa3b47e/contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36", size = 1404833, upload-time = "2025-07-26T12:02:28.782Z" }, - { url = "https://files.pythonhosted.org/packages/e2/e2/366af18a6d386f41132a48f033cbd2102e9b0cf6345d35ff0826cd984566/contourpy-1.3.3-cp314-cp314-win32.whl", hash = "sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d", size = 189692, upload-time = "2025-07-26T12:02:30.128Z" }, - { url = "https://files.pythonhosted.org/packages/7d/c2/57f54b03d0f22d4044b8afb9ca0e184f8b1afd57b4f735c2fa70883dc601/contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd", size = 232424, upload-time = "2025-07-26T12:02:31.395Z" }, - { url = "https://files.pythonhosted.org/packages/18/79/a9416650df9b525737ab521aa181ccc42d56016d2123ddcb7b58e926a42c/contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339", size = 198300, upload-time = "2025-07-26T12:02:32.956Z" }, - { url = "https://files.pythonhosted.org/packages/1f/42/38c159a7d0f2b7b9c04c64ab317042bb6952b713ba875c1681529a2932fe/contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772", size = 306769, upload-time = "2025-07-26T12:02:34.2Z" }, - { url = "https://files.pythonhosted.org/packages/c3/6c/26a8205f24bca10974e77460de68d3d7c63e282e23782f1239f226fcae6f/contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77", size = 287892, upload-time = "2025-07-26T12:02:35.807Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/06/8a475c8ab718ebfd7925661747dbb3c3ee9c82ac834ccb3570be49d129f4/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13", size = 326748, upload-time = "2025-07-26T12:02:37.193Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a3/c5ca9f010a44c223f098fccd8b158bb1cb287378a31ac141f04730dc49be/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe", size = 375554, upload-time = "2025-07-26T12:02:38.894Z" }, - { url = "https://files.pythonhosted.org/packages/80/5b/68bd33ae63fac658a4145088c1e894405e07584a316738710b636c6d0333/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f", size = 388118, upload-time = "2025-07-26T12:02:40.642Z" }, - { url = "https://files.pythonhosted.org/packages/40/52/4c285a6435940ae25d7410a6c36bda5145839bc3f0beb20c707cda18b9d2/contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0", size = 352555, upload-time = "2025-07-26T12:02:42.25Z" }, - { url = "https://files.pythonhosted.org/packages/24/ee/3e81e1dd174f5c7fefe50e85d0892de05ca4e26ef1c9a59c2a57e43b865a/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4", size = 1322295, upload-time = "2025-07-26T12:02:44.668Z" }, - { url = "https://files.pythonhosted.org/packages/3c/b2/6d913d4d04e14379de429057cd169e5e00f6c2af3bb13e1710bcbdb5da12/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f", size = 1391027, upload-time = "2025-07-26T12:02:47.09Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/8a/68a4ec5c55a2971213d29a9374913f7e9f18581945a7a31d1a39b5d2dfe5/contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae", size = 202428, upload-time = "2025-07-26T12:02:48.691Z" }, - { url = "https://files.pythonhosted.org/packages/fa/96/fd9f641ffedc4fa3ace923af73b9d07e869496c9cc7a459103e6e978992f/contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc", size = 250331, upload-time = "2025-07-26T12:02:50.137Z" }, - { url = "https://files.pythonhosted.org/packages/ae/8c/469afb6465b853afff216f9528ffda78a915ff880ed58813ba4faf4ba0b6/contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b", size = 203831, upload-time = "2025-07-26T12:02:51.449Z" }, - { url = "https://files.pythonhosted.org/packages/a5/29/8dcfe16f0107943fa92388c23f6e05cff0ba58058c4c95b00280d4c75a14/contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497", size = 278809, upload-time = "2025-07-26T12:02:52.74Z" }, - { url = "https://files.pythonhosted.org/packages/85/a9/8b37ef4f7dafeb335daee3c8254645ef5725be4d9c6aa70b50ec46ef2f7e/contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8", size = 261593, upload-time = "2025-07-26T12:02:54.037Z" }, - { url = "https://files.pythonhosted.org/packages/0a/59/ebfb8c677c75605cc27f7122c90313fd2f375ff3c8d19a1694bda74aaa63/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e", size = 302202, upload-time = "2025-07-26T12:02:55.947Z" }, - { url = 
"https://files.pythonhosted.org/packages/3c/37/21972a15834d90bfbfb009b9d004779bd5a07a0ec0234e5ba8f64d5736f4/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989", size = 329207, upload-time = "2025-07-26T12:02:57.468Z" }, - { url = "https://files.pythonhosted.org/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" }, -] - [[package]] name = "coverage" version = "7.13.2" @@ -921,15 +764,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/c3/e90f4a4feae6410f914f8ebac129b9ae7a8c92eb60a638012dde42030a9d/cryptography-46.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6b5063083824e5509fdba180721d55909ffacccc8adbec85268b48439423d78c", size = 3438528, upload-time = "2025-10-15T23:18:26.227Z" }, ] -[[package]] -name = "cycler" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, -] - [[package]] name = "data-designer" source = { editable = "packages/data-designer" } @@ -1033,9 +867,6 @@ requires-dist = [ name = "data-designer-workspace" version = "0.0.0" source = { virtual = "." 
} -dependencies = [ - { name = "matplotlib" }, -] [package.dev-dependencies] dev = [ @@ -1069,7 +900,6 @@ recipes = [ ] [package.metadata] -requires-dist = [{ name = "matplotlib", specifier = ">=3.10.8" }] [package.metadata.requires-dev] dev = [ @@ -1394,63 +1224,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" }, ] -[[package]] -name = "fonttools" -version = "4.61.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ec/ca/cf17b88a8df95691275a3d77dc0a5ad9907f328ae53acbe6795da1b2f5ed/fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69", size = 3565756, upload-time = "2025-12-12T17:31:24.246Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5b/94/8a28707adb00bed1bf22dac16ccafe60faf2ade353dcb32c3617ee917307/fonttools-4.61.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c7db70d57e5e1089a274cbb2b1fd635c9a24de809a231b154965d415d6c6d24", size = 2854799, upload-time = "2025-12-12T17:29:27.5Z" }, - { url = "https://files.pythonhosted.org/packages/94/93/c2e682faaa5ee92034818d8f8a8145ae73eb83619600495dcf8503fa7771/fonttools-4.61.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5fe9fd43882620017add5eabb781ebfbc6998ee49b35bd7f8f79af1f9f99a958", size = 2403032, upload-time = "2025-12-12T17:29:30.115Z" }, - { url = "https://files.pythonhosted.org/packages/f1/62/1748f7e7e1ee41aa52279fd2e3a6d0733dc42a673b16932bad8e5d0c8b28/fonttools-4.61.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8db08051fc9e7d8bc622f2112511b8107d8f27cd89e2f64ec45e9825e8288da", size = 4897863, upload-time = "2025-12-12T17:29:32.535Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/69/4ca02ee367d2c98edcaeb83fc278d20972502ee071214ad9d8ca85e06080/fonttools-4.61.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a76d4cb80f41ba94a6691264be76435e5f72f2cb3cab0b092a6212855f71c2f6", size = 4859076, upload-time = "2025-12-12T17:29:34.907Z" }, - { url = "https://files.pythonhosted.org/packages/8c/f5/660f9e3cefa078861a7f099107c6d203b568a6227eef163dd173bfc56bdc/fonttools-4.61.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a13fc8aeb24bad755eea8f7f9d409438eb94e82cf86b08fe77a03fbc8f6a96b1", size = 4875623, upload-time = "2025-12-12T17:29:37.33Z" }, - { url = "https://files.pythonhosted.org/packages/63/d1/9d7c5091d2276ed47795c131c1bf9316c3c1ab2789c22e2f59e0572ccd38/fonttools-4.61.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b846a1fcf8beadeb9ea4f44ec5bdde393e2f1569e17d700bfc49cd69bde75881", size = 4993327, upload-time = "2025-12-12T17:29:39.781Z" }, - { url = "https://files.pythonhosted.org/packages/6f/2d/28def73837885ae32260d07660a052b99f0aa00454867d33745dfe49dbf0/fonttools-4.61.1-cp310-cp310-win32.whl", hash = "sha256:78a7d3ab09dc47ac1a363a493e6112d8cabed7ba7caad5f54dbe2f08676d1b47", size = 1502180, upload-time = "2025-12-12T17:29:42.217Z" }, - { url = "https://files.pythonhosted.org/packages/63/fa/bfdc98abb4dd2bd491033e85e3ba69a2313c850e759a6daa014bc9433b0f/fonttools-4.61.1-cp310-cp310-win_amd64.whl", hash = "sha256:eff1ac3cc66c2ac7cda1e64b4e2f3ffef474b7335f92fc3833fc632d595fcee6", size = 1550654, upload-time = "2025-12-12T17:29:44.564Z" }, - { url = "https://files.pythonhosted.org/packages/69/12/bf9f4eaa2fad039356cc627587e30ed008c03f1cebd3034376b5ee8d1d44/fonttools-4.61.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c6604b735bb12fef8e0efd5578c9fb5d3d8532d5001ea13a19cddf295673ee09", size = 2852213, upload-time = "2025-12-12T17:29:46.675Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/49/4138d1acb6261499bedde1c07f8c2605d1d8f9d77a151e5507fd3ef084b6/fonttools-4.61.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ce02f38a754f207f2f06557523cd39a06438ba3aafc0639c477ac409fc64e37", size = 2401689, upload-time = "2025-12-12T17:29:48.769Z" }, - { url = "https://files.pythonhosted.org/packages/e5/fe/e6ce0fe20a40e03aef906af60aa87668696f9e4802fa283627d0b5ed777f/fonttools-4.61.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77efb033d8d7ff233385f30c62c7c79271c8885d5c9657d967ede124671bbdfb", size = 5058809, upload-time = "2025-12-12T17:29:51.701Z" }, - { url = "https://files.pythonhosted.org/packages/79/61/1ca198af22f7dd22c17ab86e9024ed3c06299cfdb08170640e9996d501a0/fonttools-4.61.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75c1a6dfac6abd407634420c93864a1e274ebc1c7531346d9254c0d8f6ca00f9", size = 5036039, upload-time = "2025-12-12T17:29:53.659Z" }, - { url = "https://files.pythonhosted.org/packages/99/cc/fa1801e408586b5fce4da9f5455af8d770f4fc57391cd5da7256bb364d38/fonttools-4.61.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0de30bfe7745c0d1ffa2b0b7048fb7123ad0d71107e10ee090fa0b16b9452e87", size = 5034714, upload-time = "2025-12-12T17:29:55.592Z" }, - { url = "https://files.pythonhosted.org/packages/bf/aa/b7aeafe65adb1b0a925f8f25725e09f078c635bc22754f3fecb7456955b0/fonttools-4.61.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58b0ee0ab5b1fc9921eccfe11d1435added19d6494dde14e323f25ad2bc30c56", size = 5158648, upload-time = "2025-12-12T17:29:57.861Z" }, - { url = "https://files.pythonhosted.org/packages/99/f9/08ea7a38663328881384c6e7777bbefc46fd7d282adfd87a7d2b84ec9d50/fonttools-4.61.1-cp311-cp311-win32.whl", hash = "sha256:f79b168428351d11e10c5aeb61a74e1851ec221081299f4cf56036a95431c43a", size = 2280681, upload-time = "2025-12-12T17:29:59.943Z" }, - { url = 
"https://files.pythonhosted.org/packages/07/ad/37dd1ae5fa6e01612a1fbb954f0927681f282925a86e86198ccd7b15d515/fonttools-4.61.1-cp311-cp311-win_amd64.whl", hash = "sha256:fe2efccb324948a11dd09d22136fe2ac8a97d6c1347cf0b58a911dcd529f66b7", size = 2331951, upload-time = "2025-12-12T17:30:02.254Z" }, - { url = "https://files.pythonhosted.org/packages/6f/16/7decaa24a1bd3a70c607b2e29f0adc6159f36a7e40eaba59846414765fd4/fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f3cb4a569029b9f291f88aafc927dd53683757e640081ca8c412781ea144565e", size = 2851593, upload-time = "2025-12-12T17:30:04.225Z" }, - { url = "https://files.pythonhosted.org/packages/94/98/3c4cb97c64713a8cf499b3245c3bf9a2b8fd16a3e375feff2aed78f96259/fonttools-4.61.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41a7170d042e8c0024703ed13b71893519a1a6d6e18e933e3ec7507a2c26a4b2", size = 2400231, upload-time = "2025-12-12T17:30:06.47Z" }, - { url = "https://files.pythonhosted.org/packages/b7/37/82dbef0f6342eb01f54bca073ac1498433d6ce71e50c3c3282b655733b31/fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10d88e55330e092940584774ee5e8a6971b01fc2f4d3466a1d6c158230880796", size = 4954103, upload-time = "2025-12-12T17:30:08.432Z" }, - { url = "https://files.pythonhosted.org/packages/6c/44/f3aeac0fa98e7ad527f479e161aca6c3a1e47bb6996b053d45226fe37bf2/fonttools-4.61.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:15acc09befd16a0fb8a8f62bc147e1a82817542d72184acca9ce6e0aeda9fa6d", size = 5004295, upload-time = "2025-12-12T17:30:10.56Z" }, - { url = "https://files.pythonhosted.org/packages/14/e8/7424ced75473983b964d09f6747fa09f054a6d656f60e9ac9324cf40c743/fonttools-4.61.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6bcdf33aec38d16508ce61fd81838f24c83c90a1d1b8c68982857038673d6b8", size = 4944109, upload-time = "2025-12-12T17:30:12.874Z" }, - { url = 
"https://files.pythonhosted.org/packages/c8/8b/6391b257fa3d0b553d73e778f953a2f0154292a7a7a085e2374b111e5410/fonttools-4.61.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fade934607a523614726119164ff621e8c30e8fa1ffffbbd358662056ba69f0", size = 5093598, upload-time = "2025-12-12T17:30:15.79Z" }, - { url = "https://files.pythonhosted.org/packages/d9/71/fd2ea96cdc512d92da5678a1c98c267ddd4d8c5130b76d0f7a80f9a9fde8/fonttools-4.61.1-cp312-cp312-win32.whl", hash = "sha256:75da8f28eff26defba42c52986de97b22106cb8f26515b7c22443ebc9c2d3261", size = 2269060, upload-time = "2025-12-12T17:30:18.058Z" }, - { url = "https://files.pythonhosted.org/packages/80/3b/a3e81b71aed5a688e89dfe0e2694b26b78c7d7f39a5ffd8a7d75f54a12a8/fonttools-4.61.1-cp312-cp312-win_amd64.whl", hash = "sha256:497c31ce314219888c0e2fce5ad9178ca83fe5230b01a5006726cdf3ac9f24d9", size = 2319078, upload-time = "2025-12-12T17:30:22.862Z" }, - { url = "https://files.pythonhosted.org/packages/4b/cf/00ba28b0990982530addb8dc3e9e6f2fa9cb5c20df2abdda7baa755e8fe1/fonttools-4.61.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c56c488ab471628ff3bfa80964372fc13504ece601e0d97a78ee74126b2045c", size = 2846454, upload-time = "2025-12-12T17:30:24.938Z" }, - { url = "https://files.pythonhosted.org/packages/5a/ca/468c9a8446a2103ae645d14fee3f610567b7042aba85031c1c65e3ef7471/fonttools-4.61.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc492779501fa723b04d0ab1f5be046797fee17d27700476edc7ee9ae535a61e", size = 2398191, upload-time = "2025-12-12T17:30:27.343Z" }, - { url = "https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:64102ca87e84261419c3747a0d20f396eb024bdbeb04c2bfb37e2891f5fadcb5", size = 4928410, upload-time = "2025-12-12T17:30:29.771Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/8d/6fb3494dfe61a46258cd93d979cf4725ded4eb46c2a4ca35e4490d84daea/fonttools-4.61.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c1b526c8d3f615a7b1867f38a9410849c8f4aef078535742198e942fba0e9bd", size = 4984460, upload-time = "2025-12-12T17:30:32.073Z" }, - { url = "https://files.pythonhosted.org/packages/f7/f1/a47f1d30b3dc00d75e7af762652d4cbc3dff5c2697a0dbd5203c81afd9c3/fonttools-4.61.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:41ed4b5ec103bd306bb68f81dc166e77409e5209443e5773cb4ed837bcc9b0d3", size = 4925800, upload-time = "2025-12-12T17:30:34.339Z" }, - { url = "https://files.pythonhosted.org/packages/a7/01/e6ae64a0981076e8a66906fab01539799546181e32a37a0257b77e4aa88b/fonttools-4.61.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b501c862d4901792adaec7c25b1ecc749e2662543f68bb194c42ba18d6eec98d", size = 5067859, upload-time = "2025-12-12T17:30:36.593Z" }, - { url = "https://files.pythonhosted.org/packages/73/aa/28e40b8d6809a9b5075350a86779163f074d2b617c15d22343fce81918db/fonttools-4.61.1-cp313-cp313-win32.whl", hash = "sha256:4d7092bb38c53bbc78e9255a59158b150bcdc115a1e3b3ce0b5f267dc35dd63c", size = 2267821, upload-time = "2025-12-12T17:30:38.478Z" }, - { url = "https://files.pythonhosted.org/packages/1a/59/453c06d1d83dc0951b69ef692d6b9f1846680342927df54e9a1ca91c6f90/fonttools-4.61.1-cp313-cp313-win_amd64.whl", hash = "sha256:21e7c8d76f62ab13c9472ccf74515ca5b9a761d1bde3265152a6dc58700d895b", size = 2318169, upload-time = "2025-12-12T17:30:40.951Z" }, - { url = "https://files.pythonhosted.org/packages/32/8f/4e7bf82c0cbb738d3c2206c920ca34ca74ef9dabde779030145d28665104/fonttools-4.61.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fff4f534200a04b4a36e7ae3cb74493afe807b517a09e99cb4faa89a34ed6ecd", size = 2846094, upload-time = "2025-12-12T17:30:43.511Z" }, - { url = 
"https://files.pythonhosted.org/packages/71/09/d44e45d0a4f3a651f23a1e9d42de43bc643cce2971b19e784cc67d823676/fonttools-4.61.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d9203500f7c63545b4ce3799319fe4d9feb1a1b89b28d3cb5abd11b9dd64147e", size = 2396589, upload-time = "2025-12-12T17:30:45.681Z" }, - { url = "https://files.pythonhosted.org/packages/89/18/58c64cafcf8eb677a99ef593121f719e6dcbdb7d1c594ae5a10d4997ca8a/fonttools-4.61.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa646ecec9528bef693415c79a86e733c70a4965dd938e9a226b0fc64c9d2e6c", size = 4877892, upload-time = "2025-12-12T17:30:47.709Z" }, - { url = "https://files.pythonhosted.org/packages/8a/ec/9e6b38c7ba1e09eb51db849d5450f4c05b7e78481f662c3b79dbde6f3d04/fonttools-4.61.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f35ad7805edba3aac1a3710d104592df59f4b957e30108ae0ba6c10b11dd75", size = 4972884, upload-time = "2025-12-12T17:30:49.656Z" }, - { url = "https://files.pythonhosted.org/packages/5e/87/b5339da8e0256734ba0dbbf5b6cdebb1dd79b01dc8c270989b7bcd465541/fonttools-4.61.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b931ae8f62db78861b0ff1ac017851764602288575d65b8e8ff1963fed419063", size = 4924405, upload-time = "2025-12-12T17:30:51.735Z" }, - { url = "https://files.pythonhosted.org/packages/0b/47/e3409f1e1e69c073a3a6fd8cb886eb18c0bae0ee13db2c8d5e7f8495e8b7/fonttools-4.61.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b148b56f5de675ee16d45e769e69f87623a4944f7443850bf9a9376e628a89d2", size = 5035553, upload-time = "2025-12-12T17:30:54.823Z" }, - { url = "https://files.pythonhosted.org/packages/bf/b6/1f6600161b1073a984294c6c031e1a56ebf95b6164249eecf30012bb2e38/fonttools-4.61.1-cp314-cp314-win32.whl", hash = "sha256:9b666a475a65f4e839d3d10473fad6d47e0a9db14a2f4a224029c5bfde58ad2c", size = 2271915, upload-time = "2025-12-12T17:30:57.913Z" }, - { url = 
"https://files.pythonhosted.org/packages/52/7b/91e7b01e37cc8eb0e1f770d08305b3655e4f002fc160fb82b3390eabacf5/fonttools-4.61.1-cp314-cp314-win_amd64.whl", hash = "sha256:4f5686e1fe5fce75d82d93c47a438a25bf0d1319d2843a926f741140b2b16e0c", size = 2323487, upload-time = "2025-12-12T17:30:59.804Z" }, - { url = "https://files.pythonhosted.org/packages/39/5c/908ad78e46c61c3e3ed70c3b58ff82ab48437faf84ec84f109592cabbd9f/fonttools-4.61.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:e76ce097e3c57c4bcb67c5aa24a0ecdbd9f74ea9219997a707a4061fbe2707aa", size = 2929571, upload-time = "2025-12-12T17:31:02.574Z" }, - { url = "https://files.pythonhosted.org/packages/bd/41/975804132c6dea64cdbfbaa59f3518a21c137a10cccf962805b301ac6ab2/fonttools-4.61.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9cfef3ab326780c04d6646f68d4b4742aae222e8b8ea1d627c74e38afcbc9d91", size = 2435317, upload-time = "2025-12-12T17:31:04.974Z" }, - { url = "https://files.pythonhosted.org/packages/b0/5a/aef2a0a8daf1ebaae4cfd83f84186d4a72ee08fd6a8451289fcd03ffa8a4/fonttools-4.61.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a75c301f96db737e1c5ed5fd7d77d9c34466de16095a266509e13da09751bd19", size = 4882124, upload-time = "2025-12-12T17:31:07.456Z" }, - { url = "https://files.pythonhosted.org/packages/80/33/d6db3485b645b81cea538c9d1c9219d5805f0877fda18777add4671c5240/fonttools-4.61.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91669ccac46bbc1d09e9273546181919064e8df73488ea087dcac3e2968df9ba", size = 5100391, upload-time = "2025-12-12T17:31:09.732Z" }, - { url = "https://files.pythonhosted.org/packages/6c/d6/675ba631454043c75fcf76f0ca5463eac8eb0666ea1d7badae5fea001155/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c33ab3ca9d3ccd581d58e989d67554e42d8d4ded94ab3ade3508455fe70e65f7", size = 4978800, upload-time = "2025-12-12T17:31:11.681Z" }, - { url 
= "https://files.pythonhosted.org/packages/7f/33/d3ec753d547a8d2bdaedd390d4a814e8d5b45a093d558f025c6b990b554c/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:664c5a68ec406f6b1547946683008576ef8b38275608e1cee6c061828171c118", size = 5006426, upload-time = "2025-12-12T17:31:13.764Z" }, - { url = "https://files.pythonhosted.org/packages/b4/40/cc11f378b561a67bea850ab50063366a0d1dd3f6d0a30ce0f874b0ad5664/fonttools-4.61.1-cp314-cp314t-win32.whl", hash = "sha256:aed04cabe26f30c1647ef0e8fbb207516fd40fe9472e9439695f5c6998e60ac5", size = 2335377, upload-time = "2025-12-12T17:31:16.49Z" }, - { url = "https://files.pythonhosted.org/packages/e4/ff/c9a2b66b39f8628531ea58b320d66d951267c98c6a38684daa8f50fb02f8/fonttools-4.61.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2180f14c141d2f0f3da43f3a81bc8aa4684860f6b0e6f9e165a4831f24e6a23b", size = 2400613, upload-time = "2025-12-12T17:31:18.769Z" }, - { url = "https://files.pythonhosted.org/packages/c7/4e/ce75a57ff3aebf6fc1f4e9d508b8e5810618a33d900ad6c19eb30b290b97/fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371", size = 1148996, upload-time = "2025-12-12T17:31:21.03Z" }, -] - [[package]] name = "fqdn" version = "1.5.1" @@ -2532,114 +2305,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/5a/736dd2f4535dbf3bf26523f9158c011389ef88dd06ec2eef67fd744f1c7b/jupytext-1.19.1-py3-none-any.whl", hash = "sha256:d8975035155d034bdfde5c0c37891425314b7ea8d3a6c4b5d18c294348714cd9", size = 170478, upload-time = "2026-01-25T21:35:11.17Z" }, ] -[[package]] -name = "kiwisolver" -version = "1.4.9" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" } -wheels = [ - { 
url = "https://files.pythonhosted.org/packages/c6/5d/8ce64e36d4e3aac5ca96996457dcf33e34e6051492399a3f1fec5657f30b/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b4b4d74bda2b8ebf4da5bd42af11d02d04428b2c32846e4c2c93219df8a7987b", size = 124159, upload-time = "2025-08-10T21:25:35.472Z" }, - { url = "https://files.pythonhosted.org/packages/96/1e/22f63ec454874378175a5f435d6ea1363dd33fb2af832c6643e4ccea0dc8/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fb3b8132019ea572f4611d770991000d7f58127560c4889729248eb5852a102f", size = 66578, upload-time = "2025-08-10T21:25:36.73Z" }, - { url = "https://files.pythonhosted.org/packages/41/4c/1925dcfff47a02d465121967b95151c82d11027d5ec5242771e580e731bd/kiwisolver-1.4.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84fd60810829c27ae375114cd379da1fa65e6918e1da405f356a775d49a62bcf", size = 65312, upload-time = "2025-08-10T21:25:37.658Z" }, - { url = "https://files.pythonhosted.org/packages/d4/42/0f333164e6307a0687d1eb9ad256215aae2f4bd5d28f4653d6cd319a3ba3/kiwisolver-1.4.9-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b78efa4c6e804ecdf727e580dbb9cba85624d2e1c6b5cb059c66290063bd99a9", size = 1628458, upload-time = "2025-08-10T21:25:39.067Z" }, - { url = "https://files.pythonhosted.org/packages/86/b6/2dccb977d651943995a90bfe3495c2ab2ba5cd77093d9f2318a20c9a6f59/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4efec7bcf21671db6a3294ff301d2fc861c31faa3c8740d1a94689234d1b415", size = 1225640, upload-time = "2025-08-10T21:25:40.489Z" }, - { url = "https://files.pythonhosted.org/packages/50/2b/362ebd3eec46c850ccf2bfe3e30f2fc4c008750011f38a850f088c56a1c6/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90f47e70293fc3688b71271100a1a5453aa9944a81d27ff779c108372cf5567b", size = 1244074, upload-time = "2025-08-10T21:25:42.221Z" }, - { url = 
"https://files.pythonhosted.org/packages/6f/bb/f09a1e66dab8984773d13184a10a29fe67125337649d26bdef547024ed6b/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fdca1def57a2e88ef339de1737a1449d6dbf5fab184c54a1fca01d541317154", size = 1293036, upload-time = "2025-08-10T21:25:43.801Z" }, - { url = "https://files.pythonhosted.org/packages/ea/01/11ecf892f201cafda0f68fa59212edaea93e96c37884b747c181303fccd1/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cf554f21be770f5111a1690d42313e140355e687e05cf82cb23d0a721a64a48", size = 2175310, upload-time = "2025-08-10T21:25:45.045Z" }, - { url = "https://files.pythonhosted.org/packages/7f/5f/bfe11d5b934f500cc004314819ea92427e6e5462706a498c1d4fc052e08f/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fc1795ac5cd0510207482c3d1d3ed781143383b8cfd36f5c645f3897ce066220", size = 2270943, upload-time = "2025-08-10T21:25:46.393Z" }, - { url = "https://files.pythonhosted.org/packages/3d/de/259f786bf71f1e03e73d87e2db1a9a3bcab64d7b4fd780167123161630ad/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ccd09f20ccdbbd341b21a67ab50a119b64a403b09288c27481575105283c1586", size = 2440488, upload-time = "2025-08-10T21:25:48.074Z" }, - { url = "https://files.pythonhosted.org/packages/1b/76/c989c278faf037c4d3421ec07a5c452cd3e09545d6dae7f87c15f54e4edf/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:540c7c72324d864406a009d72f5d6856f49693db95d1fbb46cf86febef873634", size = 2246787, upload-time = "2025-08-10T21:25:49.442Z" }, - { url = "https://files.pythonhosted.org/packages/a2/55/c2898d84ca440852e560ca9f2a0d28e6e931ac0849b896d77231929900e7/kiwisolver-1.4.9-cp310-cp310-win_amd64.whl", hash = "sha256:ede8c6d533bc6601a47ad4046080d36b8fc99f81e6f1c17b0ac3c2dc91ac7611", size = 73730, upload-time = "2025-08-10T21:25:51.102Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/09/486d6ac523dd33b80b368247f238125d027964cfacb45c654841e88fb2ae/kiwisolver-1.4.9-cp310-cp310-win_arm64.whl", hash = "sha256:7b4da0d01ac866a57dd61ac258c5607b4cd677f63abaec7b148354d2b2cdd536", size = 65036, upload-time = "2025-08-10T21:25:52.063Z" }, - { url = "https://files.pythonhosted.org/packages/6f/ab/c80b0d5a9d8a1a65f4f815f2afff9798b12c3b9f31f1d304dd233dd920e2/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eb14a5da6dc7642b0f3a18f13654847cd8b7a2550e2645a5bda677862b03ba16", size = 124167, upload-time = "2025-08-10T21:25:53.403Z" }, - { url = "https://files.pythonhosted.org/packages/a0/c0/27fe1a68a39cf62472a300e2879ffc13c0538546c359b86f149cc19f6ac3/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39a219e1c81ae3b103643d2aedb90f1ef22650deb266ff12a19e7773f3e5f089", size = 66579, upload-time = "2025-08-10T21:25:54.79Z" }, - { url = "https://files.pythonhosted.org/packages/31/a2/a12a503ac1fd4943c50f9822678e8015a790a13b5490354c68afb8489814/kiwisolver-1.4.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2405a7d98604b87f3fc28b1716783534b1b4b8510d8142adca34ee0bc3c87543", size = 65309, upload-time = "2025-08-10T21:25:55.76Z" }, - { url = "https://files.pythonhosted.org/packages/66/e1/e533435c0be77c3f64040d68d7a657771194a63c279f55573188161e81ca/kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc1ae486f9abcef254b5618dfb4113dd49f94c68e3e027d03cf0143f3f772b61", size = 1435596, upload-time = "2025-08-10T21:25:56.861Z" }, - { url = "https://files.pythonhosted.org/packages/67/1e/51b73c7347f9aabdc7215aa79e8b15299097dc2f8e67dee2b095faca9cb0/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a1f570ce4d62d718dce3f179ee78dac3b545ac16c0c04bb363b7607a949c0d1", size = 1246548, upload-time = "2025-08-10T21:25:58.246Z" }, - { url = 
"https://files.pythonhosted.org/packages/21/aa/72a1c5d1e430294f2d32adb9542719cfb441b5da368d09d268c7757af46c/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb27e7b78d716c591e88e0a09a2139c6577865d7f2e152488c2cc6257f460872", size = 1263618, upload-time = "2025-08-10T21:25:59.857Z" }, - { url = "https://files.pythonhosted.org/packages/a3/af/db1509a9e79dbf4c260ce0cfa3903ea8945f6240e9e59d1e4deb731b1a40/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:15163165efc2f627eb9687ea5f3a28137217d217ac4024893d753f46bce9de26", size = 1317437, upload-time = "2025-08-10T21:26:01.105Z" }, - { url = "https://files.pythonhosted.org/packages/e0/f2/3ea5ee5d52abacdd12013a94130436e19969fa183faa1e7c7fbc89e9a42f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bdee92c56a71d2b24c33a7d4c2856bd6419d017e08caa7802d2963870e315028", size = 2195742, upload-time = "2025-08-10T21:26:02.675Z" }, - { url = "https://files.pythonhosted.org/packages/6f/9b/1efdd3013c2d9a2566aa6a337e9923a00590c516add9a1e89a768a3eb2fc/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:412f287c55a6f54b0650bd9b6dce5aceddb95864a1a90c87af16979d37c89771", size = 2290810, upload-time = "2025-08-10T21:26:04.009Z" }, - { url = "https://files.pythonhosted.org/packages/fb/e5/cfdc36109ae4e67361f9bc5b41323648cb24a01b9ade18784657e022e65f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2c93f00dcba2eea70af2be5f11a830a742fe6b579a1d4e00f47760ef13be247a", size = 2461579, upload-time = "2025-08-10T21:26:05.317Z" }, - { url = "https://files.pythonhosted.org/packages/62/86/b589e5e86c7610842213994cdea5add00960076bef4ae290c5fa68589cac/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f117e1a089d9411663a3207ba874f31be9ac8eaa5b533787024dc07aeb74f464", size = 2268071, upload-time = "2025-08-10T21:26:06.686Z" }, - { url = 
"https://files.pythonhosted.org/packages/3b/c6/f8df8509fd1eee6c622febe54384a96cfaf4d43bf2ccec7a0cc17e4715c9/kiwisolver-1.4.9-cp311-cp311-win_amd64.whl", hash = "sha256:be6a04e6c79819c9a8c2373317d19a96048e5a3f90bec587787e86a1153883c2", size = 73840, upload-time = "2025-08-10T21:26:07.94Z" }, - { url = "https://files.pythonhosted.org/packages/e2/2d/16e0581daafd147bc11ac53f032a2b45eabac897f42a338d0a13c1e5c436/kiwisolver-1.4.9-cp311-cp311-win_arm64.whl", hash = "sha256:0ae37737256ba2de764ddc12aed4956460277f00c4996d51a197e72f62f5eec7", size = 65159, upload-time = "2025-08-10T21:26:09.048Z" }, - { url = "https://files.pythonhosted.org/packages/86/c9/13573a747838aeb1c76e3267620daa054f4152444d1f3d1a2324b78255b5/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999", size = 123686, upload-time = "2025-08-10T21:26:10.034Z" }, - { url = "https://files.pythonhosted.org/packages/51/ea/2ecf727927f103ffd1739271ca19c424d0e65ea473fbaeea1c014aea93f6/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2", size = 66460, upload-time = "2025-08-10T21:26:11.083Z" }, - { url = "https://files.pythonhosted.org/packages/5b/5a/51f5464373ce2aeb5194508298a508b6f21d3867f499556263c64c621914/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14", size = 64952, upload-time = "2025-08-10T21:26:12.058Z" }, - { url = "https://files.pythonhosted.org/packages/70/90/6d240beb0f24b74371762873e9b7f499f1e02166a2d9c5801f4dbf8fa12e/kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04", size = 1474756, upload-time = "2025-08-10T21:26:13.096Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/42/f36816eaf465220f683fb711efdd1bbf7a7005a2473d0e4ed421389bd26c/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752", size = 1276404, upload-time = "2025-08-10T21:26:14.457Z" }, - { url = "https://files.pythonhosted.org/packages/2e/64/bc2de94800adc830c476dce44e9b40fd0809cddeef1fde9fcf0f73da301f/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77", size = 1294410, upload-time = "2025-08-10T21:26:15.73Z" }, - { url = "https://files.pythonhosted.org/packages/5f/42/2dc82330a70aa8e55b6d395b11018045e58d0bb00834502bf11509f79091/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198", size = 1343631, upload-time = "2025-08-10T21:26:17.045Z" }, - { url = "https://files.pythonhosted.org/packages/22/fd/f4c67a6ed1aab149ec5a8a401c323cee7a1cbe364381bb6c9c0d564e0e20/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d", size = 2224963, upload-time = "2025-08-10T21:26:18.737Z" }, - { url = "https://files.pythonhosted.org/packages/45/aa/76720bd4cb3713314677d9ec94dcc21ced3f1baf4830adde5bb9b2430a5f/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab", size = 2321295, upload-time = "2025-08-10T21:26:20.11Z" }, - { url = "https://files.pythonhosted.org/packages/80/19/d3ec0d9ab711242f56ae0dc2fc5d70e298bb4a1f9dfab44c027668c673a1/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2", size = 2487987, upload-time = "2025-08-10T21:26:21.49Z" }, - { url = 
"https://files.pythonhosted.org/packages/39/e9/61e4813b2c97e86b6fdbd4dd824bf72d28bcd8d4849b8084a357bc0dd64d/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145", size = 2291817, upload-time = "2025-08-10T21:26:22.812Z" }, - { url = "https://files.pythonhosted.org/packages/a0/41/85d82b0291db7504da3c2defe35c9a8a5c9803a730f297bd823d11d5fb77/kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54", size = 73895, upload-time = "2025-08-10T21:26:24.37Z" }, - { url = "https://files.pythonhosted.org/packages/e2/92/5f3068cf15ee5cb624a0c7596e67e2a0bb2adee33f71c379054a491d07da/kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60", size = 64992, upload-time = "2025-08-10T21:26:25.732Z" }, - { url = "https://files.pythonhosted.org/packages/31/c1/c2686cda909742ab66c7388e9a1a8521a59eb89f8bcfbee28fc980d07e24/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8", size = 123681, upload-time = "2025-08-10T21:26:26.725Z" }, - { url = "https://files.pythonhosted.org/packages/ca/f0/f44f50c9f5b1a1860261092e3bc91ecdc9acda848a8b8c6abfda4a24dd5c/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2", size = 66464, upload-time = "2025-08-10T21:26:27.733Z" }, - { url = "https://files.pythonhosted.org/packages/2d/7a/9d90a151f558e29c3936b8a47ac770235f436f2120aca41a6d5f3d62ae8d/kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f", size = 64961, upload-time = "2025-08-10T21:26:28.729Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/e9/f218a2cb3a9ffbe324ca29a9e399fa2d2866d7f348ec3a88df87fc248fc5/kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098", size = 1474607, upload-time = "2025-08-10T21:26:29.798Z" }, - { url = "https://files.pythonhosted.org/packages/d9/28/aac26d4c882f14de59041636292bc838db8961373825df23b8eeb807e198/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed", size = 1276546, upload-time = "2025-08-10T21:26:31.401Z" }, - { url = "https://files.pythonhosted.org/packages/8b/ad/8bfc1c93d4cc565e5069162f610ba2f48ff39b7de4b5b8d93f69f30c4bed/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525", size = 1294482, upload-time = "2025-08-10T21:26:32.721Z" }, - { url = "https://files.pythonhosted.org/packages/da/f1/6aca55ff798901d8ce403206d00e033191f63d82dd708a186e0ed2067e9c/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78", size = 1343720, upload-time = "2025-08-10T21:26:34.032Z" }, - { url = "https://files.pythonhosted.org/packages/d1/91/eed031876c595c81d90d0f6fc681ece250e14bf6998c3d7c419466b523b7/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b", size = 2224907, upload-time = "2025-08-10T21:26:35.824Z" }, - { url = "https://files.pythonhosted.org/packages/e9/ec/4d1925f2e49617b9cca9c34bfa11adefad49d00db038e692a559454dfb2e/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799", size = 2321334, upload-time = "2025-08-10T21:26:37.534Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/cb/450cd4499356f68802750c6ddc18647b8ea01ffa28f50d20598e0befe6e9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3", size = 2488313, upload-time = "2025-08-10T21:26:39.191Z" }, - { url = "https://files.pythonhosted.org/packages/71/67/fc76242bd99f885651128a5d4fa6083e5524694b7c88b489b1b55fdc491d/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c", size = 2291970, upload-time = "2025-08-10T21:26:40.828Z" }, - { url = "https://files.pythonhosted.org/packages/75/bd/f1a5d894000941739f2ae1b65a32892349423ad49c2e6d0771d0bad3fae4/kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d", size = 73894, upload-time = "2025-08-10T21:26:42.33Z" }, - { url = "https://files.pythonhosted.org/packages/95/38/dce480814d25b99a391abbddadc78f7c117c6da34be68ca8b02d5848b424/kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2", size = 64995, upload-time = "2025-08-10T21:26:43.889Z" }, - { url = "https://files.pythonhosted.org/packages/e2/37/7d218ce5d92dadc5ebdd9070d903e0c7cf7edfe03f179433ac4d13ce659c/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1", size = 126510, upload-time = "2025-08-10T21:26:44.915Z" }, - { url = "https://files.pythonhosted.org/packages/23/b0/e85a2b48233daef4b648fb657ebbb6f8367696a2d9548a00b4ee0eb67803/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1", size = 67903, upload-time = "2025-08-10T21:26:45.934Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/98/f2425bc0113ad7de24da6bb4dae1343476e95e1d738be7c04d31a5d037fd/kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11", size = 66402, upload-time = "2025-08-10T21:26:47.101Z" }, - { url = "https://files.pythonhosted.org/packages/98/d8/594657886df9f34c4177cc353cc28ca7e6e5eb562d37ccc233bff43bbe2a/kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c", size = 1582135, upload-time = "2025-08-10T21:26:48.665Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c6/38a115b7170f8b306fc929e166340c24958347308ea3012c2b44e7e295db/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197", size = 1389409, upload-time = "2025-08-10T21:26:50.335Z" }, - { url = "https://files.pythonhosted.org/packages/bf/3b/e04883dace81f24a568bcee6eb3001da4ba05114afa622ec9b6fafdc1f5e/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c", size = 1401763, upload-time = "2025-08-10T21:26:51.867Z" }, - { url = "https://files.pythonhosted.org/packages/9f/80/20ace48e33408947af49d7d15c341eaee69e4e0304aab4b7660e234d6288/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185", size = 1453643, upload-time = "2025-08-10T21:26:53.592Z" }, - { url = "https://files.pythonhosted.org/packages/64/31/6ce4380a4cd1f515bdda976a1e90e547ccd47b67a1546d63884463c92ca9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748", size = 2330818, upload-time = "2025-08-10T21:26:55.051Z" }, - { url = 
"https://files.pythonhosted.org/packages/fa/e9/3f3fcba3bcc7432c795b82646306e822f3fd74df0ee81f0fa067a1f95668/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64", size = 2419963, upload-time = "2025-08-10T21:26:56.421Z" }, - { url = "https://files.pythonhosted.org/packages/99/43/7320c50e4133575c66e9f7dadead35ab22d7c012a3b09bb35647792b2a6d/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff", size = 2594639, upload-time = "2025-08-10T21:26:57.882Z" }, - { url = "https://files.pythonhosted.org/packages/65/d6/17ae4a270d4a987ef8a385b906d2bdfc9fce502d6dc0d3aea865b47f548c/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07", size = 2391741, upload-time = "2025-08-10T21:26:59.237Z" }, - { url = "https://files.pythonhosted.org/packages/2a/8f/8f6f491d595a9e5912971f3f863d81baddccc8a4d0c3749d6a0dd9ffc9df/kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c", size = 68646, upload-time = "2025-08-10T21:27:00.52Z" }, - { url = "https://files.pythonhosted.org/packages/6b/32/6cc0fbc9c54d06c2969faa9c1d29f5751a2e51809dd55c69055e62d9b426/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386", size = 123806, upload-time = "2025-08-10T21:27:01.537Z" }, - { url = "https://files.pythonhosted.org/packages/b2/dd/2bfb1d4a4823d92e8cbb420fe024b8d2167f72079b3bb941207c42570bdf/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552", size = 66605, upload-time = "2025-08-10T21:27:03.335Z" }, - { url = 
"https://files.pythonhosted.org/packages/f7/69/00aafdb4e4509c2ca6064646cba9cd4b37933898f426756adb2cb92ebbed/kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3", size = 64925, upload-time = "2025-08-10T21:27:04.339Z" }, - { url = "https://files.pythonhosted.org/packages/43/dc/51acc6791aa14e5cb6d8a2e28cefb0dc2886d8862795449d021334c0df20/kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58", size = 1472414, upload-time = "2025-08-10T21:27:05.437Z" }, - { url = "https://files.pythonhosted.org/packages/3d/bb/93fa64a81db304ac8a246f834d5094fae4b13baf53c839d6bb6e81177129/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4", size = 1281272, upload-time = "2025-08-10T21:27:07.063Z" }, - { url = "https://files.pythonhosted.org/packages/70/e6/6df102916960fb8d05069d4bd92d6d9a8202d5a3e2444494e7cd50f65b7a/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df", size = 1298578, upload-time = "2025-08-10T21:27:08.452Z" }, - { url = "https://files.pythonhosted.org/packages/7c/47/e142aaa612f5343736b087864dbaebc53ea8831453fb47e7521fa8658f30/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6", size = 1345607, upload-time = "2025-08-10T21:27:10.125Z" }, - { url = "https://files.pythonhosted.org/packages/54/89/d641a746194a0f4d1a3670fb900d0dbaa786fb98341056814bc3f058fa52/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5", size = 2230150, upload-time = "2025-08-10T21:27:11.484Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/6b/5ee1207198febdf16ac11f78c5ae40861b809cbe0e6d2a8d5b0b3044b199/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf", size = 2325979, upload-time = "2025-08-10T21:27:12.917Z" }, - { url = "https://files.pythonhosted.org/packages/fc/ff/b269eefd90f4ae14dcc74973d5a0f6d28d3b9bb1afd8c0340513afe6b39a/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5", size = 2491456, upload-time = "2025-08-10T21:27:14.353Z" }, - { url = "https://files.pythonhosted.org/packages/fc/d4/10303190bd4d30de547534601e259a4fbf014eed94aae3e5521129215086/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce", size = 2294621, upload-time = "2025-08-10T21:27:15.808Z" }, - { url = "https://files.pythonhosted.org/packages/28/e0/a9a90416fce5c0be25742729c2ea52105d62eda6c4be4d803c2a7be1fa50/kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7", size = 75417, upload-time = "2025-08-10T21:27:17.436Z" }, - { url = "https://files.pythonhosted.org/packages/1f/10/6949958215b7a9a264299a7db195564e87900f709db9245e4ebdd3c70779/kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c", size = 66582, upload-time = "2025-08-10T21:27:18.436Z" }, - { url = "https://files.pythonhosted.org/packages/ec/79/60e53067903d3bc5469b369fe0dfc6b3482e2133e85dae9daa9527535991/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548", size = 126514, upload-time = "2025-08-10T21:27:19.465Z" }, - { url = 
"https://files.pythonhosted.org/packages/25/d1/4843d3e8d46b072c12a38c97c57fab4608d36e13fe47d47ee96b4d61ba6f/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d", size = 67905, upload-time = "2025-08-10T21:27:20.51Z" }, - { url = "https://files.pythonhosted.org/packages/8c/ae/29ffcbd239aea8b93108de1278271ae764dfc0d803a5693914975f200596/kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c", size = 66399, upload-time = "2025-08-10T21:27:21.496Z" }, - { url = "https://files.pythonhosted.org/packages/a1/ae/d7ba902aa604152c2ceba5d352d7b62106bedbccc8e95c3934d94472bfa3/kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122", size = 1582197, upload-time = "2025-08-10T21:27:22.604Z" }, - { url = "https://files.pythonhosted.org/packages/f2/41/27c70d427eddb8bc7e4f16420a20fefc6f480312122a59a959fdfe0445ad/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64", size = 1390125, upload-time = "2025-08-10T21:27:24.036Z" }, - { url = "https://files.pythonhosted.org/packages/41/42/b3799a12bafc76d962ad69083f8b43b12bf4fe78b097b12e105d75c9b8f1/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134", size = 1402612, upload-time = "2025-08-10T21:27:25.773Z" }, - { url = "https://files.pythonhosted.org/packages/d2/b5/a210ea073ea1cfaca1bb5c55a62307d8252f531beb364e18aa1e0888b5a0/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370", size = 1453990, upload-time = "2025-08-10T21:27:27.089Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/ce/a829eb8c033e977d7ea03ed32fb3c1781b4fa0433fbadfff29e39c676f32/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21", size = 2331601, upload-time = "2025-08-10T21:27:29.343Z" }, - { url = "https://files.pythonhosted.org/packages/e0/4b/b5e97eb142eb9cd0072dacfcdcd31b1c66dc7352b0f7c7255d339c0edf00/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a", size = 2422041, upload-time = "2025-08-10T21:27:30.754Z" }, - { url = "https://files.pythonhosted.org/packages/40/be/8eb4cd53e1b85ba4edc3a9321666f12b83113a178845593307a3e7891f44/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f", size = 2594897, upload-time = "2025-08-10T21:27:32.803Z" }, - { url = "https://files.pythonhosted.org/packages/99/dd/841e9a66c4715477ea0abc78da039832fbb09dac5c35c58dc4c41a407b8a/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369", size = 2391835, upload-time = "2025-08-10T21:27:34.23Z" }, - { url = "https://files.pythonhosted.org/packages/0c/28/4b2e5c47a0da96896fdfdb006340ade064afa1e63675d01ea5ac222b6d52/kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891", size = 79988, upload-time = "2025-08-10T21:27:35.587Z" }, - { url = "https://files.pythonhosted.org/packages/80/be/3578e8afd18c88cdf9cb4cffde75a96d2be38c5a903f1ed0ceec061bd09e/kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32", size = 70260, upload-time = "2025-08-10T21:27:36.606Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/63/fde392691690f55b38d5dd7b3710f5353bf7a8e52de93a22968801ab8978/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4d1d9e582ad4d63062d34077a9a1e9f3c34088a2ec5135b1f7190c07cf366527", size = 60183, upload-time = "2025-08-10T21:27:37.669Z" }, - { url = "https://files.pythonhosted.org/packages/27/b1/6aad34edfdb7cced27f371866f211332bba215bfd918ad3322a58f480d8b/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:deed0c7258ceb4c44ad5ec7d9918f9f14fd05b2be86378d86cf50e63d1e7b771", size = 58675, upload-time = "2025-08-10T21:27:39.031Z" }, - { url = "https://files.pythonhosted.org/packages/9d/1a/23d855a702bb35a76faed5ae2ba3de57d323f48b1f6b17ee2176c4849463/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a590506f303f512dff6b7f75fd2fd18e16943efee932008fe7140e5fa91d80e", size = 80277, upload-time = "2025-08-10T21:27:40.129Z" }, - { url = "https://files.pythonhosted.org/packages/5a/5b/5239e3c2b8fb5afa1e8508f721bb77325f740ab6994d963e61b2b7abcc1e/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e09c2279a4d01f099f52d5c4b3d9e208e91edcbd1a175c9662a8b16e000fece9", size = 77994, upload-time = "2025-08-10T21:27:41.181Z" }, - { url = "https://files.pythonhosted.org/packages/f9/1c/5d4d468fb16f8410e596ed0eac02d2c68752aa7dc92997fe9d60a7147665/kiwisolver-1.4.9-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c9e7cdf45d594ee04d5be1b24dd9d49f3d1590959b2271fb30b5ca2b262c00fb", size = 73744, upload-time = "2025-08-10T21:27:42.254Z" }, - { url = "https://files.pythonhosted.org/packages/a3/0f/36d89194b5a32c054ce93e586d4049b6c2c22887b0eb229c61c68afd3078/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:720e05574713db64c356e86732c0f3c5252818d05f9df320f0ad8380641acea5", size = 60104, upload-time = "2025-08-10T21:27:43.287Z" }, - { url = 
"https://files.pythonhosted.org/packages/52/ba/4ed75f59e4658fd21fe7dde1fee0ac397c678ec3befba3fe6482d987af87/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:17680d737d5335b552994a2008fab4c851bcd7de33094a82067ef3a576ff02fa", size = 58592, upload-time = "2025-08-10T21:27:44.314Z" }, - { url = "https://files.pythonhosted.org/packages/33/01/a8ea7c5ea32a9b45ceeaee051a04c8ed4320f5add3c51bfa20879b765b70/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85b5352f94e490c028926ea567fc569c52ec79ce131dadb968d3853e809518c2", size = 80281, upload-time = "2025-08-10T21:27:45.369Z" }, - { url = "https://files.pythonhosted.org/packages/da/e3/dbd2ecdce306f1d07a1aaf324817ee993aab7aee9db47ceac757deabafbe/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:464415881e4801295659462c49461a24fb107c140de781d55518c4b80cb6790f", size = 78009, upload-time = "2025-08-10T21:27:46.376Z" }, - { url = "https://files.pythonhosted.org/packages/da/e9/0d4add7873a73e462aeb45c036a2dead2562b825aa46ba326727b3f31016/kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1", size = 73929, upload-time = "2025-08-10T21:27:48.236Z" }, -] - [[package]] name = "lark" version = "1.3.1" @@ -2913,81 +2578,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] -[[package]] -name = "matplotlib" -version = "3.10.8" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "contourpy", version = "1.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "contourpy", version = "1.3.3", 
source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "cycler" }, - { name = "fonttools" }, - { name = "kiwisolver" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "packaging" }, - { name = "pillow" }, - { name = "pyparsing" }, - { name = "python-dateutil" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8a/76/d3c6e3a13fe484ebe7718d14e269c9569c4eb0020a968a327acb3b9a8fe6/matplotlib-3.10.8.tar.gz", hash = "sha256:2299372c19d56bcd35cf05a2738308758d32b9eaed2371898d8f5bd33f084aa3", size = 34806269, upload-time = "2025-12-10T22:56:51.155Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/58/be/a30bd917018ad220c400169fba298f2bb7003c8ccbc0c3e24ae2aacad1e8/matplotlib-3.10.8-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:00270d217d6b20d14b584c521f810d60c5c78406dc289859776550df837dcda7", size = 8239828, upload-time = "2025-12-10T22:55:02.313Z" }, - { url = "https://files.pythonhosted.org/packages/58/27/ca01e043c4841078e82cf6e80a6993dfecd315c3d79f5f3153afbb8e1ec6/matplotlib-3.10.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b3c1cc42aa184b3f738cfa18c1c1d72fd496d85467a6cf7b807936d39aa656", size = 8128050, upload-time = "2025-12-10T22:55:04.997Z" }, - { url = "https://files.pythonhosted.org/packages/cb/aa/7ab67f2b729ae6a91bcf9dcac0affb95fb8c56f7fd2b2af894ae0b0cf6fa/matplotlib-3.10.8-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ee40c27c795bda6a5292e9cff9890189d32f7e3a0bf04e0e3c9430c4a00c37df", size = 8700452, upload-time = "2025-12-10T22:55:07.47Z" }, - { url = 
"https://files.pythonhosted.org/packages/73/ae/2d5817b0acee3c49b7e7ccfbf5b273f284957cc8e270adf36375db353190/matplotlib-3.10.8-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a48f2b74020919552ea25d222d5cc6af9ca3f4eb43a93e14d068457f545c2a17", size = 9534928, upload-time = "2025-12-10T22:55:10.566Z" }, - { url = "https://files.pythonhosted.org/packages/c9/5b/8e66653e9f7c39cb2e5cab25fce4810daffa2bff02cbf5f3077cea9e942c/matplotlib-3.10.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f254d118d14a7f99d616271d6c3c27922c092dac11112670b157798b89bf4933", size = 9586377, upload-time = "2025-12-10T22:55:12.362Z" }, - { url = "https://files.pythonhosted.org/packages/e2/e2/fd0bbadf837f81edb0d208ba8f8cb552874c3b16e27cb91a31977d90875d/matplotlib-3.10.8-cp310-cp310-win_amd64.whl", hash = "sha256:f9b587c9c7274c1613a30afabf65a272114cd6cdbe67b3406f818c79d7ab2e2a", size = 8128127, upload-time = "2025-12-10T22:55:14.436Z" }, - { url = "https://files.pythonhosted.org/packages/f8/86/de7e3a1cdcfc941483af70609edc06b83e7c8a0e0dc9ac325200a3f4d220/matplotlib-3.10.8-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6be43b667360fef5c754dda5d25a32e6307a03c204f3c0fc5468b78fa87b4160", size = 8251215, upload-time = "2025-12-10T22:55:16.175Z" }, - { url = "https://files.pythonhosted.org/packages/fd/14/baad3222f424b19ce6ad243c71de1ad9ec6b2e4eb1e458a48fdc6d120401/matplotlib-3.10.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2b336e2d91a3d7006864e0990c83b216fcdca64b5a6484912902cef87313d78", size = 8139625, upload-time = "2025-12-10T22:55:17.712Z" }, - { url = "https://files.pythonhosted.org/packages/8f/a0/7024215e95d456de5883e6732e708d8187d9753a21d32f8ddb3befc0c445/matplotlib-3.10.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:efb30e3baaea72ce5928e32bab719ab4770099079d66726a62b11b1ef7273be4", size = 8712614, upload-time = "2025-12-10T22:55:20.8Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/f4/b8347351da9a5b3f41e26cf547252d861f685c6867d179a7c9d60ad50189/matplotlib-3.10.8-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d56a1efd5bfd61486c8bc968fa18734464556f0fb8e51690f4ac25d85cbbbbc2", size = 9540997, upload-time = "2025-12-10T22:55:23.258Z" }, - { url = "https://files.pythonhosted.org/packages/9e/c0/c7b914e297efe0bc36917bf216b2acb91044b91e930e878ae12981e461e5/matplotlib-3.10.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:238b7ce5717600615c895050239ec955d91f321c209dd110db988500558e70d6", size = 9596825, upload-time = "2025-12-10T22:55:25.217Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d3/a4bbc01c237ab710a1f22b4da72f4ff6d77eb4c7735ea9811a94ae239067/matplotlib-3.10.8-cp311-cp311-win_amd64.whl", hash = "sha256:18821ace09c763ec93aef5eeff087ee493a24051936d7b9ebcad9662f66501f9", size = 8135090, upload-time = "2025-12-10T22:55:27.162Z" }, - { url = "https://files.pythonhosted.org/packages/89/dd/a0b6588f102beab33ca6f5218b31725216577b2a24172f327eaf6417d5c9/matplotlib-3.10.8-cp311-cp311-win_arm64.whl", hash = "sha256:bab485bcf8b1c7d2060b4fcb6fc368a9e6f4cd754c9c2fea281f4be21df394a2", size = 8012377, upload-time = "2025-12-10T22:55:29.185Z" }, - { url = "https://files.pythonhosted.org/packages/9e/67/f997cdcbb514012eb0d10cd2b4b332667997fb5ebe26b8d41d04962fa0e6/matplotlib-3.10.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:64fcc24778ca0404ce0cb7b6b77ae1f4c7231cdd60e6778f999ee05cbd581b9a", size = 8260453, upload-time = "2025-12-10T22:55:30.709Z" }, - { url = "https://files.pythonhosted.org/packages/7e/65/07d5f5c7f7c994f12c768708bd2e17a4f01a2b0f44a1c9eccad872433e2e/matplotlib-3.10.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9a5ca4ac220a0cdd1ba6bcba3608547117d30468fefce49bb26f55c1a3d5c58", size = 8148321, upload-time = "2025-12-10T22:55:33.265Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/f3/c5195b1ae57ef85339fd7285dfb603b22c8b4e79114bae5f4f0fcf688677/matplotlib-3.10.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3ab4aabc72de4ff77b3ec33a6d78a68227bf1123465887f9905ba79184a1cc04", size = 8716944, upload-time = "2025-12-10T22:55:34.922Z" }, - { url = "https://files.pythonhosted.org/packages/00/f9/7638f5cc82ec8a7aa005de48622eecc3ed7c9854b96ba15bd76b7fd27574/matplotlib-3.10.8-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24d50994d8c5816ddc35411e50a86ab05f575e2530c02752e02538122613371f", size = 9550099, upload-time = "2025-12-10T22:55:36.789Z" }, - { url = "https://files.pythonhosted.org/packages/57/61/78cd5920d35b29fd2a0fe894de8adf672ff52939d2e9b43cb83cd5ce1bc7/matplotlib-3.10.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:99eefd13c0dc3b3c1b4d561c1169e65fe47aab7b8158754d7c084088e2329466", size = 9613040, upload-time = "2025-12-10T22:55:38.715Z" }, - { url = "https://files.pythonhosted.org/packages/30/4e/c10f171b6e2f44d9e3a2b96efa38b1677439d79c99357600a62cc1e9594e/matplotlib-3.10.8-cp312-cp312-win_amd64.whl", hash = "sha256:dd80ecb295460a5d9d260df63c43f4afbdd832d725a531f008dad1664f458adf", size = 8142717, upload-time = "2025-12-10T22:55:41.103Z" }, - { url = "https://files.pythonhosted.org/packages/f1/76/934db220026b5fef85f45d51a738b91dea7d70207581063cd9bd8fafcf74/matplotlib-3.10.8-cp312-cp312-win_arm64.whl", hash = "sha256:3c624e43ed56313651bc18a47f838b60d7b8032ed348911c54906b130b20071b", size = 8012751, upload-time = "2025-12-10T22:55:42.684Z" }, - { url = "https://files.pythonhosted.org/packages/3d/b9/15fd5541ef4f5b9a17eefd379356cf12175fe577424e7b1d80676516031a/matplotlib-3.10.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3f2e409836d7f5ac2f1c013110a4d50b9f7edc26328c108915f9075d7d7a91b6", size = 8261076, upload-time = "2025-12-10T22:55:44.648Z" }, - { url = 
"https://files.pythonhosted.org/packages/8d/a0/2ba3473c1b66b9c74dc7107c67e9008cb1782edbe896d4c899d39ae9cf78/matplotlib-3.10.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56271f3dac49a88d7fca5060f004d9d22b865f743a12a23b1e937a0be4818ee1", size = 8148794, upload-time = "2025-12-10T22:55:46.252Z" }, - { url = "https://files.pythonhosted.org/packages/75/97/a471f1c3eb1fd6f6c24a31a5858f443891d5127e63a7788678d14e249aea/matplotlib-3.10.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a0a7f52498f72f13d4a25ea70f35f4cb60642b466cbb0a9be951b5bc3f45a486", size = 8718474, upload-time = "2025-12-10T22:55:47.864Z" }, - { url = "https://files.pythonhosted.org/packages/01/be/cd478f4b66f48256f42927d0acbcd63a26a893136456cd079c0cc24fbabf/matplotlib-3.10.8-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:646d95230efb9ca614a7a594d4fcacde0ac61d25e37dd51710b36477594963ce", size = 9549637, upload-time = "2025-12-10T22:55:50.048Z" }, - { url = "https://files.pythonhosted.org/packages/5d/7c/8dc289776eae5109e268c4fb92baf870678dc048a25d4ac903683b86d5bf/matplotlib-3.10.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f89c151aab2e2e23cb3fe0acad1e8b82841fd265379c4cecd0f3fcb34c15e0f6", size = 9613678, upload-time = "2025-12-10T22:55:52.21Z" }, - { url = "https://files.pythonhosted.org/packages/64/40/37612487cc8a437d4dd261b32ca21fe2d79510fe74af74e1f42becb1bdb8/matplotlib-3.10.8-cp313-cp313-win_amd64.whl", hash = "sha256:e8ea3e2d4066083e264e75c829078f9e149fa119d27e19acd503de65e0b13149", size = 8142686, upload-time = "2025-12-10T22:55:54.253Z" }, - { url = "https://files.pythonhosted.org/packages/66/52/8d8a8730e968185514680c2a6625943f70269509c3dcfc0dcf7d75928cb8/matplotlib-3.10.8-cp313-cp313-win_arm64.whl", hash = "sha256:c108a1d6fa78a50646029cb6d49808ff0fc1330fda87fa6f6250c6b5369b6645", size = 8012917, upload-time = "2025-12-10T22:55:56.268Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/27/51fe26e1062f298af5ef66343d8ef460e090a27fea73036c76c35821df04/matplotlib-3.10.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ad3d9833a64cf48cc4300f2b406c3d0f4f4724a91c0bd5640678a6ba7c102077", size = 8305679, upload-time = "2025-12-10T22:55:57.856Z" }, - { url = "https://files.pythonhosted.org/packages/2c/1e/4de865bc591ac8e3062e835f42dd7fe7a93168d519557837f0e37513f629/matplotlib-3.10.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:eb3823f11823deade26ce3b9f40dcb4a213da7a670013929f31d5f5ed1055b22", size = 8198336, upload-time = "2025-12-10T22:55:59.371Z" }, - { url = "https://files.pythonhosted.org/packages/c6/cb/2f7b6e75fb4dce87ef91f60cac4f6e34f4c145ab036a22318ec837971300/matplotlib-3.10.8-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d9050fee89a89ed57b4fb2c1bfac9a3d0c57a0d55aed95949eedbc42070fea39", size = 8731653, upload-time = "2025-12-10T22:56:01.032Z" }, - { url = "https://files.pythonhosted.org/packages/46/b3/bd9c57d6ba670a37ab31fb87ec3e8691b947134b201f881665b28cc039ff/matplotlib-3.10.8-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b44d07310e404ba95f8c25aa5536f154c0a8ec473303535949e52eb71d0a1565", size = 9561356, upload-time = "2025-12-10T22:56:02.95Z" }, - { url = "https://files.pythonhosted.org/packages/c0/3d/8b94a481456dfc9dfe6e39e93b5ab376e50998cddfd23f4ae3b431708f16/matplotlib-3.10.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0a33deb84c15ede243aead39f77e990469fff93ad1521163305095b77b72ce4a", size = 9614000, upload-time = "2025-12-10T22:56:05.411Z" }, - { url = "https://files.pythonhosted.org/packages/bd/cd/bc06149fe5585ba800b189a6a654a75f1f127e8aab02fd2be10df7fa500c/matplotlib-3.10.8-cp313-cp313t-win_amd64.whl", hash = "sha256:3a48a78d2786784cc2413e57397981fb45c79e968d99656706018d6e62e57958", size = 8220043, upload-time = "2025-12-10T22:56:07.551Z" }, - { url = 
"https://files.pythonhosted.org/packages/e3/de/b22cf255abec916562cc04eef457c13e58a1990048de0c0c3604d082355e/matplotlib-3.10.8-cp313-cp313t-win_arm64.whl", hash = "sha256:15d30132718972c2c074cd14638c7f4592bd98719e2308bccea40e0538bc0cb5", size = 8062075, upload-time = "2025-12-10T22:56:09.178Z" }, - { url = "https://files.pythonhosted.org/packages/3c/43/9c0ff7a2f11615e516c3b058e1e6e8f9614ddeca53faca06da267c48345d/matplotlib-3.10.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b53285e65d4fa4c86399979e956235deb900be5baa7fc1218ea67fbfaeaadd6f", size = 8262481, upload-time = "2025-12-10T22:56:10.885Z" }, - { url = "https://files.pythonhosted.org/packages/6f/ca/e8ae28649fcdf039fda5ef554b40a95f50592a3c47e6f7270c9561c12b07/matplotlib-3.10.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:32f8dce744be5569bebe789e46727946041199030db8aeb2954d26013a0eb26b", size = 8151473, upload-time = "2025-12-10T22:56:12.377Z" }, - { url = "https://files.pythonhosted.org/packages/f1/6f/009d129ae70b75e88cbe7e503a12a4c0670e08ed748a902c2568909e9eb5/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cf267add95b1c88300d96ca837833d4112756045364f5c734a2276038dae27d", size = 9553896, upload-time = "2025-12-10T22:56:14.432Z" }, - { url = "https://files.pythonhosted.org/packages/f5/26/4221a741eb97967bc1fd5e4c52b9aa5a91b2f4ec05b59f6def4d820f9df9/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2cf5bd12cecf46908f286d7838b2abc6c91cda506c0445b8223a7c19a00df008", size = 9824193, upload-time = "2025-12-10T22:56:16.29Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f3/3abf75f38605772cf48a9daf5821cd4f563472f38b4b828c6fba6fa6d06e/matplotlib-3.10.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:41703cc95688f2516b480f7f339d8851a6035f18e100ee6a32bc0b8536a12a9c", size = 9615444, upload-time = "2025-12-10T22:56:18.155Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/a5/de89ac80f10b8dc615807ee1133cd99ac74082581196d4d9590bea10690d/matplotlib-3.10.8-cp314-cp314-win_amd64.whl", hash = "sha256:83d282364ea9f3e52363da262ce32a09dfe241e4080dcedda3c0db059d3c1f11", size = 8272719, upload-time = "2025-12-10T22:56:20.366Z" }, - { url = "https://files.pythonhosted.org/packages/69/ce/b006495c19ccc0a137b48083168a37bd056392dee02f87dba0472f2797fe/matplotlib-3.10.8-cp314-cp314-win_arm64.whl", hash = "sha256:2c1998e92cd5999e295a731bcb2911c75f597d937341f3030cc24ef2733d78a8", size = 8144205, upload-time = "2025-12-10T22:56:22.239Z" }, - { url = "https://files.pythonhosted.org/packages/68/d9/b31116a3a855bd313c6fcdb7226926d59b041f26061c6c5b1be66a08c826/matplotlib-3.10.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b5a2b97dbdc7d4f353ebf343744f1d1f1cca8aa8bfddb4262fcf4306c3761d50", size = 8305785, upload-time = "2025-12-10T22:56:24.218Z" }, - { url = "https://files.pythonhosted.org/packages/1e/90/6effe8103f0272685767ba5f094f453784057072f49b393e3ea178fe70a5/matplotlib-3.10.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3f5c3e4da343bba819f0234186b9004faba952cc420fbc522dc4e103c1985908", size = 8198361, upload-time = "2025-12-10T22:56:26.787Z" }, - { url = "https://files.pythonhosted.org/packages/d7/65/a73188711bea603615fc0baecca1061429ac16940e2385433cc778a9d8e7/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f62550b9a30afde8c1c3ae450e5eb547d579dd69b25c2fc7a1c67f934c1717a", size = 9561357, upload-time = "2025-12-10T22:56:28.953Z" }, - { url = "https://files.pythonhosted.org/packages/f4/3d/b5c5d5d5be8ce63292567f0e2c43dde9953d3ed86ac2de0a72e93c8f07a1/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:495672de149445ec1b772ff2c9ede9b769e3cb4f0d0aa7fa730d7f59e2d4e1c1", size = 9823610, upload-time = "2025-12-10T22:56:31.455Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/4b/e7beb6bbd49f6bae727a12b270a2654d13c397576d25bd6786e47033300f/matplotlib-3.10.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:595ba4d8fe983b88f0eec8c26a241e16d6376fe1979086232f481f8f3f67494c", size = 9614011, upload-time = "2025-12-10T22:56:33.85Z" }, - { url = "https://files.pythonhosted.org/packages/7c/e6/76f2813d31f032e65f6f797e3f2f6e4aab95b65015924b1c51370395c28a/matplotlib-3.10.8-cp314-cp314t-win_amd64.whl", hash = "sha256:25d380fe8b1dc32cf8f0b1b448470a77afb195438bafdf1d858bfb876f3edf7b", size = 8362801, upload-time = "2025-12-10T22:56:36.107Z" }, - { url = "https://files.pythonhosted.org/packages/5d/49/d651878698a0b67f23aa28e17f45a6d6dd3d3f933fa29087fa4ce5947b5a/matplotlib-3.10.8-cp314-cp314t-win_arm64.whl", hash = "sha256:113bb52413ea508ce954a02c10ffd0d565f9c3bc7f2eddc27dfe1731e71c7b5f", size = 8192560, upload-time = "2025-12-10T22:56:38.008Z" }, - { url = "https://files.pythonhosted.org/packages/f5/43/31d59500bb950b0d188e149a2e552040528c13d6e3d6e84d0cccac593dcd/matplotlib-3.10.8-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f97aeb209c3d2511443f8797e3e5a569aebb040d4f8bc79aa3ee78a8fb9e3dd8", size = 8237252, upload-time = "2025-12-10T22:56:39.529Z" }, - { url = "https://files.pythonhosted.org/packages/0c/2c/615c09984f3c5f907f51c886538ad785cf72e0e11a3225de2c0f9442aecc/matplotlib-3.10.8-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fb061f596dad3a0f52b60dc6a5dec4a0c300dec41e058a7efe09256188d170b7", size = 8124693, upload-time = "2025-12-10T22:56:41.758Z" }, - { url = "https://files.pythonhosted.org/packages/91/e1/2757277a1c56041e1fc104b51a0f7b9a4afc8eb737865d63cababe30bc61/matplotlib-3.10.8-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:12d90df9183093fcd479f4172ac26b322b1248b15729cb57f42f71f24c7e37a3", size = 8702205, upload-time = "2025-12-10T22:56:43.415Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/30/3afaa31c757f34b7725ab9d2ba8b48b5e89c2019c003e7d0ead143aabc5a/matplotlib-3.10.8-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6da7c2ce169267d0d066adcf63758f0604aa6c3eebf67458930f9d9b79ad1db1", size = 8249198, upload-time = "2025-12-10T22:56:45.584Z" }, - { url = "https://files.pythonhosted.org/packages/48/2f/6334aec331f57485a642a7c8be03cb286f29111ae71c46c38b363230063c/matplotlib-3.10.8-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9153c3292705be9f9c64498a8872118540c3f4123d1a1c840172edf262c8be4a", size = 8136817, upload-time = "2025-12-10T22:56:47.339Z" }, - { url = "https://files.pythonhosted.org/packages/73/e4/6d6f14b2a759c622f191b2d67e9075a3f56aaccb3be4bb9bb6890030d0a0/matplotlib-3.10.8-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ae029229a57cd1e8fe542485f27e7ca7b23aa9e8944ddb4985d0bc444f1eca2", size = 8713867, upload-time = "2025-12-10T22:56:48.954Z" }, -] - [[package]] name = "matplotlib-inline" version = "0.2.1" From cd39941dfc6ac4d75cd93bb68d10d7ba6da18b45 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:28:42 -0700 Subject: [PATCH 46/64] more tests for hf image folder upload --- .../integrations/huggingface/test_client.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/packages/data-designer/tests/integrations/huggingface/test_client.py b/packages/data-designer/tests/integrations/huggingface/test_client.py index 735ea3bc..ba11a485 100644 --- a/packages/data-designer/tests/integrations/huggingface/test_client.py +++ b/packages/data-designer/tests/integrations/huggingface/test_client.py @@ -462,6 +462,79 @@ def test_validate_dataset_path_invalid_builder_config_json(tmp_path: Path) -> No client.upload_dataset("test/dataset", base_path, "Test") +def test_upload_dataset_uploads_images_folder( + mock_hf_api: MagicMock, mock_dataset_card: MagicMock, sample_dataset_path: Path +) -> None: + """Test that 
upload_dataset uploads images when images folder exists with subfolders.""" + # Create images directory with column subfolders (matches MediaStorage structure) + images_dir = sample_dataset_path / "images" + col_dir = images_dir / "my_image_column" + col_dir.mkdir(parents=True) + (col_dir / "uuid1.png").write_bytes(b"fake png data") + (col_dir / "uuid2.png").write_bytes(b"fake png data") + + client = HuggingFaceHubClient(token="test-token") + client.upload_dataset(repo_id="test/dataset", base_dataset_path=sample_dataset_path, description="Test dataset") + + # Check that upload_folder was called for images + image_calls = [call for call in mock_hf_api.upload_folder.call_args_list if call.kwargs["path_in_repo"] == "images"] + assert len(image_calls) == 1 + assert image_calls[0].kwargs["folder_path"] == str(images_dir) + assert image_calls[0].kwargs["repo_type"] == "dataset" + + +def test_upload_dataset_skips_images_when_folder_missing( + mock_hf_api: MagicMock, mock_dataset_card: MagicMock, sample_dataset_path: Path +) -> None: + """Test that upload_dataset skips images upload when images folder doesn't exist.""" + # sample_dataset_path has no images/ directory by default + client = HuggingFaceHubClient(token="test-token") + client.upload_dataset(repo_id="test/dataset", base_dataset_path=sample_dataset_path, description="Test dataset") + + # No upload_folder call should target "images" + image_calls = [call for call in mock_hf_api.upload_folder.call_args_list if call.kwargs["path_in_repo"] == "images"] + assert len(image_calls) == 0 + + +def test_upload_dataset_skips_images_when_folder_empty( + mock_hf_api: MagicMock, mock_dataset_card: MagicMock, sample_dataset_path: Path +) -> None: + """Test that upload_dataset skips images upload when images folder exists but is empty.""" + images_dir = sample_dataset_path / "images" + images_dir.mkdir() + + client = HuggingFaceHubClient(token="test-token") + client.upload_dataset(repo_id="test/dataset", 
base_dataset_path=sample_dataset_path, description="Test dataset") + + image_calls = [call for call in mock_hf_api.upload_folder.call_args_list if call.kwargs["path_in_repo"] == "images"] + assert len(image_calls) == 0 + + +def test_upload_dataset_images_upload_failure( + mock_hf_api: MagicMock, mock_dataset_card: MagicMock, sample_dataset_path: Path +) -> None: + """Test that upload_dataset raises error when images upload fails.""" + # Create images directory with a file + images_dir = sample_dataset_path / "images" + col_dir = images_dir / "col" + col_dir.mkdir(parents=True) + (col_dir / "img.png").write_bytes(b"fake") + + # Make upload_folder fail only for images + original_upload_folder = mock_hf_api.upload_folder + + def failing_upload_folder(**kwargs): + if kwargs.get("path_in_repo") == "images": + raise Exception("Network error") + return original_upload_folder(**kwargs) + + mock_hf_api.upload_folder.side_effect = failing_upload_folder + + client = HuggingFaceHubClient(token="test-token") + with pytest.raises(HuggingFaceHubClientUploadError, match="Failed to upload images"): + client.upload_dataset(repo_id="test/dataset", base_dataset_path=sample_dataset_path, description="Test dataset") + + def test_upload_dataset_invalid_repo_id(mock_hf_api: MagicMock, sample_dataset_path: Path) -> None: """Test upload_dataset fails with invalid repo_id.""" client = HuggingFaceHubClient(token="test-token") From 52e023d8e40fcd2591410fa09c0d38a9fd41885c Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:40:00 -0700 Subject: [PATCH 47/64] Fix test --- .../tests/integrations/huggingface/test_client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/data-designer/tests/integrations/huggingface/test_client.py b/packages/data-designer/tests/integrations/huggingface/test_client.py index ba11a485..924a6bfe 100644 --- a/packages/data-designer/tests/integrations/huggingface/test_client.py +++ 
b/packages/data-designer/tests/integrations/huggingface/test_client.py @@ -521,12 +521,9 @@ def test_upload_dataset_images_upload_failure( (col_dir / "img.png").write_bytes(b"fake") # Make upload_folder fail only for images - original_upload_folder = mock_hf_api.upload_folder - def failing_upload_folder(**kwargs): if kwargs.get("path_in_repo") == "images": raise Exception("Network error") - return original_upload_folder(**kwargs) mock_hf_api.upload_folder.side_effect = failing_upload_folder From c53a1dce4eeeb86f29d8d9f0017d3cf6ee7081aa Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:45:17 -0700 Subject: [PATCH 48/64] set init=False for media_storage --- .../data_designer/engine/dataset_builders/artifact_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py index a7316be3..7d2c6e72 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py @@ -47,7 +47,7 @@ class ArtifactStorage(BaseModel): partial_results_folder_name: str = "tmp-partial-parquet-files" dropped_columns_folder_name: str = "dropped-columns-parquet-files" processors_outputs_folder_name: str = PROCESSORS_OUTPUTS_FOLDER_NAME - media_storage: MediaStorage = Field(default=None, exclude=True) + media_storage: MediaStorage = Field(default=None, init=False, exclude=True) @property def artifact_path_exists(self) -> bool: From 8f813b1318f8431a70784b35bcc60b03f417da0f Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Mon, 9 Feb 2026 20:58:15 -0700 Subject: [PATCH 49/64] handle image url in _display_image_if_in_notebook --- .../config/utils/visualization.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git 
a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index 2132b83b..bd2876d4 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -51,7 +51,7 @@ def _display_image_if_in_notebook(image_data: str, col_name: str) -> bool: """Display image with caption in Jupyter notebook if available. Args: - image_data: Base64-encoded image data, data URI, or file path. + image_data: Base64-encoded image data, data URI, file path, or URL. col_name: Name of the column (used for caption). Returns: @@ -63,7 +63,22 @@ def _display_image_if_in_notebook(image_data: str, col_name: str) -> bool: get_ipython() # This will raise NameError if not in IPython/Jupyter - # Check if it's a file path and load it + # Escape column name to prevent HTML injection + escaped_col_name = html.escape(col_name) + + # URLs: render directly as + if is_image_url(image_data): + escaped_url = html.escape(image_data) + html_content = f""" +
+
πŸ–ΌοΈ {escaped_col_name}
+ +
+ """ + display(HTML(html_content)) + return True + + # File paths: load from disk and convert to base64 if is_image_path(image_data) and not image_data.startswith("data:image/"): loaded_base64 = load_image_path_to_base64(image_data) if loaded_base64 is None: @@ -76,13 +91,7 @@ def _display_image_if_in_notebook(image_data: str, col_name: str) -> bool: base64_data = image_data # Extract base64 from data URI if present - base64_data = extract_base64_from_data_uri(base64_data) - - # Use the base64 data directly without resizing - img_base64 = base64_data - - # Escape column name to prevent HTML injection - escaped_col_name = html.escape(col_name) + img_base64 = extract_base64_from_data_uri(base64_data) # Create HTML with caption and image in left-aligned container html_content = f""" From 782a346a0c974050764584a4649f5d6b865c14b5 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 11:26:18 -0700 Subject: [PATCH 50/64] Fix path traversal vulnerability in MediaStorage subfolder handling Sanitize subfolder names before using them in filesystem paths to prevent path traversal attacks via column names containing special characters like '../', '/', or '\'. 
Co-Authored-By: Claude Sonnet 4.5 --- .../engine/storage/media_storage.py | 12 +++++++-- .../engine/storage/test_media_storage.py | 25 +++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py index 3726b7f7..81387525 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py @@ -61,6 +61,11 @@ def _ensure_images_directory(self) -> None: """Create images directory if it doesn't exist (lazy initialization).""" self.images_dir.mkdir(parents=True, exist_ok=True) + def _sanitize_subfolder_name(self, name: str) -> str: + """Sanitize subfolder name to prevent path traversal and filesystem issues.""" + # Replace path separators and parent directory references with underscores + return name.replace("/", "_").replace("\\", "_").replace("..", "_") + def save_base64_image(self, base64_data: str, subfolder_name: str) -> str: """Save or return base64 image based on storage mode. 
@@ -81,8 +86,11 @@ def save_base64_image(self, base64_data: str, subfolder_name: str) -> str: return base64_data # DISK mode: save to disk, validate, and return relative path + # Sanitize subfolder name to prevent path traversal + sanitized_subfolder = self._sanitize_subfolder_name(subfolder_name) + # Determine the target directory (organized by subfolder) - target_dir = self.images_dir / subfolder_name + target_dir = self.images_dir / sanitized_subfolder # Ensure target directory exists (lazy initialization) target_dir.mkdir(parents=True, exist_ok=True) @@ -99,7 +107,7 @@ def save_base64_image(self, base64_data: str, subfolder_name: str) -> str: full_path = target_dir / filename # Build relative path - relative_path = f"{self.images_subdir}/{subfolder_name}/{filename}" + relative_path = f"{self.images_subdir}/{sanitized_subfolder}/{filename}" # Write to disk with open(full_path, "wb") as f: diff --git a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py index 3648486d..f908a4c2 100644 --- a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py +++ b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py @@ -214,3 +214,28 @@ def test_save_base64_image_dataframe_mode_with_subfolder_name(tmp_path, sample_b # Directory should not be created in DATAFRAME mode assert not storage.images_dir.exists() + + +@pytest.mark.parametrize( + "unsafe_name,expected_sanitized", + [ + ("../evil", "__evil"), # Parent directory traversal: .. -> _, / -> _ + ("foo/bar", "foo_bar"), # Path separator (forward slash) + ("foo\\bar", "foo_bar"), # Path separator (backslash) + ("test..name", "test_name"), # Double dots in middle: .. 
-> _ + ], +) +def test_save_base64_image_sanitizes_subfolder_name(media_storage, sample_base64_png, unsafe_name, expected_sanitized): + """Test that subfolder names are sanitized to prevent path traversal.""" + relative_path = media_storage.save_base64_image(sample_base64_png, subfolder_name=unsafe_name) + + # Check that path contains sanitized subfolder name + assert expected_sanitized in relative_path + assert "/" not in expected_sanitized # No path separators + assert "\\" not in expected_sanitized # No backslashes + assert ".." not in expected_sanitized # No parent references + + # Verify file is inside images directory (not escaped via path traversal) + full_path = media_storage.base_path / relative_path + assert full_path.exists() + assert media_storage.images_dir in full_path.parents From 2d7a2023530216bdabda0966eda107b6be3a1d16 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 11:31:18 -0700 Subject: [PATCH 51/64] Fix PIL format detection in detect_image_format Compare against enum values instead of enum members in PIL fallback path. This fixes JPEG/WEBP/GIF detection when magic bytes don't match. 
--- .../src/data_designer/config/utils/image_helpers.py | 4 ++-- .../tests/config/utils/test_image_helpers.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 678d3b80..c20c81ea 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -114,8 +114,8 @@ def detect_image_format(image_bytes: bytes) -> ImageFormat: try: img = Image.open(io.BytesIO(image_bytes)) format_str = img.format.lower() if img.format else None - if format_str in [ImageFormat.PNG, ImageFormat.JPG, ImageFormat.JPEG, ImageFormat.WEBP]: - return ImageFormat(format_str if format_str != ImageFormat.JPEG else ImageFormat.JPG) + if format_str in [fmt.value for fmt in ImageFormat]: + return ImageFormat(format_str if format_str != ImageFormat.JPEG.value else ImageFormat.JPG.value) except Exception: pass diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index aa1ca451..f24696e4 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -224,9 +224,9 @@ def test_detect_image_format_with_pil_fallback_unsupported_format(tmp_path): img.save(gif_path, format="GIF") gif_bytes = gif_path.read_bytes() - # Should use PIL fallback and default to PNG (GIF not in ImageFormat enum) + # Should use PIL fallback and correctly detect GIF format result = detect_image_format(gif_bytes) - assert result == ImageFormat.PNG + assert result == ImageFormat.GIF def test_detect_image_format_with_pil_fallback_jpeg(): From 5fca3a664d335a24546fc321d90068e2e449262c Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 
2026 12:21:53 -0700 Subject: [PATCH 52/64] Fix Pydantic v2 compatibility in ArtifactStorage Replace Field(init=False) with PrivateAttr for media_storage attribute. Pydantic v2 does not support init kwarg in Field(). Use PrivateAttr for non-serialized attributes with property accessors for backward compatibility. --- .../dataset_builders/artifact_storage.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py index 7d2c6e72..43b817b0 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py @@ -11,7 +11,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator +from pydantic import BaseModel, ConfigDict, PrivateAttr, field_validator, model_validator from data_designer.config.utils.io_helpers import read_parquet_dataset from data_designer.config.utils.type_helpers import StrEnum, resolve_string_enum @@ -47,7 +47,17 @@ class ArtifactStorage(BaseModel): partial_results_folder_name: str = "tmp-partial-parquet-files" dropped_columns_folder_name: str = "dropped-columns-parquet-files" processors_outputs_folder_name: str = PROCESSORS_OUTPUTS_FOLDER_NAME - media_storage: MediaStorage = Field(default=None, init=False, exclude=True) + _media_storage: MediaStorage = PrivateAttr(default=None) + + @property + def media_storage(self) -> MediaStorage: + """Access media storage instance.""" + return self._media_storage + + @media_storage.setter + def media_storage(self, value: MediaStorage) -> None: + """Set media storage instance.""" + self._media_storage = value @property def artifact_path_exists(self) -> bool: @@ -119,7 +129,7 @@ def 
validate_folder_names(self): raise ArtifactStorageError(f"πŸ›‘ Directory name '{name}' contains invalid characters.") # Initialize media storage with DISK mode by default - self.media_storage = MediaStorage( + self._media_storage = MediaStorage( base_path=self.base_dataset_path, mode=StorageMode.DISK, ) @@ -132,7 +142,7 @@ def set_media_storage_mode(self, mode: StorageMode) -> None: Args: mode: StorageMode.DISK (save to disk) or StorageMode.DATAFRAME (store in memory) """ - self.media_storage.mode = mode + self._media_storage.mode = mode @staticmethod def mkdir_if_needed(path: Path | str) -> Path: From cf2b3648f61d0367182c02a3cf355e08be1b8c9d Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 15:12:36 -0700 Subject: [PATCH 53/64] Address PR review comments - Reduce num_records to 2 for image generation in tutorial notebook - Add tests for different image response formats (dict and plain string) - Parametrize PNG/JPG media storage tests for better maintainability --- docs/notebook_source/5-generating-images.py | 2 +- .../tests/engine/models/test_facade.py | 66 ++++++++++++++++++- .../engine/storage/test_media_storage.py | 33 +++++----- 3 files changed, 81 insertions(+), 20 deletions(-) diff --git a/docs/notebook_source/5-generating-images.py b/docs/notebook_source/5-generating-images.py index 28638ff9..b445b950 100644 --- a/docs/notebook_source/5-generating-images.py +++ b/docs/notebook_source/5-generating-images.py @@ -270,7 +270,7 @@ # # %% -results = data_designer.create(config_builder, num_records=5, dataset_name="tutorial-5-images") +results = data_designer.create(config_builder, num_records=2, dataset_name="tutorial-5-images") # %% dataset = results.load_dataset() diff --git a/packages/data-designer-engine/tests/engine/models/test_facade.py b/packages/data-designer-engine/tests/engine/models/test_facade.py index 65c66896..1f220f4f 100644 --- a/packages/data-designer-engine/tests/engine/models/test_facade.py +++ 
b/packages/data-designer-engine/tests/engine/models/test_facade.py @@ -4,7 +4,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -1077,6 +1077,70 @@ def test_generate_image_chat_completion_tracks_image_usage( assert stub_model_facade.usage_stats.image_usage.has_usage is True +@patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True) +def test_generate_image_chat_completion_with_dict_format( + mock_completion: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image handles images as dicts with image_url string.""" + # Create mock message with images as dict with string image_url + mock_message = MagicMock() + mock_message.role = "assistant" + mock_message.content = "" + mock_message.images = [ + {"image_url": "data:image/png;base64,image1"}, + {"image_url": "data:image/jpeg;base64,image2"}, + ] + + mock_choice = MagicMock() + mock_choice.message = mock_message + + mock_response = MagicMock() + mock_response.choices = [mock_choice] + + mock_completion.return_value = mock_response + + # Generate images + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=False): + images = stub_model_facade.generate_image(prompt="test prompt") + + # Verify results + assert len(images) == 2 + assert images == ["image1", "image2"] + + +@patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True) +def test_generate_image_chat_completion_with_plain_strings( + mock_completion: Any, + stub_model_facade: ModelFacade, +) -> None: + """Test that generate_image handles images as plain strings.""" + # Create mock message with images as plain strings + mock_message = MagicMock() + mock_message.role = "assistant" + mock_message.content = "" + mock_message.images = [ + "data:image/png;base64,image1", + "image2", # Plain base64 without data URI prefix + ] + + mock_choice = 
MagicMock() + mock_choice.message = mock_message + + mock_response = MagicMock() + mock_response.choices = [mock_choice] + + mock_completion.return_value = mock_response + + # Generate images + with patch("data_designer.engine.models.facade.is_image_diffusion_model", return_value=False): + images = stub_model_facade.generate_image(prompt="test prompt") + + # Verify results + assert len(images) == 2 + assert images == ["image1", "image2"] + + @patch("data_designer.engine.models.facade.CustomRouter.image_generation", autospec=True) def test_generate_image_skip_usage_tracking( mock_image_generation: Any, diff --git a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py index f908a4c2..9d74734a 100644 --- a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py +++ b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py @@ -60,13 +60,23 @@ def test_media_storage_init_custom_subdir(tmp_path): assert not storage.images_dir.exists() -def test_save_base64_image_png(media_storage, sample_base64_png): - """Test saving a PNG image from base64.""" - relative_path = media_storage.save_base64_image(sample_base64_png, subfolder_name="test_column") +@pytest.mark.parametrize( + "image_fixture,expected_extension", + [ + ("sample_base64_png", ".png"), + ("sample_base64_jpg", ".jpg"), + ], +) +def test_save_base64_image_format(media_storage, image_fixture, expected_extension, request): + """Test saving images from base64 in different formats.""" + # Get the actual fixture value using request.getfixturevalue + sample_base64 = request.getfixturevalue(image_fixture) + + relative_path = media_storage.save_base64_image(sample_base64, subfolder_name="test_column") # Check return value format (organized by column name) assert relative_path.startswith(f"{IMAGES_SUBDIR}/test_column/") - assert relative_path.endswith(".png") + assert 
relative_path.endswith(expected_extension) # Check file exists on disk full_path = media_storage.base_path / relative_path @@ -74,23 +84,10 @@ def test_save_base64_image_png(media_storage, sample_base64_png): # Verify file content saved_bytes = full_path.read_bytes() - expected_bytes = base64.b64decode(sample_base64_png) + expected_bytes = base64.b64decode(sample_base64) assert saved_bytes == expected_bytes -def test_save_base64_image_jpg(media_storage, sample_base64_jpg): - """Test saving a JPEG image from base64.""" - relative_path = media_storage.save_base64_image(sample_base64_jpg, subfolder_name="test_column") - - # Check return value format (organized by column name) - assert relative_path.startswith(f"{IMAGES_SUBDIR}/test_column/") - assert relative_path.endswith(".jpg") - - # Check file exists on disk - full_path = media_storage.base_path / relative_path - assert full_path.exists() - - def test_save_base64_image_with_data_uri(media_storage, sample_base64_png): """Test saving image from data URI format.""" data_uri = f"data:image/png;base64,{sample_base64_png}" From 5aa7e109faab4889e0fb5b1fa6121033caefd565 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 15:26:17 -0700 Subject: [PATCH 54/64] Use regex for base64 character validation in is_base64_image --- .../src/data_designer/config/utils/image_helpers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index c20c81ea..69ee5310 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -7,6 +7,7 @@ import base64 import io +import re from pathlib import Path from typing import TYPE_CHECKING @@ -23,6 +24,8 @@ # WEBP uses RIFF header - handled separately } +_BASE64_PATTERN = 
re.compile(r"^[A-Za-z0-9+/=]+$") + # Patterns for diffusion-based image models only (use image_generation API). IMAGE_DIFFUSION_MODEL_PATTERNS = ( "dall-e", @@ -152,9 +155,7 @@ def is_base64_image(value: str) -> bool: if value.startswith("data:image/"): return True # Check if it looks like base64 (at least 100 chars, contains only base64 chars) - if len(value) > 100 and all( - c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in value[:100] - ): + if len(value) > 100 and _BASE64_PATTERN.match(value[:100]): try: # Try to decode a small portion to verify it's valid base64 base64.b64decode(value[:100]) From ecaeb727b239427a63c0aaaf178145fbf321bf7c Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 15:38:46 -0700 Subject: [PATCH 55/64] move to a constant --- .../data_designer/config/utils/image_helpers.py | 17 ++++------------- .../tests/config/utils/test_image_helpers.py | 10 ---------- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 69ee5310..0fb949af 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -36,6 +36,8 @@ "imagen", ) +SUPPORTED_IMAGE_EXTENSIONS = [f".{fmt.value.lower()}" for fmt in ImageFormat] + def is_image_diffusion_model(model_name: str) -> bool: """Return True if the model is a diffusion-based image generation model. 
@@ -137,7 +139,7 @@ def is_image_path(value: str) -> bool: """ if not isinstance(value, str): return False - return any(value.lower().endswith(ext) for ext in get_supported_image_extensions()) + return any(value.lower().endswith(ext) for ext in SUPPORTED_IMAGE_EXTENSIONS) def is_base64_image(value: str) -> bool: @@ -176,9 +178,7 @@ def is_image_url(value: str) -> bool: """ if not isinstance(value, str): return False - return value.startswith(("http://", "https://")) and any( - ext in value.lower() for ext in get_supported_image_extensions() - ) + return value.startswith(("http://", "https://")) and any(ext in value.lower() for ext in SUPPORTED_IMAGE_EXTENSIONS) def load_image_path_to_base64(image_path: str, base_path: str | None = None) -> str | None: @@ -228,12 +228,3 @@ def validate_image(image_path: Path) -> None: img.verify() except Exception as e: raise ValueError(f"Image validation failed: {e}") from e - - -def get_supported_image_extensions() -> list[str]: - """Get list of supported image extensions from ImageFormat enum. 
- - Returns: - List of extensions with leading dot (e.g., [".png", ".jpg", ...]) - """ - return [f".{fmt.value}" for fmt in ImageFormat] diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index f24696e4..08ea3b50 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -14,7 +14,6 @@ decode_base64_image, detect_image_format, extract_base64_from_data_uri, - get_supported_image_extensions, is_base64_image, is_image_diffusion_model, is_image_path, @@ -204,15 +203,6 @@ def test_validate_image_nonexistent_raises_error(tmp_path): validate_image(image_path) -# Tests for get_supported_image_extensions - - -def test_get_supported_image_extensions_matches_enum(): - result = get_supported_image_extensions() - enum_values = [f".{fmt.value}" for fmt in ImageFormat] - assert set(result) == set(enum_values) - - # Additional tests for uncovered lines From 622b1c4f75013d184fefb46da95e2bf5a69a285c Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 15:43:21 -0700 Subject: [PATCH 56/64] fix pyproject.toml --- pyproject.toml | 6 +++--- uv.lock | 35 +++++++++++++++++++++++------------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 35566648..99d7b78c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,11 +39,11 @@ dev-dependencies = [ [dependency-groups] dev = [ "jsonpath-ng>=1.5.3,<2", - "pytest>=8.3.3,<9", - "pytest-asyncio>=0.24.0,<1", + "pytest>=9.0.2,<10", + "pytest-asyncio>=1.3.0,<2", "pytest-cov>=7.0.0,<8", "pytest-env>=1.2.0,<2", - "pytest-httpx>=0.35.0,<1", + "pytest-httpx>=0.36.0,<1", "pre-commit>=4.0.0,<5", ] docs = [ diff --git a/uv.lock b/uv.lock index 17306f0e..200d0b12 100644 --- a/uv.lock +++ b/uv.lock @@ -308,6 +308,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" }, ] +[[package]] +name = "backports-asyncio-runner" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/ff/70dca7d7cb1cbc0edb2c6cc0c38b65cba36cccc491eca64cabd5fe7f8670/backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162", size = 69893, upload-time = "2025-07-02T02:27:15.685Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" }, +] + [[package]] name = "backrefs" version = "6.1" @@ -905,11 +914,11 @@ recipes = [ dev = [ { name = "jsonpath-ng", specifier = ">=1.5.3,<2" }, { name = "pre-commit", specifier = ">=4.0.0,<5" }, - { name = "pytest", specifier = ">=8.3.3,<9" }, - { name = "pytest-asyncio", specifier = ">=0.24.0,<1" }, + { name = "pytest", specifier = ">=9.0.2,<10" }, + { name = "pytest-asyncio", specifier = ">=1.3.0,<2" }, { name = "pytest-cov", specifier = ">=7.0.0,<8" }, { name = "pytest-env", specifier = ">=1.2.0,<2" }, - { name = "pytest-httpx", specifier = ">=0.35.0,<1" }, + { name = "pytest-httpx", specifier = ">=0.36.0,<1" }, { name = "ruff", specifier = ">=0.14.10,<1" }, ] docs = [ @@ -3986,7 +3995,7 @@ wheels = [ [[package]] name = "pytest" -version = "8.4.2" +version = "9.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -3997,21 +4006,23 @@ dependencies = [ { name = "pygments" }, { 
name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] [[package]] name = "pytest-asyncio" -version = "0.26.0" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" }, { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8e/c4/453c52c659521066969523e87d85d54139bbd17b78f09532fb8eb8cdb58e/pytest_asyncio-0.26.0.tar.gz", hash = "sha256:c4df2a697648241ff39e7f0e4a73050b03f123f760673956cf0d72a4990e312f", size = 54156, upload-time = "2025-03-25T06:22:28.883Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/7f/338843f449ace853647ace35870874f69a764d251872ed1b4de9f234822c/pytest_asyncio-0.26.0-py3-none-any.whl", hash = "sha256:7b51ed894f4fbea1340262bdae5135797ebbe21d8638978e35d31c6d19f72fb0", size = 19694, upload-time = "2025-03-25T06:22:27.807Z" }, + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, ] [[package]] @@ -4043,15 +4054,15 @@ wheels = [ [[package]] name = "pytest-httpx" -version = "0.35.0" +version = "0.36.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1f/89/5b12b7b29e3d0af3a4b9c071ee92fa25a9017453731a38f08ba01c280f4c/pytest_httpx-0.35.0.tar.gz", hash = "sha256:d619ad5d2e67734abfbb224c3d9025d64795d4b8711116b1a13f72a251ae511f", size = 54146, upload-time = "2024-11-28T19:16:54.237Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/5574834da9499066fa1a5ea9c336f94dba2eae02298d36dab192fcf95c86/pytest_httpx-0.36.0.tar.gz", hash = "sha256:9edb66a5fd4388ce3c343189bc67e7e1cb50b07c2e3fc83b97d511975e8a831b", size = 56793, upload-time = "2025-12-02T16:34:57.414Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/ed/026d467c1853dd83102411a78126b4842618e86c895f93528b0528c7a620/pytest_httpx-0.35.0-py3-none-any.whl", hash = "sha256:ee11a00ffcea94a5cbff47af2114d34c5b231c326902458deed73f9c459fd744", size = 19442, upload-time = 
"2024-11-28T19:16:52.787Z" }, + { url = "https://files.pythonhosted.org/packages/e2/d2/1eb1ea9c84f0d2033eb0b49675afdc71aa4ea801b74615f00f3c33b725e3/pytest_httpx-0.36.0-py3-none-any.whl", hash = "sha256:bd4c120bb80e142df856e825ec9f17981effb84d159f9fa29ed97e2357c3a9c8", size = 20229, upload-time = "2025-12-02T16:34:56.45Z" }, ] [[package]] From 400e97b55a555c6fc788245c38862599a29b3189 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 15:50:57 -0700 Subject: [PATCH 57/64] regen colab notebooks --- docs/colab_notebooks/1-the-basics.ipynb | 62 ++++++++--------- ...ctured-outputs-and-jinja-expressions.ipynb | 58 ++++++++-------- .../3-seeding-with-a-dataset.ipynb | 54 +++++++-------- .../4-providing-images-as-context.ipynb | 66 +++++++++---------- .../colab_notebooks/5-generating-images.ipynb | 44 ++++++------- 5 files changed, 142 insertions(+), 142 deletions(-) diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb index ed8942df..f50209f7 100644 --- a/docs/colab_notebooks/1-the-basics.ipynb +++ b/docs/colab_notebooks/1-the-basics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "945eebf8", + "id": "96178d08", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: The Basics\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "8e8f2e22", + "id": "1d02a1d6", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -26,7 +26,7 @@ }, { "cell_type": "markdown", - "id": "92d91bf1", + "id": "2292d817", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -37,7 +37,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0b9b4427", + "id": "8af621fc", "metadata": {}, "outputs": [], "source": [ @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8878d172", + "id": "70e6a11c", "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c92bfb3", + "id": "41031828", "metadata": {}, "outputs": [], 
"source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "markdown", - "id": "4e39eed1", + "id": "0b480b10", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -89,7 +89,7 @@ { "cell_type": "code", "execution_count": null, - "id": "70c96cfb", + "id": "d434a8e2", "metadata": {}, "outputs": [], "source": [ @@ -98,7 +98,7 @@ }, { "cell_type": "markdown", - "id": "99d975c9", + "id": "f88f6792", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -115,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "851228c8", + "id": "4261574c", "metadata": {}, "outputs": [], "source": [ @@ -145,7 +145,7 @@ }, { "cell_type": "markdown", - "id": "fefb639d", + "id": "bbbc3d58", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -160,7 +160,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0ba52672", + "id": "92c0cf35", "metadata": {}, "outputs": [], "source": [ @@ -169,7 +169,7 @@ }, { "cell_type": "markdown", - "id": "7cc2aefc", + "id": "44246c7d", "metadata": {}, "source": [ "## 🎲 Getting started with sampler columns\n", @@ -186,7 +186,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a5a34b1a", + "id": "07d20f3f", "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "markdown", - "id": "ee4d1b6a", + "id": "9d3c87b0", "metadata": {}, "source": [ "Let's start designing our product review dataset by adding product category and subcategory columns.\n" @@ -204,7 +204,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7782d790", + "id": "c646b021", "metadata": {}, "outputs": [], "source": [ @@ -285,7 +285,7 @@ }, { "cell_type": "markdown", - "id": "f88e8b18", + "id": "ff18b032", "metadata": {}, "source": [ "Next, let's add samplers to generate data related to the customer and their review.\n" @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19174a73", + "id": "78846d99", "metadata": {}, 
"outputs": [], "source": [ @@ -331,7 +331,7 @@ }, { "cell_type": "markdown", - "id": "01438115", + "id": "97059bfc", "metadata": {}, "source": [ "## 🦜 LLM-generated columns\n", @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9c8f1275", + "id": "98c66eff", "metadata": {}, "outputs": [], "source": [ @@ -382,7 +382,7 @@ }, { "cell_type": "markdown", - "id": "f61e3771", + "id": "ff2d52b9", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -399,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7f8dc56e", + "id": "6e622478", "metadata": {}, "outputs": [], "source": [ @@ -409,7 +409,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5b66172a", + "id": "1addc7d8", "metadata": {}, "outputs": [], "source": [ @@ -420,7 +420,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b0eaa931", + "id": "7af4b9c3", "metadata": {}, "outputs": [], "source": [ @@ -430,7 +430,7 @@ }, { "cell_type": "markdown", - "id": "122d099d", + "id": "91d0ee89", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -443,7 +443,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f40f7ba0", + "id": "e1e3aed0", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +453,7 @@ }, { "cell_type": "markdown", - "id": "597c41ec", + "id": "6eaa402e", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -466,7 +466,7 @@ { "cell_type": "code", "execution_count": null, - "id": "acf8caa3", + "id": "f6b148d4", "metadata": {}, "outputs": [], "source": [ @@ -476,7 +476,7 @@ { "cell_type": "code", "execution_count": null, - "id": "697e9090", + "id": "f4e62e5b", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +489,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18f34e66", + "id": "7d426ab0", "metadata": {}, "outputs": [], "source": [ @@ -501,7 +501,7 @@ }, { "cell_type": "markdown", - "id": "4c498f62", + "id": "449d003c", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", 
diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb index 49be6edb..a6e04680 100644 --- a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "bd333de9", + "id": "ba22504d", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Structured Outputs and Jinja Expressions\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "28fb2ee3", + "id": "c176fe63", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "fbeb3b2d", + "id": "32c80f72", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6ef3d2ae", + "id": "4ab45e3a", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "07546806", + "id": "2ae70d67", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "81b00725", + "id": "2cdc070b", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "a5cf694f", + "id": "a04261b9", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8320e2b0", + "id": "c8bef18a", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "348e2c5a", + "id": "ed555636", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21019fc5", + "id": "47208094", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "7bf9d9af", + "id": "36c200d9", 
"metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88abb685", + "id": "57c0d82f", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "d8e790c6", + "id": "01ff63ca", "metadata": {}, "source": [ "### πŸ§‘β€πŸŽ¨ Designing our data\n", @@ -198,7 +198,7 @@ { "cell_type": "code", "execution_count": null, - "id": "64465ab1", + "id": "4fb0f1ca", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ }, { "cell_type": "markdown", - "id": "cfbad124", + "id": "8f35bd87", "metadata": {}, "source": [ "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n" @@ -235,7 +235,7 @@ { "cell_type": "code", "execution_count": null, - "id": "aa93a4c9", + "id": "43341f16", "metadata": {}, "outputs": [], "source": [ @@ -344,7 +344,7 @@ }, { "cell_type": "markdown", - "id": "74aa72fc", + "id": "34c3e08b", "metadata": {}, "source": [ "Next, we will use more advanced Jinja expressions to create new columns.\n", @@ -361,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9ae978cc", + "id": "c168c089", "metadata": {}, "outputs": [], "source": [ @@ -414,7 +414,7 @@ }, { "cell_type": "markdown", - "id": "ec850f14", + "id": "7e6521a2", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -431,7 +431,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cb18575e", + "id": "03510f78", "metadata": {}, "outputs": [], "source": [ @@ -441,7 +441,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eee46dc6", + "id": "ad599c43", "metadata": {}, "outputs": [], "source": [ @@ -452,7 +452,7 @@ { "cell_type": "code", "execution_count": null, - "id": "082d0fc4", + "id": "dbd3e17c", "metadata": {}, "outputs": [], "source": [ @@ -462,7 +462,7 @@ }, { "cell_type": "markdown", - "id": "e8d80b94", + "id": "4db52c26", 
"metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -475,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4b0a7299", + "id": "f1007ac4", "metadata": {}, "outputs": [], "source": [ @@ -485,7 +485,7 @@ }, { "cell_type": "markdown", - "id": "d7e0c925", + "id": "dcd68de4", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -498,7 +498,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b599d759", + "id": "27b6bfe8", "metadata": {}, "outputs": [], "source": [ @@ -508,7 +508,7 @@ { "cell_type": "code", "execution_count": null, - "id": "07a7c0da", + "id": "d4e9a395", "metadata": {}, "outputs": [], "source": [ @@ -521,7 +521,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7760dffa", + "id": "946b3aa8", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ }, { "cell_type": "markdown", - "id": "6d19000a", + "id": "f50d996e", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb index 468aa795..639e88df 100644 --- a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "573c3e7b", + "id": "25501772", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "63f6c36d", + "id": "67ffc49e", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "02cc81c7", + "id": "54a42504", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18d51631", + "id": "05b45354", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "67c55f6b", + "id": "039360fe", "metadata": {}, 
"outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cfe2ff62", + "id": "028d5e8a", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "bdbc5b03", + "id": "15a1df61", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "55d9caf1", + "id": "a87b6ff6", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "aa1623bc", + "id": "b9166cfd", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9d1310cf", + "id": "4961d3b0", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "e64ce3b7", + "id": "b1d8588a", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dafd6155", + "id": "cf42a4dd", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "7c01f11c", + "id": "8d6b26aa", "metadata": {}, "source": [ "## πŸ₯ Prepare a seed dataset\n", @@ -196,7 +196,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7941073f", + "id": "fc90401d", "metadata": {}, "outputs": [], "source": [ @@ -214,7 +214,7 @@ }, { "cell_type": "markdown", - "id": "a68c7d55", + "id": "6f5ee960", "metadata": {}, "source": [ "## 🎨 Designing our synthetic patient notes dataset\n", @@ -227,7 +227,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f1b3d4d4", + "id": "e9db2ff0", "metadata": {}, "outputs": [], "source": [ @@ -308,7 +308,7 @@ }, { "cell_type": "markdown", - "id": "eff1bf9f", + "id": "00efc894", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -325,7 +325,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "b5955230", + "id": "3e3d824e", "metadata": {}, "outputs": [], "source": [ @@ -335,7 +335,7 @@ { "cell_type": "code", "execution_count": null, - "id": "062a7294", + "id": "27785af7", "metadata": {}, "outputs": [], "source": [ @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6378e1be", + "id": "430998d1", "metadata": {}, "outputs": [], "source": [ @@ -356,7 +356,7 @@ }, { "cell_type": "markdown", - "id": "51e5175e", + "id": "dda6458b", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -369,7 +369,7 @@ { "cell_type": "code", "execution_count": null, - "id": "891b6860", + "id": "f45bc088", "metadata": {}, "outputs": [], "source": [ @@ -379,7 +379,7 @@ }, { "cell_type": "markdown", - "id": "0f52668f", + "id": "1e913fd8", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -392,7 +392,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ed083bd8", + "id": "30b8b7f7", "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ { "cell_type": "code", "execution_count": null, - "id": "039c42e4", + "id": "b7ff96d1", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +415,7 @@ { "cell_type": "code", "execution_count": null, - "id": "623ca205", + "id": "dbfef8a8", "metadata": {}, "outputs": [], "source": [ @@ -427,7 +427,7 @@ }, { "cell_type": "markdown", - "id": "0a7e7d42", + "id": "5db3f38d", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb index 62ac63e8..9797695e 100644 --- a/docs/colab_notebooks/4-providing-images-as-context.ipynb +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "731384ed", + "id": "19e57933", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation" @@ -10,7 +10,7 @@ }, { "cell_type": 
"markdown", - "id": "bc66dd23", + "id": "25e3cc64", "metadata": {}, "source": [ "#### πŸ“š What you'll learn\n", @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "4539a931", + "id": "4aae5c82", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -37,7 +37,7 @@ }, { "cell_type": "markdown", - "id": "f88809bf", + "id": "24dfae6c", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3628d4c4", + "id": "619b1aae", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7fcf0f75", + "id": "0d49a542", "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6654714a", + "id": "1b28f160", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "22488cb7", + "id": "63dc34de", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -113,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39913ca0", + "id": "672155c8", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +122,7 @@ }, { "cell_type": "markdown", - "id": "fba112ab", + "id": "4b32c25e", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -139,7 +139,7 @@ { "cell_type": "code", "execution_count": null, - "id": "70fd86dd", + "id": "72971915", "metadata": {}, "outputs": [], "source": [ @@ -162,7 +162,7 @@ }, { "cell_type": "markdown", - "id": "810c7457", + "id": "115ad20f", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -177,7 +177,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9b2204d0", + "id": "11e844d2", "metadata": {}, "outputs": [], "source": [ @@ -186,7 +186,7 @@ }, { "cell_type": "markdown", - "id": "29e3dae5", + "id": "77862fce", "metadata": {}, "source": [ "### 🌱 Seed Dataset Creation\n", @@ -203,7 +203,7 @@ { 
"cell_type": "code", "execution_count": null, - "id": "e2cc3506", + "id": "e415a502", "metadata": {}, "outputs": [], "source": [ @@ -218,7 +218,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7a821067", + "id": "335f2611", "metadata": {}, "outputs": [], "source": [ @@ -266,7 +266,7 @@ { "cell_type": "code", "execution_count": null, - "id": "359d144b", + "id": "f055e88d", "metadata": {}, "outputs": [], "source": [ @@ -284,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "985cd308", + "id": "47a1c586", "metadata": {}, "outputs": [], "source": [ @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6a8cb414", + "id": "3a77fc52", "metadata": {}, "outputs": [], "source": [ @@ -306,7 +306,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a57e1b73", + "id": "c0941cc7", "metadata": {}, "outputs": [], "source": [ @@ -335,7 +335,7 @@ }, { "cell_type": "markdown", - "id": "7518100a", + "id": "578e77dc", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -352,7 +352,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c1fe540", + "id": "9f0c11ce", "metadata": {}, "outputs": [], "source": [ @@ -362,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bceafe91", + "id": "b10412c1", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +373,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20f4ace5", + "id": "766ee2d7", "metadata": {}, "outputs": [], "source": [ @@ -383,7 +383,7 @@ }, { "cell_type": "markdown", - "id": "16a86d56", + "id": "6370bfa5", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -396,7 +396,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c1bbae97", + "id": "d57ded0e", "metadata": {}, "outputs": [], "source": [ @@ -406,7 +406,7 @@ }, { "cell_type": "markdown", - "id": "d8d7604f", + "id": "5afd8e8c", "metadata": {}, "source": [ "### πŸ”Ž Visual Inspection\n", @@ -417,7 +417,7 @@ { 
"cell_type": "code", "execution_count": null, - "id": "27c0636c", + "id": "aa4bfcc3", "metadata": { "lines_to_next_cell": 2 }, @@ -441,7 +441,7 @@ }, { "cell_type": "markdown", - "id": "f6b99539", + "id": "4eeaada6", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -454,7 +454,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e5d53787", + "id": "0ee5b1b9", "metadata": {}, "outputs": [], "source": [ @@ -464,7 +464,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1f859e49", + "id": "e5e8b241", "metadata": {}, "outputs": [], "source": [ @@ -477,7 +477,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6688e3c5", + "id": "23ebb3ca", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +489,7 @@ }, { "cell_type": "markdown", - "id": "28635b09", + "id": "14a78533", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/5-generating-images.ipynb b/docs/colab_notebooks/5-generating-images.ipynb index 485fe258..c8092938 100644 --- a/docs/colab_notebooks/5-generating-images.ipynb +++ b/docs/colab_notebooks/5-generating-images.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "0ee289e6", + "id": "735e6197", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Generating Images\n", @@ -22,7 +22,7 @@ }, { "cell_type": "markdown", - "id": "86f748c1", + "id": "92ae4afe", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -33,7 +33,7 @@ }, { "cell_type": "markdown", - "id": "c610ee22", + "id": "ccc77347", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -44,7 +44,7 @@ { "cell_type": "code", "execution_count": null, - "id": "818ca495", + "id": "23627c23", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f165bb15", + "id": "bf958dc6", "metadata": {}, "outputs": [], "source": [ @@ -73,7 +73,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5decfc83", + "id": "ab0cfff8", "metadata": {}, 
"outputs": [], "source": [ @@ -86,7 +86,7 @@ }, { "cell_type": "markdown", - "id": "929f35d6", + "id": "a18ef5ce", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -97,7 +97,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b4c8b7d7", + "id": "5fe11301", "metadata": {}, "outputs": [], "source": [ @@ -106,7 +106,7 @@ }, { "cell_type": "markdown", - "id": "8ed7b0b6", + "id": "b913d454", "metadata": {}, "source": [ "### πŸŽ›οΈ Define an image-generation model\n", @@ -118,7 +118,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d6b1ca66", + "id": "a50d26ee", "metadata": {}, "outputs": [], "source": [ @@ -140,7 +140,7 @@ }, { "cell_type": "markdown", - "id": "498cfecf", + "id": "122374d9", "metadata": {}, "source": [ "### πŸ—οΈ Build the config: samplers + image column\n", @@ -151,7 +151,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e74fc7ab", + "id": "940f2b70", "metadata": {}, "outputs": [], "source": [ @@ -324,7 +324,7 @@ }, { "cell_type": "markdown", - "id": "c592c820", + "id": "e13e0bb4", "metadata": {}, "source": [ "### πŸ” Preview: images as base64\n", @@ -335,7 +335,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eee17bb1", + "id": "2a60a76f", "metadata": {}, "outputs": [], "source": [ @@ -345,7 +345,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3cd320cc", + "id": "3c831ee8", "metadata": {}, "outputs": [], "source": [ @@ -356,7 +356,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ffb5e188", + "id": "143e762f", "metadata": {}, "outputs": [], "source": [ @@ -365,7 +365,7 @@ }, { "cell_type": "markdown", - "id": "87b83328", + "id": "a84606b4", "metadata": {}, "source": [ "### πŸ†™ Create: images saved to disk\n", @@ -376,17 +376,17 @@ { "cell_type": "code", "execution_count": null, - "id": "a8f9cc41", + "id": "89147954", "metadata": {}, "outputs": [], "source": [ - "results = data_designer.create(config_builder, num_records=5, 
dataset_name=\"tutorial-5-images\")" + "results = data_designer.create(config_builder, num_records=2, dataset_name=\"tutorial-5-images\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "0d4453e5", + "id": "04c96063", "metadata": {}, "outputs": [], "source": [ @@ -397,7 +397,7 @@ { "cell_type": "code", "execution_count": null, - "id": "198301ab", + "id": "edb794bb", "metadata": {}, "outputs": [], "source": [ @@ -413,7 +413,7 @@ }, { "cell_type": "markdown", - "id": "2bdcef2b", + "id": "e0a72bf6", "metadata": {}, "source": [ "## ⏭️ Next steps\n", From 469a3d295fd24afbce9179e24889ff4947c95c6d Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 10 Feb 2026 16:01:01 -0700 Subject: [PATCH 58/64] raise a ValueError if we fail to detect image format --- .../config/utils/image_helpers.py | 28 +++++++++++++++---- .../tests/config/utils/test_image_helpers.py | 17 +++++------ .../engine/storage/test_media_storage.py | 2 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 0fb949af..c91974d8 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -21,9 +21,20 @@ IMAGE_FORMAT_MAGIC_BYTES = { ImageFormat.PNG: b"\x89PNG\r\n\x1a\n", ImageFormat.JPG: b"\xff\xd8\xff", + ImageFormat.GIF: b"GIF8", # WEBP uses RIFF header - handled separately } +# Maps PIL format name (lowercase) to our ImageFormat enum. +# PIL reports "JPEG" (not "JPG"), so we normalize it here. 
+_PIL_FORMAT_TO_IMAGE_FORMAT: dict[str, ImageFormat] = { + "png": ImageFormat.PNG, + "jpeg": ImageFormat.JPG, + "jpg": ImageFormat.JPG, + "gif": ImageFormat.GIF, + "webp": ImageFormat.WEBP, +} + _BASE64_PATTERN = re.compile(r"^[A-Za-z0-9+/=]+$") # Patterns for diffusion-based image models only (use image_generation API). @@ -105,13 +116,18 @@ def detect_image_format(image_bytes: bytes) -> ImageFormat: image_bytes: Image data as bytes Returns: - Detected format (defaults to PNG if unknown) + Detected ImageFormat + + Raises: + ValueError: If the image format cannot be determined """ # Check magic bytes first (fast) if image_bytes.startswith(IMAGE_FORMAT_MAGIC_BYTES[ImageFormat.PNG]): return ImageFormat.PNG elif image_bytes.startswith(IMAGE_FORMAT_MAGIC_BYTES[ImageFormat.JPG]): return ImageFormat.JPG + elif image_bytes.startswith(IMAGE_FORMAT_MAGIC_BYTES[ImageFormat.GIF]): + return ImageFormat.GIF elif image_bytes.startswith(b"RIFF") and b"WEBP" in image_bytes[:12]: return ImageFormat.WEBP @@ -119,13 +135,15 @@ def detect_image_format(image_bytes: bytes) -> ImageFormat: try: img = Image.open(io.BytesIO(image_bytes)) format_str = img.format.lower() if img.format else None - if format_str in [fmt.value for fmt in ImageFormat]: - return ImageFormat(format_str if format_str != ImageFormat.JPEG.value else ImageFormat.JPG.value) + if format_str in _PIL_FORMAT_TO_IMAGE_FORMAT: + return _PIL_FORMAT_TO_IMAGE_FORMAT[format_str] except Exception: pass - # Default to PNG - return ImageFormat.PNG + raise ValueError( + f"Unable to detect image format (first 8 bytes: {image_bytes[:8]!r}). " + f"Supported formats: {', '.join(SUPPORTED_IMAGE_EXTENSIONS)}." 
+ ) def is_image_path(value: str) -> bool: diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index 08ea3b50..fe2f40b7 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -84,9 +84,10 @@ def test_detect_image_format_webp(): assert detect_image_format(webp_magic) == ImageFormat.WEBP -def test_detect_image_format_unknown_defaults_to_png(): +def test_detect_image_format_unknown_raises_error(): unknown_bytes = b"\x00\x00\x00\x00" + b"\x00" * 10 - assert detect_image_format(unknown_bytes) == ImageFormat.PNG + with pytest.raises(ValueError, match="Unable to detect image format"): + detect_image_format(unknown_bytes) # Tests for is_image_path @@ -206,31 +207,27 @@ def test_validate_image_nonexistent_raises_error(tmp_path): # Additional tests for uncovered lines -def test_detect_image_format_with_pil_fallback_unsupported_format(tmp_path): - # Create a real GIF image that will trigger PIL fallback - # (GIF has different magic bytes not in our fast-path detection) +def test_detect_image_format_gif_magic_bytes(tmp_path): + # GIF files start with "GIF87a" or "GIF89a" and are now detected via magic bytes img = Image.new("RGB", (1, 1), color="red") gif_path = tmp_path / "test.gif" img.save(gif_path, format="GIF") gif_bytes = gif_path.read_bytes() - # Should use PIL fallback and correctly detect GIF format result = detect_image_format(gif_bytes) assert result == ImageFormat.GIF def test_detect_image_format_with_pil_fallback_jpeg(): - # Test PIL fallback path that converts "jpeg" format string to JPG enum - # Use mock since we can't easily create valid JPEG bytes without magic bytes + # Test PIL fallback path that normalizes "jpeg" -> JPG enum mock_img = Mock() mock_img.format = "JPEG" - # Use bytes that don't match our magic bytes to trigger PIL fallback + # Use 
bytes that don't match any magic bytes to trigger PIL fallback test_bytes = b"\x00\x00\x00\x00" with patch.object(Image, "open", return_value=mock_img): result = detect_image_format(test_bytes) - # Should convert JPEG -> JPG via line 96 assert result == ImageFormat.JPG diff --git a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py index 9d74734a..2e690fb4 100644 --- a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py +++ b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py @@ -140,7 +140,7 @@ def test_save_base64_image_disk_mode_corrupted_image_raises_error(tmp_path): corrupted_bytes = b"not a valid image" corrupted_base64 = base64.b64encode(corrupted_bytes).decode() - with pytest.raises(ValueError, match="Image validation failed"): + with pytest.raises(ValueError, match="Unable to detect image format"): storage.save_base64_image(corrupted_base64, subfolder_name="test_column") # Check that no files were left behind (cleanup on validation failure) From 1e43394b142b77acfb589f0e0cec0d567587acf9 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 11 Feb 2026 10:19:48 -0700 Subject: [PATCH 59/64] Fix diffusion image gen --- .../config/utils/image_helpers.py | 23 ++++++++++- .../src/data_designer/engine/models/facade.py | 40 ++++++++++++++----- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index c91974d8..45f43622 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -11,6 +11,8 @@ from pathlib import Path from typing import TYPE_CHECKING +import requests + from data_designer.config.models import ImageFormat from 
data_designer.lazy_heavy_imports import Image @@ -39,12 +41,13 @@ # Patterns for diffusion-based image models only (use image_generation API). IMAGE_DIFFUSION_MODEL_PATTERNS = ( - "dall-e", + "dall-e-", "dalle", "stable-diffusion", "sd-", "sd_", "imagen", + "gpt-image-", ) SUPPORTED_IMAGE_EXTENSIONS = [f".{fmt.value.lower()}" for fmt in ImageFormat] @@ -232,6 +235,24 @@ def load_image_path_to_base64(image_path: str, base_path: str | None = None) -> return None +def load_image_url_to_base64(url: str, timeout: int = 60) -> str: + """Download an image from a URL and return as base64. + + Args: + url: HTTP(S) URL pointing to an image. + timeout: Request timeout in seconds. + + Returns: + Base64-encoded image data. + + Raises: + requests.HTTPError: If the download fails with a non-2xx status. + """ + resp = requests.get(url, timeout=timeout) + resp.raise_for_status() + return base64.b64encode(resp.content).decode() + + def validate_image(image_path: Path) -> None: """Validate that an image file is readable and not corrupted. 
diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index e637d9f4..902ac80a 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -13,6 +13,7 @@ extract_base64_from_data_uri, is_base64_image, is_image_diffusion_model, + load_image_url_to_base64, ) from data_designer.engine.mcp.errors import MCPConfigurationError from data_designer.engine.model_provider import ModelProviderRegistry @@ -41,13 +42,30 @@ def _identity(x: Any) -> Any: return x -def _try_extract_base64(data: str) -> str | None: - """Try to extract base64 image data from a data URI, returning None on failure.""" +def _try_extract_base64(source: str | litellm.types.utils.ImageObject) -> str | None: + """Try to extract base64 image data from a data URI string or image response object. + + Args: + source: Either a data URI string (e.g. "data:image/png;base64,...") + or a litellm ImageObject with b64_json/url attributes. + + Returns: + Base64-encoded image string, or None if extraction fails. + """ try: - return extract_base64_from_data_uri(data) - except ValueError: + if isinstance(source, str): + return extract_base64_from_data_uri(source) + + if getattr(source, "b64_json", None): + return source.b64_json + + if getattr(source, "url", None): + return load_image_url_to_base64(source.url) + except Exception: return None + return None + logger = logging.getLogger(__name__) @@ -447,16 +465,14 @@ def _generate_image_chat_completion( def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = False, **kwargs) -> list[str]: """Generate image(s) using diffusion model via image_generation API. - Always returns base64. The API is configured to return base64 format. + Always returns base64. 
If the API returns URLs instead of inline base64, + the images are downloaded and converted automatically. Returns: List of base64-encoded image strings """ kwargs = self.consolidate_kwargs(**kwargs) - # Always request base64 format - kwargs["response_format"] = "b64_json" - response = None try: @@ -471,8 +487,12 @@ def _generate_image_diffusion(self, prompt: str, skip_usage_tracking: bool = Fal if not response.data or len(response.data) == 0: raise ImageGenerationError("Image generation returned no data") - # Return all images as list - return [img.b64_json for img in response.data] + images = [b64 for img in response.data if (b64 := _try_extract_base64(img)) is not None] + + if not images: + raise ImageGenerationError("No image data could be extracted from response") + + return images except Exception: raise From 8f6be9bae09623873d805207750e7d27d68b5e8f Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 11 Feb 2026 10:28:38 -0700 Subject: [PATCH 60/64] Add requests to config pyproject.toml --- packages/data-designer-config/pyproject.toml | 1 + uv.lock | 2 ++ 2 files changed, 3 insertions(+) diff --git a/packages/data-designer-config/pyproject.toml b/packages/data-designer-config/pyproject.toml index 569c8fe0..dc980798 100644 --- a/packages/data-designer-config/pyproject.toml +++ b/packages/data-designer-config/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "pygments>=2.19.2,<3", "python-json-logger>=3,<4", "pyyaml>=6.0.1,<7", + "requests>=2.32.0,<3", "rich>=13.7.1,<15", ] diff --git a/uv.lock b/uv.lock index 200d0b12..d92497dd 100644 --- a/uv.lock +++ b/uv.lock @@ -805,6 +805,7 @@ dependencies = [ { name = "pygments" }, { name = "python-json-logger" }, { name = "pyyaml" }, + { name = "requests" }, { name = "rich" }, ] @@ -819,6 +820,7 @@ requires-dist = [ { name = "pygments", specifier = ">=2.19.2,<3" }, { name = "python-json-logger", specifier = ">=3,<4" }, { name = "pyyaml", specifier = ">=6.0.1,<7" }, + { name = "requests", specifier = ">=2.32.0,<3" 
}, { name = "rich", specifier = ">=13.7.1,<15" }, ] From 87dcab1e9bfb4e0cbc4a4cc8ee3ab60979cae4cd Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Wed, 11 Feb 2026 15:46:01 -0700 Subject: [PATCH 61/64] address pr feedback from andre --- .../tests/config/utils/test_image_helpers.py | 325 +++++++++--------- .../dataset_builders/column_wise_builder.py | 25 ++ .../src/data_designer/engine/models/facade.py | 4 + .../engine/storage/media_storage.py | 36 +- .../engine/storage/test_media_storage.py | 82 +++-- 5 files changed, 267 insertions(+), 205 deletions(-) diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index fe2f40b7..8b2f557a 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -5,6 +5,7 @@ import base64 import io +from pathlib import Path from unittest.mock import Mock, patch import pytest @@ -23,37 +24,51 @@ ) from data_designer.lazy_heavy_imports import Image -# Tests for extract_base64_from_data_uri +@pytest.fixture +def sample_png_bytes() -> bytes: + """Create a valid 1x1 PNG as raw bytes.""" + img = Image.new("RGB", (1, 1), color="red") + buf = io.BytesIO() + img.save(buf, format="PNG") + return buf.getvalue() + + +# --------------------------------------------------------------------------- +# extract_base64_from_data_uri +# --------------------------------------------------------------------------- -def test_extract_base64_from_data_uri_with_prefix(): + +def test_extract_base64_from_data_uri_with_prefix() -> None: data_uri = "data:image/png;base64,iVBORw0KGgoAAAANS" result = extract_base64_from_data_uri(data_uri) assert result == "iVBORw0KGgoAAAANS" -def test_extract_base64_plain_base64_without_prefix(): +def test_extract_base64_plain_base64_without_prefix() -> None: plain_base64 = "iVBORw0KGgoAAAANS" result = 
extract_base64_from_data_uri(plain_base64) assert result == plain_base64 -def test_extract_base64_invalid_data_uri_raises_error(): +def test_extract_base64_invalid_data_uri_raises_error() -> None: with pytest.raises(ValueError, match="Invalid data URI format: missing comma separator"): extract_base64_from_data_uri("data:image/png;base64") -# Tests for decode_base64_image +# --------------------------------------------------------------------------- +# decode_base64_image +# --------------------------------------------------------------------------- -def test_decode_base64_image_valid(): +def test_decode_base64_image_valid() -> None: png_bytes = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01" base64_data = base64.b64encode(png_bytes).decode() result = decode_base64_image(base64_data) assert result == png_bytes -def test_decode_base64_image_with_data_uri(): +def test_decode_base64_image_with_data_uri() -> None: png_bytes = b"\x89PNG\r\n\x1a\n" base64_data = base64.b64encode(png_bytes).decode() data_uri = f"data:image/png;base64,{base64_data}" @@ -61,274 +76,258 @@ def test_decode_base64_image_with_data_uri(): assert result == png_bytes -def test_decode_base64_image_invalid_raises_error(): +def test_decode_base64_image_invalid_raises_error() -> None: with pytest.raises(ValueError, match="Invalid base64 data"): decode_base64_image("not-valid-base64!!!") -# Tests for detect_image_format +# --------------------------------------------------------------------------- +# detect_image_format (magic bytes) +# --------------------------------------------------------------------------- -def test_detect_image_format_png(): - png_magic = b"\x89PNG\r\n\x1a\n" + b"\x00" * 10 - assert detect_image_format(png_magic) == ImageFormat.PNG +@pytest.mark.parametrize( + "header_bytes,expected_format", + [ + (b"\x89PNG\r\n\x1a\n" + b"\x00" * 10, ImageFormat.PNG), + (b"\xff\xd8\xff" + b"\x00" * 10, ImageFormat.JPG), + (b"RIFF" + b"\x00" * 4 + b"WEBP", ImageFormat.WEBP), + ], + 
ids=["png", "jpg", "webp"], +) +def test_detect_image_format_magic_bytes(header_bytes: bytes, expected_format: ImageFormat) -> None: + assert detect_image_format(header_bytes) == expected_format -def test_detect_image_format_jpg(): - jpg_magic = b"\xff\xd8\xff" + b"\x00" * 10 - assert detect_image_format(jpg_magic) == ImageFormat.JPG +def test_detect_image_format_gif_magic_bytes(tmp_path: Path) -> None: + img = Image.new("RGB", (1, 1), color="red") + gif_path = tmp_path / "test.gif" + img.save(gif_path, format="GIF") + gif_bytes = gif_path.read_bytes() + assert detect_image_format(gif_bytes) == ImageFormat.GIF -def test_detect_image_format_webp(): - webp_magic = b"RIFF" + b"\x00" * 4 + b"WEBP" - assert detect_image_format(webp_magic) == ImageFormat.WEBP +def test_detect_image_format_with_pil_fallback_jpeg() -> None: + mock_img = Mock() + mock_img.format = "JPEG" + test_bytes = b"\x00\x00\x00\x00" + with patch.object(Image, "open", return_value=mock_img): + result = detect_image_format(test_bytes) + assert result == ImageFormat.JPG -def test_detect_image_format_unknown_raises_error(): + +def test_detect_image_format_unknown_raises_error() -> None: unknown_bytes = b"\x00\x00\x00\x00" + b"\x00" * 10 with pytest.raises(ValueError, match="Unable to detect image format"): detect_image_format(unknown_bytes) -# Tests for is_image_path +# --------------------------------------------------------------------------- +# is_image_path +# --------------------------------------------------------------------------- -def test_is_image_path_various_extensions(): - assert is_image_path("/path/to/image.png") is True - assert is_image_path("image.PNG") is True - assert is_image_path("image.jpg") is True - assert is_image_path("image.jpeg") is True +@pytest.mark.parametrize( + "value,expected", + [ + ("/path/to/image.png", True), + ("image.PNG", True), + ("image.jpg", True), + ("image.jpeg", True), + ("/path/to/file.txt", False), + ("document.pdf", False), + ("/some.png/file.txt", 
False), + ], + ids=["png", "png-upper", "jpg", "jpeg", "txt", "pdf", "ext-in-dir"], +) +def test_is_image_path(value: str, expected: bool) -> None: + assert is_image_path(value) is expected -def test_is_image_path_non_image(): - assert is_image_path("/path/to/file.txt") is False - assert is_image_path("document.pdf") is False +# --------------------------------------------------------------------------- +# is_image_url +# --------------------------------------------------------------------------- -def test_is_image_path_extension_in_directory(): - assert is_image_path("/some.png/file.txt") is False +@pytest.mark.parametrize( + "value,expected", + [ + ("http://example.com/image.png", True), + ("https://example.com/photo.jpg", True), + ("https://example.com/image.png?size=large", True), + ("https://example.com/page.html", False), + ("ftp://example.com/image.png", False), + ], + ids=["http", "https", "query-params", "non-image-ext", "ftp"], +) +def test_is_image_url(value: str, expected: bool) -> None: + assert is_image_url(value) is expected -# Tests for is_base64_image +# --------------------------------------------------------------------------- +# is_base64_image +# --------------------------------------------------------------------------- -def test_is_base64_image_data_uri(): +def test_is_base64_image_data_uri() -> None: assert is_base64_image("data:image/png;base64,iVBORw0KGgo") is True -def test_is_base64_image_long_valid_base64(): +def test_is_base64_image_long_valid_base64() -> None: long_base64 = base64.b64encode(b"x" * 100).decode() assert is_base64_image(long_base64) is True -def test_is_base64_image_short_string(): +def test_is_base64_image_short_string() -> None: assert is_base64_image("short") is False -# Tests for is_image_url - - -def test_is_image_url_http_and_https(): - assert is_image_url("http://example.com/image.png") is True - assert is_image_url("https://example.com/photo.jpg") is True - - -def test_is_image_url_with_query_params(): - assert 
is_image_url("https://example.com/image.png?size=large") is True - - -def test_is_image_url_without_image_extension(): - assert is_image_url("https://example.com/page.html") is False - - -def test_is_image_url_non_http(): - assert is_image_url("ftp://example.com/image.png") is False - - -# Tests for is_image_diffusion_model - - -def test_is_image_diffusion_model_dall_e(): - assert is_image_diffusion_model("dall-e-3") is True - assert is_image_diffusion_model("DALL-E-2") is True - assert is_image_diffusion_model("openai/dalle-2") is True - - -def test_is_image_diffusion_model_stable_diffusion(): - assert is_image_diffusion_model("stable-diffusion-xl") is True - assert is_image_diffusion_model("sd-2.1") is True - assert is_image_diffusion_model("sd_1.5") is True - +def test_is_base64_image_invalid_base64_decode() -> None: + invalid_base64 = "A" * 50 + "=" + "A" * 49 + "more text" + assert is_base64_image(invalid_base64) is False -def test_is_image_diffusion_model_imagen(): - assert is_image_diffusion_model("imagen-3") is True - assert is_image_diffusion_model("google/imagen") is True +# --------------------------------------------------------------------------- +# Non-string guard (is_image_path, is_base64_image, is_image_url) +# --------------------------------------------------------------------------- -def test_is_image_diffusion_model_chat_completion_image_models(): - assert is_image_diffusion_model("gemini-3-pro-image-preview") is False - assert is_image_diffusion_model("gpt-5-image") is False - assert is_image_diffusion_model("flux.2-pro") is False +@pytest.mark.parametrize( + "func", + [is_image_path, is_base64_image, is_image_url], + ids=["is_image_path", "is_base64_image", "is_image_url"], +) +@pytest.mark.parametrize("value", [123, None, []], ids=["int", "none", "list"]) +def test_non_string_input_returns_false(func: object, value: object) -> None: + assert func(value) is False + + +# 
--------------------------------------------------------------------------- +# is_image_diffusion_model +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "model_name,expected", + [ + ("dall-e-3", True), + ("DALL-E-2", True), + ("openai/dalle-2", True), + ("stable-diffusion-xl", True), + ("sd-2.1", True), + ("sd_1.5", True), + ("imagen-3", True), + ("google/imagen", True), + ("gpt-image-1", True), + ("gemini-3-pro-image-preview", False), + ("gpt-5-image", False), + ("flux.2-pro", False), + ], + ids=[ + "dall-e-3", + "DALL-E-2", + "dalle-2", + "stable-diffusion-xl", + "sd-2.1", + "sd_1.5", + "imagen-3", + "google-imagen", + "gpt-image-1", + "gemini-not-diffusion", + "gpt-5-not-diffusion", + "flux-not-diffusion", + ], +) +def test_is_image_diffusion_model(model_name: str, expected: bool) -> None: + assert is_image_diffusion_model(model_name) is expected -# Tests for validate_image +# --------------------------------------------------------------------------- +# validate_image +# --------------------------------------------------------------------------- -def test_validate_image_valid_png(tmp_path): - # Create a valid 1x1 PNG using PIL - img = Image.new("RGB", (1, 1), color="red") - buf = io.BytesIO() - img.save(buf, format="PNG") - png_bytes = buf.getvalue() +def test_validate_image_valid_png(tmp_path: Path, sample_png_bytes: bytes) -> None: image_path = tmp_path / "test.png" - image_path.write_bytes(png_bytes) - - # Should not raise + image_path.write_bytes(sample_png_bytes) validate_image(image_path) -def test_validate_image_corrupted_raises_error(tmp_path): - # Create an invalid image file +def test_validate_image_corrupted_raises_error(tmp_path: Path) -> None: image_path = tmp_path / "corrupted.png" image_path.write_bytes(b"not a valid image") - with pytest.raises(ValueError, match="Image validation failed"): validate_image(image_path) -def test_validate_image_nonexistent_raises_error(tmp_path): +def 
test_validate_image_nonexistent_raises_error(tmp_path: Path) -> None: image_path = tmp_path / "nonexistent.png" - with pytest.raises(ValueError, match="Image validation failed"): validate_image(image_path) -# Additional tests for uncovered lines - +# --------------------------------------------------------------------------- +# load_image_path_to_base64 +# --------------------------------------------------------------------------- -def test_detect_image_format_gif_magic_bytes(tmp_path): - # GIF files start with "GIF87a" or "GIF89a" and are now detected via magic bytes - img = Image.new("RGB", (1, 1), color="red") - gif_path = tmp_path / "test.gif" - img.save(gif_path, format="GIF") - - gif_bytes = gif_path.read_bytes() - result = detect_image_format(gif_bytes) - assert result == ImageFormat.GIF - - -def test_detect_image_format_with_pil_fallback_jpeg(): - # Test PIL fallback path that normalizes "jpeg" -> JPG enum - mock_img = Mock() - mock_img.format = "JPEG" - - # Use bytes that don't match any magic bytes to trigger PIL fallback - test_bytes = b"\x00\x00\x00\x00" - with patch.object(Image, "open", return_value=mock_img): - result = detect_image_format(test_bytes) - assert result == ImageFormat.JPG - - -def test_is_image_path_non_string_input(): - assert is_image_path(123) is False - assert is_image_path(None) is False - assert is_image_path([]) is False - - -def test_is_base64_image_non_string_input(): - assert is_base64_image(123) is False - assert is_base64_image(None) is False - assert is_base64_image([]) is False - - -def test_is_base64_image_invalid_base64_decode(): - # String with valid base64 characters but incorrect padding that causes decode to fail - # Single '=' in middle of string is invalid base64 (padding only allowed at end) - invalid_base64 = "A" * 50 + "=" + "A" * 49 + "more text" - assert is_base64_image(invalid_base64) is False - - -def test_is_image_url_non_string_input(): - assert is_image_url(123) is False - assert is_image_url(None) is 
False - assert is_image_url([]) is False - - -# Tests for load_image_path_to_base64 - - -def test_load_image_path_to_base64_absolute_path(tmp_path): - # Create a test image file +def test_load_image_path_to_base64_absolute_path(tmp_path: Path) -> None: img = Image.new("RGB", (1, 1), color="blue") image_path = tmp_path / "test.png" img.save(image_path) - # Load with absolute path result = load_image_path_to_base64(str(image_path)) assert result is not None assert len(result) > 0 - # Verify it's valid base64 decoded = base64.b64decode(result) assert len(decoded) > 0 -def test_load_image_path_to_base64_relative_with_base_path(tmp_path): - # Create a test image file +def test_load_image_path_to_base64_relative_with_base_path(tmp_path: Path) -> None: img = Image.new("RGB", (1, 1), color="green") image_path = tmp_path / "subdir" / "test.png" image_path.parent.mkdir(exist_ok=True) img.save(image_path) - # Load with relative path and base_path result = load_image_path_to_base64("subdir/test.png", base_path=str(tmp_path)) assert result is not None assert len(result) > 0 -def test_load_image_path_to_base64_nonexistent_file(): +def test_load_image_path_to_base64_nonexistent_file() -> None: result = load_image_path_to_base64("/nonexistent/path/to/image.png") assert result is None -def test_load_image_path_to_base64_relative_with_cwd_fallback(tmp_path, monkeypatch): - # Create test image in current working directory - - # Change to tmp_path as cwd +def test_load_image_path_to_base64_relative_with_cwd_fallback(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.chdir(tmp_path) img = Image.new("RGB", (1, 1), color="yellow") image_path = tmp_path / "test_cwd.png" img.save(image_path) - # Use relative path without base_path - should fall back to cwd result = load_image_path_to_base64("test_cwd.png") assert result is not None assert len(result) > 0 -def test_load_image_path_to_base64_base_path_fallback_to_cwd(tmp_path, monkeypatch): - # Test the case where 
base_path is provided but file isn't there, falls back to cwd +def test_load_image_path_to_base64_base_path_fallback_to_cwd(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.chdir(tmp_path) - # Create image in cwd img = Image.new("RGB", (1, 1), color="red") image_path = tmp_path / "test.png" img.save(image_path) - # Create a different base_path that doesn't have the image wrong_base = tmp_path / "wrong" wrong_base.mkdir() - # Use relative path with wrong base_path - should fall back to cwd result = load_image_path_to_base64("test.png", base_path=str(wrong_base)) assert result is not None assert len(result) > 0 -def test_load_image_path_to_base64_exception_handling(tmp_path): - # Create a directory (not a file) to trigger exception +def test_load_image_path_to_base64_exception_handling(tmp_path: Path) -> None: dir_path = tmp_path / "directory" dir_path.mkdir() diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py index 9077e807..e5d67928 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -286,6 +286,7 @@ def _fan_out_with_threads(self, generator: ColumnGeneratorWithModelRegistry, max progress_tracker.log_final() if len(self._records_to_drop) > 0: + self._cleanup_dropped_record_images(self._records_to_drop) self.batch_manager.drop_records(self._records_to_drop) self._records_to_drop.clear() @@ -362,6 +363,30 @@ def _run_processors( ) from e return dataframe + def _cleanup_dropped_record_images(self, dropped_indices: set[int]) -> None: + """Remove saved image files for records that will be dropped. + + When a record fails during generation, any images already saved to disk + for that record in previous columns become dangling. 
This method deletes + those files so they don't accumulate. + """ + media_storage = self.artifact_storage.media_storage + if not self._has_image_columns() or media_storage is None or media_storage.mode != StorageMode.DISK: + return + + image_col_names = [ + col.name for col in self.single_column_configs if col.column_type == DataDesignerColumnType.IMAGE + ] + + buffer = self.batch_manager.get_current_batch(as_dataframe=False) + for idx in dropped_indices: + if idx < 0 or idx >= len(buffer): + continue + for col_name in image_col_names: + paths = buffer[idx].get(col_name, []) + for path in [paths] if isinstance(paths, str) else paths: + media_storage.delete_image(path) + def _worker_error_callback(self, exc: Exception, *, context: dict | None = None) -> None: """If a worker fails, we can handle the exception here.""" logger.warning( diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index 7f7972d5..cf3c7e6e 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -62,6 +62,7 @@ def _try_extract_base64(source: str | litellm.types.utils.ImageObject) -> str | if getattr(source, "url", None): return load_image_url_to_base64(source.url) except Exception: + logger.debug(f"Failed to extract base64 from source of type {type(source).__name__}") return None return None @@ -561,3 +562,6 @@ def _track_token_usage_from_image_diffusion(self, response: litellm.types.utils. 
), request_usage=RequestUsageStats(successful_requests=1, failed_requests=0), ) + else: + # Successful response but no token usage data (some providers don't report it) + self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=1, failed_requests=0)) diff --git a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py index 81387525..1c887c80 100644 --- a/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py +++ b/packages/data-designer-engine/src/data_designer/engine/storage/media_storage.py @@ -57,15 +57,6 @@ def __init__( self.images_subdir = images_subdir self.mode = mode - def _ensure_images_directory(self) -> None: - """Create images directory if it doesn't exist (lazy initialization).""" - self.images_dir.mkdir(parents=True, exist_ok=True) - - def _sanitize_subfolder_name(self, name: str) -> str: - """Sanitize subfolder name to prevent path traversal and filesystem issues.""" - # Replace path separators and parent directory references with underscores - return name.replace("/", "_").replace("\\", "_").replace("..", "_") - def save_base64_image(self, base64_data: str, subfolder_name: str) -> str: """Save or return base64 image based on storage mode. @@ -118,6 +109,24 @@ def save_base64_image(self, base64_data: str, subfolder_name: str) -> str: return relative_path + def delete_image(self, relative_path: str) -> bool: + """Delete a saved image file given its relative path. + + Args: + relative_path: Relative path as returned by save_base64_image (e.g., "images/col/uuid.png") + + Returns: + True if the file was deleted, False if it didn't exist or deletion failed. 
+ """ + try: + full_path = self.base_path / relative_path + if full_path.exists() and self.images_dir in full_path.parents: + full_path.unlink() + return True + except OSError: + pass + return False + def _validate_image(self, image_path: Path) -> None: """Validate that saved image is readable. @@ -133,3 +142,12 @@ def _validate_image(self, image_path: Path) -> None: # Clean up invalid file image_path.unlink(missing_ok=True) raise + + def _ensure_images_directory(self) -> None: + """Create images directory if it doesn't exist (lazy initialization).""" + self.images_dir.mkdir(parents=True, exist_ok=True) + + def _sanitize_subfolder_name(self, name: str) -> str: + """Sanitize subfolder name to prevent path traversal and filesystem issues.""" + # Replace path separators and parent directory references with underscores + return name.replace("/", "_").replace("\\", "_").replace("..", "_") diff --git a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py index 2e690fb4..e79c854b 100644 --- a/packages/data-designer-engine/tests/engine/storage/test_media_storage.py +++ b/packages/data-designer-engine/tests/engine/storage/test_media_storage.py @@ -38,24 +38,21 @@ def sample_base64_jpg() -> str: return base64.b64encode(jpg_bytes).decode() -def test_media_storage_init(tmp_path): - """Test MediaStorage initialization.""" - storage = MediaStorage(base_path=tmp_path) +@pytest.mark.parametrize( + "images_subdir,mode", + [ + (IMAGES_SUBDIR, StorageMode.DISK), + ("custom_images", StorageMode.DATAFRAME), + ], + ids=["defaults", "custom-subdir-dataframe"], +) +def test_media_storage_init(tmp_path, images_subdir: str, mode: StorageMode) -> None: + """Test MediaStorage initialization with various configurations.""" + storage = MediaStorage(base_path=tmp_path, images_subdir=images_subdir, mode=mode) assert storage.base_path == tmp_path - assert storage.images_dir == tmp_path / IMAGES_SUBDIR - 
assert storage.images_subdir == IMAGES_SUBDIR - assert storage.mode == StorageMode.DISK - # Directory should NOT exist until first save (lazy initialization) - assert not storage.images_dir.exists() - - -def test_media_storage_init_custom_subdir(tmp_path): - """Test MediaStorage initialization with custom subdirectory and mode.""" - custom_subdir = "custom_images" - storage = MediaStorage(base_path=tmp_path, images_subdir=custom_subdir, mode=StorageMode.DATAFRAME) - assert storage.images_subdir == custom_subdir - assert storage.images_dir == tmp_path / custom_subdir - assert storage.mode == StorageMode.DATAFRAME + assert storage.images_subdir == images_subdir + assert storage.images_dir == tmp_path / images_subdir + assert storage.mode == mode # Directory should NOT exist until first save (lazy initialization) assert not storage.images_dir.exists() @@ -149,12 +146,12 @@ def test_save_base64_image_disk_mode_corrupted_image_raises_error(tmp_path): assert len(list(column_dir.iterdir())) == 0 -def test_save_base64_image_dataframe_mode_returns_base64(tmp_path, sample_base64_png): - """Test that DATAFRAME mode returns base64 directly without disk operations.""" +@pytest.mark.parametrize("subfolder_name", ["test_column", "test_subfolder"], ids=["column", "subfolder"]) +def test_save_base64_image_dataframe_mode_returns_base64(tmp_path, sample_base64_png, subfolder_name): + """Test that DATAFRAME mode returns base64 directly regardless of subfolder name.""" storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DATAFRAME) - # Should return the same base64 data (column_name is ignored in DATAFRAME mode) - result = storage.save_base64_image(sample_base64_png, subfolder_name="test_column") + result = storage.save_base64_image(sample_base64_png, subfolder_name=subfolder_name) assert result == sample_base64_png # Directory should not be created in DATAFRAME mode (lazy initialization) @@ -201,18 +198,6 @@ def 
test_save_base64_image_with_different_subfolder_names(media_storage, sample_ assert (media_storage.base_path / path2).exists() -def test_save_base64_image_dataframe_mode_with_subfolder_name(tmp_path, sample_base64_png): - """Test that DATAFRAME mode returns base64 directly even with subfolder name.""" - storage = MediaStorage(base_path=tmp_path, mode=StorageMode.DATAFRAME) - - # Should return the same base64 data regardless of subfolder name - result = storage.save_base64_image(sample_base64_png, subfolder_name="test_subfolder") - assert result == sample_base64_png - - # Directory should not be created in DATAFRAME mode - assert not storage.images_dir.exists() - - @pytest.mark.parametrize( "unsafe_name,expected_sanitized", [ @@ -236,3 +221,34 @@ def test_save_base64_image_sanitizes_subfolder_name(media_storage, sample_base64 full_path = media_storage.base_path / relative_path assert full_path.exists() assert media_storage.images_dir in full_path.parents + + +# --------------------------------------------------------------------------- +# delete_image +# --------------------------------------------------------------------------- + + +def test_delete_image_removes_saved_file(media_storage, sample_base64_png) -> None: + """Test that delete_image removes a previously saved image.""" + relative_path = media_storage.save_base64_image(sample_base64_png, subfolder_name="col") + full_path = media_storage.base_path / relative_path + assert full_path.exists() + + result = media_storage.delete_image(relative_path) + assert result is True + assert not full_path.exists() + + +def test_delete_image_returns_false_for_nonexistent(media_storage) -> None: + """Test that delete_image returns False when the file doesn't exist.""" + assert media_storage.delete_image(f"{IMAGES_SUBDIR}/col/nonexistent.png") is False + + +def test_delete_image_rejects_path_outside_images_dir(media_storage, tmp_path) -> None: + """Test that delete_image refuses to delete files outside the images 
directory.""" + outside_file = tmp_path / "outside.txt" + outside_file.write_text("should not be deleted") + + result = media_storage.delete_image("../outside.txt") + assert result is False + assert outside_file.exists() From b1648c78bfc3d4f1b985454bfdd01f34193ac22c Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Thu, 12 Feb 2026 11:33:44 -0700 Subject: [PATCH 62/64] reorder docstring --- .../src/data_designer/config/column_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data-designer-config/src/data_designer/config/column_configs.py b/packages/data-designer-config/src/data_designer/config/column_configs.py index e3ea013d..49dbb831 100644 --- a/packages/data-designer-config/src/data_designer/config/column_configs.py +++ b/packages/data-designer-config/src/data_designer/config/column_configs.py @@ -492,7 +492,6 @@ class ImageColumnConfig(SingleColumnConfig): The API used is automatically determined based on the model name: Attributes: - column_type: Discriminator field, always "image" for this configuration type. prompt: Prompt template for image generation. Supports Jinja2 templating to reference other columns (e.g., "Generate an image of a {{ character_name }}"). Must be a valid Jinja2 template. @@ -500,6 +499,7 @@ class ImageColumnConfig(SingleColumnConfig): multi_modal_context: Optional list of image contexts for multi-modal generation. Enables autoregressive multi-modal models to generate images based on image inputs. Only works with autoregressive models that support image-to-image generation. + column_type: Discriminator field, always "image" for this configuration type. 
""" prompt: str From 78202339a4da2b623ce652b7c473c79bba3c96aa Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Thu, 12 Feb 2026 11:47:32 -0700 Subject: [PATCH 63/64] Fix bug with display sample record with index=0 --- .../src/data_designer/config/utils/visualization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data-designer-config/src/data_designer/config/utils/visualization.py b/packages/data-designer-config/src/data_designer/config/utils/visualization.py index bd2876d4..ac4df1d8 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/visualization.py +++ b/packages/data-designer-config/src/data_designer/config/utils/visualization.py @@ -171,7 +171,7 @@ def display_sample_record( processors_to_display: List of processors to display the artifacts for. If None, all processors will be displayed. hide_seed_columns: If True, seed columns will not be displayed separately. """ - i = index or self._display_cycle_index + i = self._display_cycle_index if index is None else index try: record = self._record_sampler_dataset.iloc[i] From f491c11d4bd9b8d01ba72f04bd5443290892c52f Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Thu, 12 Feb 2026 11:51:16 -0700 Subject: [PATCH 64/64] remove redundant kwargs consolidation --- .../src/data_designer/engine/models/facade.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py index cf3c7e6e..ef328a9a 100644 --- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py +++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py @@ -407,7 +407,6 @@ def _generate_image_chat_completion( Returns: List of base64-encoded image strings """ - kwargs = self.consolidate_kwargs(**kwargs) messages = prompt_to_messages(user_prompt=prompt, multi_modal_context=multi_modal_context) response = None