diff --git a/packages/data-designer-engine/src/data_designer/engine/configurable_task.py b/packages/data-designer-engine/src/data_designer/engine/configurable_task.py index 2d64f9fc..0e7b96f8 100644 --- a/packages/data-designer-engine/src/data_designer/engine/configurable_task.py +++ b/packages/data-designer-engine/src/data_designer/engine/configurable_task.py @@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Generic, TypeVar, get_origin from data_designer.config.base import ConfigBase -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.artifact_storage import ArtifactStorage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py index e8845ae1..9b0d91b8 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/column_wise_builder.py @@ -27,7 +27,6 @@ ) from data_designer.engine.column_generators.utils.generator_classification import column_type_is_model_generated from data_designer.engine.compiler import compile_data_designer_config -from data_designer.engine.dataset_builders.artifact_storage import SDG_CONFIG_FILENAME, ArtifactStorage from data_designer.engine.dataset_builders.errors import DatasetGenerationError from data_designer.engine.dataset_builders.multi_column_configs import MultiColumnConfig from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor @@ -40,6 +39,7 @@ from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.artifact_storage import SDG_CONFIG_FILENAME, ArtifactStorage from data_designer.engine.storage.media_storage import StorageMode from data_designer.lazy_heavy_imports import pd @@ -182,10 +182,6 @@ def _has_image_columns(self) -> bool: """Check if config has any image generation columns.""" return any(col.column_type == DataDesignerColumnType.IMAGE for col in self.single_column_configs) - def _has_image_columns(self) -> bool: - """Check if config has any image generation columns.""" - return any(col.column_type == DataDesignerColumnType.IMAGE for col in self.single_column_configs) - def _initialize_generators(self) -> list[ColumnGenerator]: return [ self._registry.column_generators.get_for_config_type(type(config))( diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py index a60d52b9..efb60b8b 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py @@ -8,8 +8,8 @@ from pathlib import Path from typing import TYPE_CHECKING, Callable, Container, Iterator -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage, BatchStage from data_designer.engine.dataset_builders.utils.errors import DatasetBatchManagementError +from data_designer.engine.storage.artifact_storage import ArtifactStorage, BatchStage from data_designer.lazy_heavy_imports import pd, pq if TYPE_CHECKING: diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/processor_runner.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/processor_runner.py index 284d45f5..c78ee2b3 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/processor_runner.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/processor_runner.py @@ -8,15 +8,15 @@ from enum import Enum from typing import TYPE_CHECKING -from data_designer.engine.dataset_builders.artifact_storage import BatchStage from data_designer.engine.dataset_builders.errors import DatasetProcessingError +from data_designer.engine.storage.artifact_storage import BatchStage if TYPE_CHECKING: import pandas as pd - from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager from data_designer.engine.processing.processors.base import Processor + from data_designer.engine.storage.artifact_storage import ArtifactStorage logger = logging.getLogger(__name__) diff --git a/packages/data-designer-engine/src/data_designer/engine/processing/processors/drop_columns.py b/packages/data-designer-engine/src/data_designer/engine/processing/processors/drop_columns.py index bb26af2a..19768404 100644 --- a/packages/data-designer-engine/src/data_designer/engine/processing/processors/drop_columns.py +++ b/packages/data-designer-engine/src/data_designer/engine/processing/processors/drop_columns.py @@ -7,8 +7,8 @@ from typing import TYPE_CHECKING from data_designer.config.processors import DropColumnsProcessorConfig -from data_designer.engine.dataset_builders.artifact_storage import BatchStage from data_designer.engine.processing.processors.base import Processor +from data_designer.engine.storage.artifact_storage import BatchStage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/src/data_designer/engine/processing/processors/schema_transform.py b/packages/data-designer-engine/src/data_designer/engine/processing/processors/schema_transform.py index 349afddc..cbeab60d 100644 --- a/packages/data-designer-engine/src/data_designer/engine/processing/processors/schema_transform.py +++ b/packages/data-designer-engine/src/data_designer/engine/processing/processors/schema_transform.py @@ -8,10 +8,10 @@ from typing import TYPE_CHECKING, Any from data_designer.config.processors import SchemaTransformProcessorConfig -from data_designer.engine.dataset_builders.artifact_storage import BatchStage from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering from data_designer.engine.processing.processors.base import Processor from data_designer.engine.processing.utils import deserialize_json_values +from data_designer.engine.storage.artifact_storage import BatchStage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/src/data_designer/engine/resources/resource_provider.py b/packages/data-designer-engine/src/data_designer/engine/resources/resource_provider.py index 1fcb8e02..cf9b252d 100644 --- a/packages/data-designer-engine/src/data_designer/engine/resources/resource_provider.py +++ b/packages/data-designer-engine/src/data_designer/engine/resources/resource_provider.py @@ -10,7 +10,6 @@ from data_designer.config.run_config import RunConfig from data_designer.config.seed_source import SeedSource from data_designer.config.utils.type_helpers import StrEnum -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.mcp.factory import create_mcp_registry from data_designer.engine.mcp.registry import MCPRegistry from data_designer.engine.model_provider import ( @@ -22,6 +21,7 @@ from data_designer.engine.resources.managed_storage import ManagedBlobStorage from data_designer.engine.resources.seed_reader import SeedReader, SeedReaderRegistry from data_designer.engine.secret_resolver import SecretResolver +from data_designer.engine.storage.artifact_storage import ArtifactStorage class ResourceType(StrEnum): diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py b/packages/data-designer-engine/src/data_designer/engine/storage/artifact_storage.py similarity index 100% rename from packages/data-designer-engine/src/data_designer/engine/dataset_builders/artifact_storage.py rename to packages/data-designer-engine/src/data_designer/engine/storage/artifact_storage.py diff --git a/packages/data-designer-engine/tests/engine/analysis/conftest.py b/packages/data-designer-engine/tests/engine/analysis/conftest.py index 6a9de4ea..a760388c 100644 --- a/packages/data-designer-engine/tests/engine/analysis/conftest.py +++ b/packages/data-designer-engine/tests/engine/analysis/conftest.py @@ -23,10 +23,10 @@ DatasetProfilerConfig, ) from data_designer.engine.analysis.utils.judge_score_processing import JudgeScoreDistributions -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.models.registry import ModelRegistry from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.artifact_storage import ArtifactStorage from data_designer.lazy_heavy_imports import pa, pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/tests/engine/conftest.py b/packages/data-designer-engine/tests/engine/conftest.py index b04580b9..0bf50da7 100644 --- a/packages/data-designer-engine/tests/engine/conftest.py +++ b/packages/data-designer-engine/tests/engine/conftest.py @@ -9,11 +9,11 @@ import pytest from data_designer.config.run_config import RunConfig -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.models.facade import ModelFacade from data_designer.engine.models.registry import ModelRegistry from data_designer.engine.resources.managed_storage import ManagedBlobStorage from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.artifact_storage import ArtifactStorage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py b/packages/data-designer-engine/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py index 07bd0ebb..67e96a0c 100644 --- a/packages/data-designer-engine/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +++ b/packages/data-designer-engine/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py @@ -9,9 +9,9 @@ import pytest -from data_designer.engine.dataset_builders.artifact_storage import BatchStage from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager from data_designer.engine.dataset_builders.utils.errors import DatasetBatchManagementError +from data_designer.engine.storage.artifact_storage import BatchStage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/tests/engine/processing/processors/test_drop_columns.py b/packages/data-designer-engine/tests/engine/processing/processors/test_drop_columns.py index 97662e98..022a3d42 100644 --- a/packages/data-designer-engine/tests/engine/processing/processors/test_drop_columns.py +++ b/packages/data-designer-engine/tests/engine/processing/processors/test_drop_columns.py @@ -9,8 +9,8 @@ import pytest from data_designer.config.processors import DropColumnsProcessorConfig -from data_designer.engine.dataset_builders.artifact_storage import BatchStage from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor +from data_designer.engine.storage.artifact_storage import BatchStage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/tests/engine/processing/processors/test_schema_transform.py b/packages/data-designer-engine/tests/engine/processing/processors/test_schema_transform.py index 69b5b357..7ce79c94 100644 --- a/packages/data-designer-engine/tests/engine/processing/processors/test_schema_transform.py +++ b/packages/data-designer-engine/tests/engine/processing/processors/test_schema_transform.py @@ -10,9 +10,9 @@ import pytest from data_designer.config.processors import SchemaTransformProcessorConfig -from data_designer.engine.dataset_builders.artifact_storage import BatchStage from data_designer.engine.processing.processors.schema_transform import SchemaTransformProcessor from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.artifact_storage import BatchStage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer-engine/tests/engine/resources/test_resource_provider.py b/packages/data-designer-engine/tests/engine/resources/test_resource_provider.py index f384c18c..5b046bff 100644 --- a/packages/data-designer-engine/tests/engine/resources/test_resource_provider.py +++ b/packages/data-designer-engine/tests/engine/resources/test_resource_provider.py @@ -6,13 +6,13 @@ import pytest from data_designer.config.mcp import LocalStdioMCPProvider, MCPProvider, ToolConfig -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.models.registry import ModelRegistry from data_designer.engine.resources.resource_provider import ( ResourceProvider, _validate_tool_configs_against_providers, create_resource_provider, ) +from data_designer.engine.storage.artifact_storage import ArtifactStorage def _stub_model_registry() -> ModelRegistry: diff --git a/packages/data-designer-engine/tests/engine/dataset_builders/test_artifact_storage.py b/packages/data-designer-engine/tests/engine/storage/test_artifact_storage.py similarity index 98% rename from packages/data-designer-engine/tests/engine/dataset_builders/test_artifact_storage.py rename to packages/data-designer-engine/tests/engine/storage/test_artifact_storage.py index 35edf892..79c9ce99 100644 --- a/packages/data-designer-engine/tests/engine/dataset_builders/test_artifact_storage.py +++ b/packages/data-designer-engine/tests/engine/storage/test_artifact_storage.py @@ -11,8 +11,8 @@ import pytest from pyarrow import ArrowNotImplementedError -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage, BatchStage from data_designer.engine.dataset_builders.errors import ArtifactStorageError +from data_designer.engine.storage.artifact_storage import ArtifactStorage, BatchStage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: @@ -201,7 +201,7 @@ def test_artifact_storage_batch_numbering(stub_artifact_storage, batch_number): assert path.name == expected_name -@patch("data_designer.engine.dataset_builders.artifact_storage.datetime") +@patch("data_designer.engine.storage.artifact_storage.datetime") def test_artifact_storage_resolved_dataset_name(mock_datetime, tmp_path): mock_datetime.now.return_value = datetime(2025, 1, 1, 12, 3, 4) diff --git a/packages/data-designer-engine/tests/engine/test_configurable_task.py b/packages/data-designer-engine/tests/engine/test_configurable_task.py index 6e3673de..772b4ee5 100644 --- a/packages/data-designer-engine/tests/engine/test_configurable_task.py +++ b/packages/data-designer-engine/tests/engine/test_configurable_task.py @@ -10,9 +10,9 @@ from data_designer.config.base import ConfigBase from data_designer.engine.configurable_task import ConfigurableTask, DataT, TaskConfigT -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.models.registry import ModelRegistry from data_designer.engine.resources.resource_provider import ResourceProvider +from data_designer.engine.storage.artifact_storage import ArtifactStorage from data_designer.lazy_heavy_imports import pd if TYPE_CHECKING: diff --git a/packages/data-designer/src/data_designer/integrations/huggingface/client.py b/packages/data-designer/src/data_designer/integrations/huggingface/client.py index 1d0a0f0e..a5da4bfb 100644 --- a/packages/data-designer/src/data_designer/integrations/huggingface/client.py +++ b/packages/data-designer/src/data_designer/integrations/huggingface/client.py @@ -13,7 +13,7 @@ from huggingface_hub.utils import HfHubHTTPError, validate_repo_id from data_designer.config.utils.constants import HUGGINGFACE_HUB_DATASET_URL_PREFIX -from data_designer.engine.dataset_builders.artifact_storage import ( +from data_designer.engine.storage.artifact_storage import ( FINAL_DATASET_FOLDER_NAME, METADATA_FILENAME, PROCESSORS_OUTPUTS_FOLDER_NAME, diff --git a/packages/data-designer/src/data_designer/interface/data_designer.py b/packages/data-designer/src/data_designer/interface/data_designer.py index a880a924..838198cf 100644 --- a/packages/data-designer/src/data_designer/interface/data_designer.py +++ b/packages/data-designer/src/data_designer/interface/data_designer.py @@ -33,7 +33,6 @@ from data_designer.config.utils.info import InfoType, InterfaceInfo from data_designer.engine.analysis.dataset_profiler import DataDesignerDatasetProfiler, DatasetProfilerConfig from data_designer.engine.compiler import compile_data_designer_config -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.dataset_builders.column_wise_builder import ColumnWiseDatasetBuilder from data_designer.engine.model_provider import resolve_model_provider_registry from data_designer.engine.resources.managed_storage import init_managed_blob_storage @@ -51,6 +50,7 @@ PlaintextResolver, SecretResolver, ) +from data_designer.engine.storage.artifact_storage import ArtifactStorage from data_designer.interface.errors import ( DataDesignerGenerationError, DataDesignerProfilingError, diff --git a/packages/data-designer/src/data_designer/interface/results.py b/packages/data-designer/src/data_designer/interface/results.py index f86acced..2d6f935d 100644 --- a/packages/data-designer/src/data_designer/interface/results.py +++ b/packages/data-designer/src/data_designer/interface/results.py @@ -10,8 +10,8 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.dataset_metadata import DatasetMetadata from data_designer.config.utils.visualization import WithRecordSamplerMixin -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage from data_designer.engine.dataset_builders.errors import ArtifactStorageError +from data_designer.engine.storage.artifact_storage import ArtifactStorage from data_designer.integrations.huggingface.client import HuggingFaceHubClient from data_designer.lazy_heavy_imports import pd diff --git a/packages/data-designer/tests/interface/test_results.py b/packages/data-designer/tests/interface/test_results.py index 827e6115..da45118a 100644 --- a/packages/data-designer/tests/interface/test_results.py +++ b/packages/data-designer/tests/interface/test_results.py @@ -14,7 +14,7 @@ from data_designer.config.preview_results import PreviewResults from data_designer.config.utils.errors import DatasetSampleDisplayError from data_designer.config.utils.visualization import display_sample_record as display_fn -from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage +from data_designer.engine.storage.artifact_storage import ArtifactStorage from data_designer.interface.results import DatasetCreationResults from data_designer.lazy_heavy_imports import pd