diff --git a/.vale.ini b/.vale.ini index 538f6a5..1ff85c8 100644 --- a/.vale.ini +++ b/.vale.ini @@ -2,7 +2,7 @@ StylesPath = "vale/styles" Vocab = docs Packages = Google -IgnoredScopes = code, tt, img, url, a +IgnoredScopes = code, tt, img, url, a, text.frontmatter SkippedScopes = script, style, pre, figure, code MinAlertLevel = warning @@ -10,7 +10,7 @@ MinAlertLevel = warning [formats] mdx = md -[*.{md, mdx}] +[*.{md,mdx}] # Ignore react components starting with export const # Ignore code blocks in triple backticks diff --git a/api-reference/go/datasets/As.mdx b/api-reference/go/datasets/As.mdx new file mode 100644 index 0000000..27e3a52 --- /dev/null +++ b/api-reference/go/datasets/As.mdx @@ -0,0 +1,41 @@ +--- +title: As +sidebarTitle: As +icon: layer-group +--- + +```go +func As[T proto.Message](seq iter.Seq2[[]byte, error]) iter.Seq2[T, error] +``` + +Convert a sequence of bytes into a sequence of `proto.Message`. + +Useful to convert the output of [`Datapoints.Query`](/api-reference/go/datasets/Datapoints.Query) into a sequence of `proto.Message`. + +## Parameters + + + The sequence of bytes to convert + + +## Returns + +A sequence of `proto.Message` or an error if any. 
+ + +```go Go +import ( + "time" + datasets "github.com/tilebox/tilebox-go/datasets/v1" + "github.com/tilebox/tilebox-go/query" +) + +startDate := time.Date(2014, 10, 4, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2021, 2, 24, 0, 0, 0, 0, time.UTC) +queryInterval := query.NewTimeInterval(startDate, endDate) + +seq := datasets.As[*v1.Sentinel1Sar]( + client.Datapoints.Query(ctx, collectionID, datasets.WithTemporalExtent(queryInterval)), +) +``` + diff --git a/api-reference/go/datasets/Collect.mdx b/api-reference/go/datasets/Collect.mdx new file mode 100644 index 0000000..daf2ae9 --- /dev/null +++ b/api-reference/go/datasets/Collect.mdx @@ -0,0 +1,41 @@ +--- +title: Collect +sidebarTitle: Collect +icon: layer-group +--- + +```go +func Collect[K any](seq iter.Seq2[K, error]) ([]K, error) +``` + +Convert any sequence into a slice. + +It return an error if any of the elements in the sequence has a non-nil error. + +## Parameters + + + The sequence of bytes to convert + + +## Returns + +A slice of `K` or an error if any. + + +```go Go +import ( + "time" + datasets "github.com/tilebox/tilebox-go/datasets/v1" + "github.com/tilebox/tilebox-go/query" +) + +startDate := time.Date(2014, 10, 4, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2021, 2, 24, 0, 0, 0, 0, time.UTC) +queryInterval := query.NewTimeInterval(startDate, endDate) + +datapoints, err := datasets.Collect(datasets.As[*v1.Sentinel1Sar]( + client.Datapoints.Query(ctx, collectionID, datasets.WithTemporalExtent(queryInterval)), +)) +``` + diff --git a/api-reference/go/datasets/CollectAs.mdx b/api-reference/go/datasets/CollectAs.mdx new file mode 100644 index 0000000..76c2c04 --- /dev/null +++ b/api-reference/go/datasets/CollectAs.mdx @@ -0,0 +1,43 @@ +--- +title: CollectAs +sidebarTitle: CollectAs +icon: layer-group +--- + +```go +func CollectAs[T proto.Message](seq iter.Seq2[[]byte, error]) ([]T, error) +``` + +Convert a sequence of bytes into a slice of `proto.Message`. 
+ +Useful to convert the output of [`Datapoints.Query`](/api-reference/go/datasets/Datapoints.Query) into a slice of `proto.Message`. + +This a convenience function for `Collect(As[T](seq))`. + +## Parameters + + + The sequence of bytes to convert + + +## Returns + +A slice of `proto.Message` or an error if any. + + +```go Go +import ( + "time" + datasets "github.com/tilebox/tilebox-go/datasets/v1" + "github.com/tilebox/tilebox-go/query" +) + +startDate := time.Date(2014, 10, 4, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2021, 2, 24, 0, 0, 0, 0, time.UTC) +queryInterval := query.NewTimeInterval(startDate, endDate) + +datapoints, err := datasets.CollectAs[*v1.Sentinel1Sar]( + client.Datapoints.Query(ctx, collectionID, datasets.WithTemporalExtent(queryInterval)), +) +``` + diff --git a/api-reference/go/datasets/Collections.Create.mdx b/api-reference/go/datasets/Collections.Create.mdx new file mode 100644 index 0000000..108f378 --- /dev/null +++ b/api-reference/go/datasets/Collections.Create.mdx @@ -0,0 +1,37 @@ +--- +title: Client.Collections.Create +sidebarTitle: Collections.Create +icon: layer-group +--- + +```go +func (collectionClient) Create( + ctx context.Context, + datasetID uuid.UUID, + collectionName string, +) (*datasets.Collection, error) +``` + +Create a collection in the dataset. + +## Parameters + + + The id of the dataset + + + The name of the collection + + +## Returns + +The created collection object. 
+ + +```go Go +collection, err := client.Collections.Create(ctx, + datasetID, + "My-collection", +) +``` + diff --git a/api-reference/go/datasets/Collections.Get.mdx b/api-reference/go/datasets/Collections.Get.mdx new file mode 100644 index 0000000..ced5b3d --- /dev/null +++ b/api-reference/go/datasets/Collections.Get.mdx @@ -0,0 +1,43 @@ +--- +title: Client.Collections.Get +sidebarTitle: Collections.Get +icon: layer-group +--- + +```go +func (collectionClient) Get( + ctx context.Context, + datasetID uuid.UUID, + name string, +) (*datasets.Collection, error) +``` + +Get a dataset by its slug. + +## Parameters + + + The id of the dataset + + + The name of the collection + + +## Returns + +The created collection object. + + +```go Go +collection, err := client.Collections.Get(ctx, + datasetID, + "My-collection", +) +``` + + +## Errors + + + The specified dataset does not exist. + diff --git a/api-reference/go/datasets/Collections.GetOrCreate.mdx b/api-reference/go/datasets/Collections.GetOrCreate.mdx new file mode 100644 index 0000000..bcec9ec --- /dev/null +++ b/api-reference/go/datasets/Collections.GetOrCreate.mdx @@ -0,0 +1,37 @@ +--- +title: Client.Collections.GetOrCreate +sidebarTitle: Collections.GetOrCreate +icon: layer-group +--- + +```go +func (collectionClient) GetOrCreate( + ctx context.Context, + datasetID uuid.UUID, + name string, +) (*datasets.Collection, error) +``` + +Get or create a collection by its name. If the collection does not exist, it will be created. + +## Parameters + + + The id of the dataset + + + The name of the collection + + +## Returns + +A collection object. 
+ + +```go Go +collection, err := client.Collections.GetOrCreate(ctx, + datasetID, + "My-collection", +) +``` + diff --git a/api-reference/go/datasets/Collections.List.mdx b/api-reference/go/datasets/Collections.List.mdx new file mode 100644 index 0000000..c1fc4d9 --- /dev/null +++ b/api-reference/go/datasets/Collections.List.mdx @@ -0,0 +1,38 @@ +--- +title: Client.Collections.List +sidebarTitle: Collections.List +icon: layer-group +--- + +```go +func (collectionClient) List( + ctx context.Context, + datasetID uuid.UUID, +) ([]*datasets.Collection, error) +``` + +List the available collections in a dataset. + +## Parameters + + + The id of the dataset + + +## Returns + +A list of collection objects. + + +```go Go +collections, err := client.Collections.List(ctx, + datasetID, +) +``` + + +## Errors + + + The specified dataset does not exist. + diff --git a/api-reference/go/datasets/Datapoints.Delete.mdx b/api-reference/go/datasets/Datapoints.Delete.mdx new file mode 100644 index 0000000..119a0c2 --- /dev/null +++ b/api-reference/go/datasets/Datapoints.Delete.mdx @@ -0,0 +1,42 @@ +--- +title: Client.Datapoints.Delete +sidebarTitle: Datapoints.Delete +icon: layer-group +--- + +```go +func (datapointClient) Delete( + ctx context.Context, + collectionID uuid.UUID, + datapoints any, +) (int64, error) +``` + +Delete data points from a collection. + +Data points are identified and deleted by their ids. + +## Parameters + + + The id of the collection + + + The datapoints to delete from the collection + + +## Returns + +The number of data points that were deleted. 
+ + +```go Go +var datapoints []*v1.Sentinel1Sar +// assuming the slice is filled with datapoints + +numDeleted, err := client.Datapoints.Delete(ctx, + collectionID, + datapoints, +) +``` + diff --git a/api-reference/go/datasets/Datapoints.DeleteIDs.mdx b/api-reference/go/datasets/Datapoints.DeleteIDs.mdx new file mode 100644 index 0000000..2bae11a --- /dev/null +++ b/api-reference/go/datasets/Datapoints.DeleteIDs.mdx @@ -0,0 +1,40 @@ +--- +title: Client.Datapoints.DeleteIDs +sidebarTitle: Datapoints.DeleteIDs +icon: layer-group +--- + +```go +func (datapointClient) DeleteIDs( + ctx context.Context, + collectionID uuid.UUID, + datapointIDs []uuid.UUID, +) (int64, error) +``` + +Delete data points from a collection. + +## Parameters + + + The id of the collection + + + The ids of the data points to delete from the collection + + +## Returns + +The number of data points that were deleted. + + +```go Go +numDeleted, err := client.Datapoints.DeleteIDs(ctx, + collectionID, + []uuid.UUID{ + uuid.MustParse("0195c87a-49f6-5ffa-e3cb-92215d057ea6"), + uuid.MustParse("0195c87b-bd0e-3998-05cf-af6538f34957"), + }, +) +``` + diff --git a/api-reference/go/datasets/Datapoints.GetInto.mdx b/api-reference/go/datasets/Datapoints.GetInto.mdx new file mode 100644 index 0000000..0c50ad5 --- /dev/null +++ b/api-reference/go/datasets/Datapoints.GetInto.mdx @@ -0,0 +1,54 @@ +--- +title: Client.Datapoints.GetInto +sidebarTitle: Datapoints.GetInto +icon: layer-group +--- + +```go +func (datapointClient) GetInto( + ctx context.Context, + collectionIDs []uuid.UUID, + datapointID uuid.UUID, + datapoint proto.Message, + options ...QueryOption, +) error +``` + +Get a data point by its id from one of the specified collections. + +The data point is stored in the `datapoint` parameter. + +## Parameters + + + The ids of the collections to query + + + The id of the datapoint to query + + + The datapoint to query into + + + Options for querying data points. 
+ + +## Options + + + Skip the data when querying datapoint. + If set, only the required and auto-generated fields will be returned. + + +## Returns + +An error if data point could not be queried. + + +```go Go +var datapoint v1.Sentinel1Sar +err = client.Datapoints.GetInto(ctx, + []uuid.UUID{collection.ID}, datapointID, &datapoint, +) +``` + diff --git a/api-reference/go/datasets/Datapoints.Ingest.mdx b/api-reference/go/datasets/Datapoints.Ingest.mdx new file mode 100644 index 0000000..da941fe --- /dev/null +++ b/api-reference/go/datasets/Datapoints.Ingest.mdx @@ -0,0 +1,63 @@ +--- +title: Client.Datapoints.Ingest +sidebarTitle: Datapoints.Ingest +icon: layer-group +--- + +```go +func (datapointClient) Ingest( + ctx context.Context, + collectionID uuid.UUID, + datapoints any, + allowExisting bool, +) (*datasets.IngestResponse, error) +``` + +Ingest data points into a collection. + +## Parameters + + + The id of the collection + + + The datapoints to ingest + + + Datapoint fields are used to generate a deterministic unique `UUID` for each + datapoint in a collection. Duplicate data points result in the same ID being generated. + If `allowExisting` is `true`, `ingest` will skip those datapoints, since they already exist. + If `allowExisting` is `false`, `ingest` will raise an error if any of the generated datapoint IDs already exist. + + +## Returns + +The list of datapoint ids that were ingested, including the IDs of existing data points in case of duplicates and +`allowExisting=true`. 
+ + +```go Go +datapoints := []*v1.Modis{ + v1.Modis_builder{ + Time: timestamppb.New(time.Now()), + GranuleName: proto.String("Granule 1"), + }.Build(), + v1.Modis_builder{ + Time: timestamppb.New(time.Now().Add(-5 * time.Hour)), + GranuleName: proto.String("Past Granule 2"), + }.Build(), +} + +ingestResponse, err := client.Datapoints.Ingest(ctx, + collectionID, + datapoints + false, +) +``` + + +## Errors + + + If `allowExisting` is `False` and any of the datapoints attempting to ingest already exist. + diff --git a/api-reference/go/datasets/Datapoints.Query.mdx b/api-reference/go/datasets/Datapoints.Query.mdx new file mode 100644 index 0000000..02f9d98 --- /dev/null +++ b/api-reference/go/datasets/Datapoints.Query.mdx @@ -0,0 +1,64 @@ +--- +title: Client.Datapoints.Query +sidebarTitle: Datapoints.Query +icon: layer-group +--- + +```go +func (datapointClient) Query( + ctx context.Context, + collectionID uuid.UUID, + options ...datasets.QueryOption, +) iter.Seq2[[]byte, error] +``` + +Query a range of data points in this collection in a specified interval. + +The datapoints are lazily queried and returned as a sequence of bytes. +The output sequence can be transformed into a typed `proto.Message` using [CollectAs](/api-reference/go/datasets/CollectAs) or [As](/api-reference/go/datasets/As) functions. + +## Parameters + + + The id of the collection + + + Options for querying data points + + +## Options + + + Specify the time interval for which data should be queried. + Right now, a temporal extent is required for every query. + + + Specify the geographical extent in which to query data. + Optional, if not specified the query will return all results found globally. + + + Skip the data when querying datapoints. + If set, only the required and auto-generated fields will be returned. + + +## Returns + +A sequence of bytes containing the requested data points as bytes. 
+ + +```go Go +import ( + "time" + datasets "github.com/tilebox/tilebox-go/datasets/v1" + "github.com/tilebox/tilebox-go/query" +) + +startDate := time.Date(2014, 10, 4, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2021, 2, 24, 0, 0, 0, 0, time.UTC) +queryInterval := query.NewTimeInterval(startDate, endDate) + +datapoints, err := datasets.CollectAs[*v1.Sentinel1Sar]( + client.Datapoints.Query(ctx, collectionID, datasets.WithTemporalExtent(queryInterval)), +) +``` + diff --git a/api-reference/go/datasets/Datapoints.QueryInto.mdx b/api-reference/go/datasets/Datapoints.QueryInto.mdx new file mode 100644 index 0000000..ad54f01 --- /dev/null +++ b/api-reference/go/datasets/Datapoints.QueryInto.mdx @@ -0,0 +1,70 @@ +--- +title: Client.Datapoints.QueryInto +sidebarTitle: Datapoints.QueryInto +icon: layer-group +--- + +```go +func (datapointClient) QueryInto( + ctx context.Context, + collectionID uuid.UUID, + datapoints any, + options ...datasets.QueryOption, +) error +``` + +Query a range of data points in this collection in a specified interval. + +QueryInto is a convenience function for [Query](/api-reference/go/datasets/Datapoints.Query), when no manual pagination or custom iteration is required. + +## Parameters + + + The id of the collection + + + The datapoints to query into + + + Options for querying data points + + +## Options + + + Specify the time interval for which data should be queried. + Right now, a temporal extent is required for every query. + + + Specify the geographical extent in which to query data. + Optional, if not specified the query will return all results found globally. + + + Skip the data when querying datapoints. + If set, only the required and auto-generated fields will be returned. + + +## Returns + +An error if data points could not be queried. 
+ + +```go Go +import ( + "time" + datasets "github.com/tilebox/tilebox-go/datasets/v1" + "github.com/tilebox/tilebox-go/query" +) + +startDate := time.Date(2014, 10, 4, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2021, 2, 24, 0, 0, 0, 0, time.UTC) +queryInterval := query.NewTimeInterval(startDate, endDate) + +var datapoints []*v1.Sentinel1Sar +err := client.Datapoints.QueryInto(ctx, + collectionID, + &datapoints, + datasets.WithTemporalExtent(queryInterval), +) +``` + diff --git a/api-reference/go/datasets/Get.mdx b/api-reference/go/datasets/Get.mdx new file mode 100644 index 0000000..6a0aa04 --- /dev/null +++ b/api-reference/go/datasets/Get.mdx @@ -0,0 +1,38 @@ +--- +title: Client.Datasets.Get +sidebarTitle: Get +icon: laptop-code +--- + +```go +func (datasetClient) Get( + ctx context.Context, + slug string, +) (*datasets.Dataset, error) +``` + +Get a dataset by its slug. + +## Parameters + + + The slug of the dataset + + +## Returns + +A dataset object. + + +```go Go +s1_sar, err := client.Datasets.Get(ctx, + "open_data.copernicus.sentinel1_sar" +) +``` + + +## Errors + + + The specified dataset does not exist. + diff --git a/api-reference/go/datasets/List.mdx b/api-reference/go/datasets/List.mdx new file mode 100644 index 0000000..67efb66 --- /dev/null +++ b/api-reference/go/datasets/List.mdx @@ -0,0 +1,21 @@ +--- +title: Client.Datasets.List +sidebarTitle: List +icon: laptop-code +--- + +```go +func (datasetClient) List(ctx context.Context) ([]*datasets.Dataset, error) +``` + +Fetch all available datasets. + +## Returns + +A list of all available datasets. 
+ + +```go Go +datasets, err := client.Datasets.List(ctx) +``` + diff --git a/api-reference/go/workflows/Clusters.Create.mdx b/api-reference/go/workflows/Clusters.Create.mdx new file mode 100644 index 0000000..288c6d9 --- /dev/null +++ b/api-reference/go/workflows/Clusters.Create.mdx @@ -0,0 +1,32 @@ +--- +title: Client.Clusters.Create +sidebarTitle: Clusters.Create +icon: circle-nodes +--- + +```go +func (*ClusterClient) Create( + ctx context.Context, + name string, +) (*workflows.Cluster, error) +``` + +Create a cluster. + +## Parameters + + + A display name for the cluster + + +## Returns + +The created cluster object. + + +```go Go +cluster, err := client.Clusters.Create(ctx, + "My cluster" +) +``` + diff --git a/api-reference/go/workflows/Clusters.Delete.mdx b/api-reference/go/workflows/Clusters.Delete.mdx new file mode 100644 index 0000000..203cbef --- /dev/null +++ b/api-reference/go/workflows/Clusters.Delete.mdx @@ -0,0 +1,35 @@ +--- +title: Client.Clusters.Delete +sidebarTitle: Clusters.Delete +icon: circle-nodes +--- + +```go +func (*ClusterClient) Delete(ctx context.Context, slug string) error +``` + +Delete a cluster by its slug. + +## Parameters + + + The slug of the cluster to delete + + +## Returns + +An error if the cluster could not be deleted. + + +```go Go +err := client.Clusters.Delete(ctx, + "my-cluster-tZD9Ca1qsqt3V" +) +``` + + +## Errors + + + The specified cluster does not exist. + diff --git a/api-reference/go/workflows/Clusters.Get.mdx b/api-reference/go/workflows/Clusters.Get.mdx new file mode 100644 index 0000000..249e553 --- /dev/null +++ b/api-reference/go/workflows/Clusters.Get.mdx @@ -0,0 +1,38 @@ +--- +title: Client.Clusters.Get +sidebarTitle: Clusters.Get +icon: circle-nodes +--- + +```go +func (*ClusterClient) Get( + ctx context.Context, + slug string, +) (*workflows.Cluster, error) +``` + +Get a cluster by its slug. + +## Parameters + + + The slug of the cluster + + +## Returns + +A cluster object. 
+ + +```go Go +cluster, err := client.Clusters.Get(ctx, + "my-cluster-tZD9Ca1qsqt3V" +) +``` + + +## Errors + + + The specified cluster does not exist. + diff --git a/api-reference/go/workflows/Clusters.List.mdx b/api-reference/go/workflows/Clusters.List.mdx new file mode 100644 index 0000000..6f0692e --- /dev/null +++ b/api-reference/go/workflows/Clusters.List.mdx @@ -0,0 +1,21 @@ +--- +title: Client.Clusters.List +sidebarTitle: Clusters.List +icon: circle-nodes +--- + +```go +func (*ClusterClient) List(ctx context.Context) ([]*workflows.Cluster, error) +``` + +Fetch all available clusters. + +## Returns + +A list of all available clusters. + + +```go Go +clusters, err := client.Clusters.List(ctx) +``` + diff --git a/api-reference/go/workflows/Collect.mdx b/api-reference/go/workflows/Collect.mdx new file mode 100644 index 0000000..3a20832 --- /dev/null +++ b/api-reference/go/workflows/Collect.mdx @@ -0,0 +1,31 @@ +--- +title: Collect +sidebarTitle: Collect +icon: diagram-project +--- + +```go +func Collect[K any](seq iter.Seq2[K, error]) ([]K, error) +``` + +Convert any sequence into a slice. + +It return an error if any of the elements in the sequence has a non-nil error. + +## Parameters + + + The sequence of bytes to convert + + +## Returns + +A slice of `K` or an error if any. + + +```go Go +jobs, err := workflows.Collect( + client.Jobs.List(ctx, interval), +) +``` + diff --git a/api-reference/go/workflows/GetCurrentCluster.mdx b/api-reference/go/workflows/GetCurrentCluster.mdx new file mode 100644 index 0000000..acee8db --- /dev/null +++ b/api-reference/go/workflows/GetCurrentCluster.mdx @@ -0,0 +1,32 @@ +--- +title: workflows.GetCurrentCluster +sidebarTitle: GetCurrentCluster +icon: code +--- + +```go +workflows.GetCurrentCluster(ctx context.Context) (string, error) +``` + +Get the current cluster slug. + +This function is intended to be used in tasks. + +## Returns + +The current cluster slug. 
+ + +```go Go +type Task struct{} + +func (t *Task) Execute(ctx context.Context) error { + clusterSlug, err := workflows.GetCurrentCluster(ctx) + if err != nil { + return fmt.Errorf("failed to get current cluster: %w", err) + } + + return nil +} +``` + diff --git a/api-reference/go/workflows/Jobs.Cancel.mdx b/api-reference/go/workflows/Jobs.Cancel.mdx new file mode 100644 index 0000000..33142ef --- /dev/null +++ b/api-reference/go/workflows/Jobs.Cancel.mdx @@ -0,0 +1,29 @@ +--- +title: Client.Jobs.Cancel +sidebarTitle: Jobs.Cancel +icon: diagram-project +--- + +```go +func (*JobClient) Cancel(ctx context.Context, jobID uuid.UUID) error +``` + +Cancel a job. When a job is canceled, no queued tasks will be picked up by task runners and executed even if task runners are idle. Tasks that are already being executed will finish their execution and not be interrupted. All sub-tasks spawned from such tasks after the cancellation will not be picked up by task runners. + +## Parameters + + + The id of the job + + +## Returns + +An error if the job could not be cancelled. + + +```go Go +err := client.Jobs.Cancel(ctx, + uuid.MustParse("0195c87a-49f6-5ffa-e3cb-92215d057ea6"), +) +``` + diff --git a/api-reference/go/workflows/Jobs.Get.mdx b/api-reference/go/workflows/Jobs.Get.mdx new file mode 100644 index 0000000..d712b10 --- /dev/null +++ b/api-reference/go/workflows/Jobs.Get.mdx @@ -0,0 +1,38 @@ +--- +title: Client.Jobs.Get +sidebarTitle: Jobs.Get +icon: diagram-project +--- + +```go +func (*JobClient) Get( + ctx context.Context, + jobID uuid.UUID, +) (*workflows.Job, error) +``` + +Get a job by its id. + +## Parameters + + + The id of the job + + +## Returns + +A job object. + + +```go Go +job, err := client.Jobs.Get(ctx, + uuid.MustParse("0195c87a-49f6-5ffa-e3cb-92215d057ea6"), +) +``` + + +## Errors + + + The specified job does not exist. 
+ diff --git a/api-reference/go/workflows/Jobs.List.mdx b/api-reference/go/workflows/Jobs.List.mdx new file mode 100644 index 0000000..5afff72 --- /dev/null +++ b/api-reference/go/workflows/Jobs.List.mdx @@ -0,0 +1,47 @@ +--- +title: Client.Jobs.List +sidebarTitle: Jobs.List +icon: diagram-project +--- + +```go +func (*JobClient) List( + ctx context.Context, + interval query.TemporalExtent, +) iter.Seq2[*workflows.Job, error] +``` + +List all available jobs. + +The jobs are lazily loaded and returned as a sequence of Jobs. +The jobs are returned sorted by creation time in reverse order. +The output sequence can be transformed into a slice of Job using [Collect](/api-reference/go/workflows/Collect) function. + +## Parameters + + + The interval for which to load jobs + + +## Returns + +A sequence of jobs. + + +```go Go +import ( + "time" + workflows "github.com/tilebox/tilebox-go/workflows/v1" + "github.com/tilebox/tilebox-go/query" +) + +interval := query.NewTimeInterval( + time.Now().Add(-24 * time.Hour), + time.Now(), +) + +jobs, err := workflows.Collect( + client.Jobs.List(ctx, interval), +) +``` + diff --git a/api-reference/go/workflows/Jobs.Retry.mdx b/api-reference/go/workflows/Jobs.Retry.mdx new file mode 100644 index 0000000..3fb773e --- /dev/null +++ b/api-reference/go/workflows/Jobs.Retry.mdx @@ -0,0 +1,38 @@ +--- +title: Client.Jobs.Retry +sidebarTitle: Jobs.Retry +icon: diagram-project +--- + +```go +func (*JobClient) Retry( + ctx context.Context, + jobID uuid.UUID, +) (int64, error) +``` + +Retry a job. All failed tasks will become queued again, and queued tasks will be picked up by task runners again. + +## Parameters + + + The id of the job to retry + + +## Returns + +The number of tasks that were rescheduled. + + +```go Go +nbRescheduled, err := client.Jobs.Retry(ctx, + uuid.MustParse("0195c87a-49f6-5ffa-e3cb-92215d057ea6"), +) +``` + + +## Errors + + + The specified job does not exist. 
+ diff --git a/api-reference/go/workflows/Jobs.Submit.mdx b/api-reference/go/workflows/Jobs.Submit.mdx new file mode 100644 index 0000000..4af4e16 --- /dev/null +++ b/api-reference/go/workflows/Jobs.Submit.mdx @@ -0,0 +1,52 @@ +--- +title: Client.Jobs.Submit +sidebarTitle: Jobs.Submit +icon: diagram-project +--- + +```go +func (*JobClient) Submit( + ctx context.Context, + jobName string, + cluster *workflows.Cluster, + tasks []workflows.Task, + options ...job.SubmitOption +) (*workflows.Job, error) +``` + +Submit a job. + +## Parameters + + + The name of the job + + + The [cluster](/workflows/concepts/clusters#managing-clusters) to run the root task on + + + The root task for the job. This task is executed first and can submit subtasks to manage the entire workflow. A job can have optionally consist of multiple root tasks. + + + Options for the job + + +## Options + + + Set the maximum number of [retries](/workflows/concepts/tasks#retry-handling) for the subtask in case it fails + + +## Returns + +A job object. + + +```go Go +job, err := client.Jobs.Submit(ctx, + "My job", + cluster, + []workflows.Task{rootTask}, +) +``` + diff --git a/api-reference/go/workflows/NewTaskRunner.mdx b/api-reference/go/workflows/NewTaskRunner.mdx new file mode 100644 index 0000000..fbac3c6 --- /dev/null +++ b/api-reference/go/workflows/NewTaskRunner.mdx @@ -0,0 +1,42 @@ +--- +title: Client.NewTaskRunner +sidebarTitle: NewTaskRunner +icon: gear-code +--- + +```go +func (*Client) NewTaskRunner( + cluster *workflows.Cluster, + options ...runner.Option, +) (*workflows.TaskRunner, error) +``` + +Initialize a task runner. + +## Parameters + + + The [cluster](/workflows/concepts/clusters#managing-clusters) to connect to + + + Options for initializing the task runner + + +## Options + + + Set the logger to use for the task runner + + + Disable OpenTelemetry metrics for the task runner + + +## Returns + +The created task runner object. 
+ + +```go Go +runner, err := client.NewTaskRunner() +``` + diff --git a/api-reference/go/workflows/SubmitSubtask.mdx b/api-reference/go/workflows/SubmitSubtask.mdx new file mode 100644 index 0000000..a4e1738 --- /dev/null +++ b/api-reference/go/workflows/SubmitSubtask.mdx @@ -0,0 +1,68 @@ +--- +title: workflows.SubmitSubtask +sidebarTitle: SubmitSubtask +icon: code +--- + +```go +workflows.SubmitSubtask( + ctx context.Context, + task workflows.Task, + options ...subtask.SubmitOption, +) (subtask.FutureTask, error) +``` + +Submit a subtask to the task runner. + +This function is intended to be used in tasks. + +## Parameters + + + A subtask to submit + + + Options for the subtask + + +## Options + + + Set dependencies for the task + + + Set the cluster slug of the cluster where the task will be executed. + + + Set the maximum number of [retries](/workflows/concepts/tasks#retry-handling) for the subtask in case it fails + + +## Returns + +A future task that can be used to set dependencies between tasks. + + +```go Go + +type MySubTask struct { + Sensor string + Value float64 +} + +type Task struct{} + +func (t *Task) Execute(ctx context.Context) error { + err := workflows.SubmitSubtask(ctx, + &MySubTask{ + Sensor: "A", + Value: 42, + }, + ) + if err != nil { + return fmt.Errorf("failed to submit subtasks: %w", err) + } + + return nil +} +``` + diff --git a/api-reference/go/workflows/SubmitSubtasks.mdx b/api-reference/go/workflows/SubmitSubtasks.mdx new file mode 100644 index 0000000..6e87f3e --- /dev/null +++ b/api-reference/go/workflows/SubmitSubtasks.mdx @@ -0,0 +1,74 @@ +--- +title: workflows.SubmitSubtasks +sidebarTitle: SubmitSubtasks +icon: code +--- + +```go +workflows.SubmitSubtasks( + ctx context.Context, + tasks []workflows.Task, + options ...subtask.SubmitOption, +) ([]subtask.FutureTask, error) +``` + +Submit multiple subtasks to the task runner. Same as [SubmitSubtask](/api-reference/go/workflows/SubmitSubtask), but accepts a list of tasks. 
+ +This function is intended to be used in tasks. + +## Parameters + + + A list of tasks to submit + + + Options for the subtasks + + +## Options + + + Set dependencies for the tasks + + + Set the cluster slug of the cluster where the tasks will be executed. + + + Set the maximum number of [retries](/workflows/concepts/tasks#retry-handling) for the subtasks in case it fails + + +## Returns + +A list of future tasks that can be used to set dependencies between tasks. + + +```go Go + +type MySubTask struct { + Sensor string + Value float64 +} + +type Task struct{} + +func (t *Task) Execute(ctx context.Context) error { + err := workflows.SubmitSubtasks(ctx, + []workflows.Task{ + &MySubTask{ + Sensor: "A", + Value: 42, + }, + &MySubTask{ + Sensor: "B", + Value: 42, + } + }, + ) + if err != nil { + return fmt.Errorf("failed to submit subtasks: %w", err) + } + + return nil +} +``` + diff --git a/api-reference/go/workflows/Task.mdx b/api-reference/go/workflows/Task.mdx new file mode 100644 index 0000000..e127657 --- /dev/null +++ b/api-reference/go/workflows/Task.mdx @@ -0,0 +1,88 @@ +--- +title: Task +icon: code +--- + +```go +type Task interface{} +``` + +Base interface for Tilebox workflows [tasks](/workflows/concepts/tasks). +It doesn't need to be identifiable or executable, but it can be both (see below). + +## Methods + +```go +Task.Execute(ctx context.Context) error +``` + +The entry point for the execution of the task. +If not defined, the task can't be registered with a task runner but can still be submitted. + +```go +Task.Identifier() TaskIdentifier +``` + +Provides a user-defined task identifier. +The identifier is used to uniquely identify the task and specify its version. +If not defined, the task runner will generate an identifier for it using reflection. 
+ +## JSON-serializable task + +```go +type SampleTask struct { + Message string + Depth int + BranchFactor int +} +``` + +Optional task [input parameters](/workflows/concepts/tasks#input-parameters), defined as struct fields. +Supported types are all types supported by [json.Marshal](https://pkg.go.dev/encoding/json#Marshal). + +## Protobuf-serializable task + +```go +type SampleTask struct { + examplesv1.SpawnWorkflowTreeTask +} +``` + +Task can also be defined as a protobuf message. +An example using task protobuf messages can be found [here](https://github.com/tilebox/tilebox-go/tree/main/examples/sampleworkflow). + + +```go Go +package helloworld + +import ( + "context" + "fmt" + "github.com/tilebox/tilebox-go/workflows/v1" +) + +type MyFirstTask struct{} + +func (t *MyFirstTask) Execute(ctx context.Context) error { + fmt.Println("Hello World!") + return nil +} + +func (t *MyFirstTask) Identifier() workflows.TaskIdentifier { + return workflows.NewTaskIdentifier("tilebox.workflows.MyTask", "v3.2") +} + +type MyFirstParameterizedTask struct { + Name string + Greet bool + Data map[string]string +} + +func (t *MyFirstParameterizedTask) Execute(ctx context.Context) error { + if t.Greet { + fmt.Printf("Hello %s!\n", t.Name) + } + return nil +} +``` + diff --git a/api-reference/go/workflows/TaskRunner.GetRegisteredTask.mdx b/api-reference/go/workflows/TaskRunner.GetRegisteredTask.mdx new file mode 100644 index 0000000..bf5c059 --- /dev/null +++ b/api-reference/go/workflows/TaskRunner.GetRegisteredTask.mdx @@ -0,0 +1,33 @@ +--- +title: TaskRunner.GetRegisteredTask +sidebarTitle: TaskRunner.GetRegiste... +icon: gear-code +--- + +```go +func (*TaskRunner) GetRegisteredTask( + identifier workflows.TaskIdentifier, +) (workflows.ExecutableTask, bool) +``` + +Get the task with the given identifier. + +## Parameters + + + A display name for the cluster + + +## Returns + +The registered task. Returns `false` if not found. 
+ + +```go Go +identifier := workflows.NewTaskIdentifier("my-task", "v1.0") + +task, found := runner.GetRegisteredTask( + identifier, +) +``` + diff --git a/api-reference/go/workflows/TaskRunner.RegisterTasks.mdx b/api-reference/go/workflows/TaskRunner.RegisterTasks.mdx new file mode 100644 index 0000000..190248e --- /dev/null +++ b/api-reference/go/workflows/TaskRunner.RegisterTasks.mdx @@ -0,0 +1,29 @@ +--- +title: TaskRunner.RegisterTasks +icon: gear-code +--- + +```go +func (*TaskRunner) RegisterTasks(tasks ...workflows.ExecutableTask) error +``` + +Register tasks that can be executed by this task runner. + +## Parameters + + + A list of task classes that this runner can execute + + +## Returns + +An error if the tasks could not be registered. + + +```go Go +err := runner.RegisterTasks( + &MyTask{}, + &MyOtherTask{}, +) +``` + diff --git a/api-reference/go/workflows/TaskRunner.Run.mdx b/api-reference/go/workflows/TaskRunner.Run.mdx new file mode 100644 index 0000000..d9b2490 --- /dev/null +++ b/api-reference/go/workflows/TaskRunner.Run.mdx @@ -0,0 +1,16 @@ +--- +title: TaskRunner.Run +icon: gear-code +--- + +```go +func (*TaskRunner) Run(ctx context.Context) +``` + +Run the task runner forever, looking for new tasks to run and polling for new tasks when idle. + + +```go Go +runner.Run(ctx) +``` + diff --git a/api-reference/go/workflows/WithTaskSpan.mdx b/api-reference/go/workflows/WithTaskSpan.mdx new file mode 100644 index 0000000..b4f5361 --- /dev/null +++ b/api-reference/go/workflows/WithTaskSpan.mdx @@ -0,0 +1,46 @@ +--- +title: workflows.WithTaskSpan +sidebarTitle: WithTaskSpan +icon: code +--- + +```go +workflows.WithTaskSpan( + ctx context.Context, + name string, + f func(ctx context.Context) error, +) error +``` + +Wrap a function with a [tracing span](/workflows/observability/tracing). + +## Parameters + + + The name of the span + + + The function to wrap + + +## Returns + +An error if any. 
+ + +```go Go +type Task struct{} + +func (t *Task) Execute(ctx context.Context) error { + err := workflows.WithTaskSpan(ctx, "Database insert", func(ctx context.Context) error { + // Do something + return nil + }) + if err != nil { + return fmt.Errorf("failed to insert into database: %w", err) + } + + return nil +} +``` + diff --git a/api-reference/go/workflows/WithTaskSpanResult.mdx b/api-reference/go/workflows/WithTaskSpanResult.mdx new file mode 100644 index 0000000..5d4fd37 --- /dev/null +++ b/api-reference/go/workflows/WithTaskSpanResult.mdx @@ -0,0 +1,45 @@ +--- +title: workflows.WithTaskSpanResult +sidebarTitle: WithTaskSpanResult +icon: code +--- + +```go +workflows.WithTaskSpanResult[Result any]( + ctx context.Context, + name string, + f func(ctx context.Context) (Result, error), +) (Result, error) +``` + +Wrap a function with a [tracing span](/workflows/observability/tracing). + +## Parameters + + + The name of the span + + + The function to wrap + + +## Returns + +The result of the function and an error if any. 
+ + +```go Go +type Task struct{} + +func (t *Task) Execute(ctx context.Context) error { + result, err := workflows.WithTaskSpanResult(ctx, "Expensive Compute", func(ctx context.Context) (int, error) { + return 6 * 7, nil + }) + if err != nil { + return fmt.Errorf("failed to compute: %w", err) + } + + fmt.Println(result) + + return nil +} +``` + diff --git a/api-reference/tilebox.datasets/Client.dataset.mdx b/api-reference/python/tilebox.datasets/Client.dataset.mdx similarity index 100% rename from api-reference/tilebox.datasets/Client.dataset.mdx rename to api-reference/python/tilebox.datasets/Client.dataset.mdx diff --git a/api-reference/tilebox.datasets/Client.datasets.mdx b/api-reference/python/tilebox.datasets/Client.datasets.mdx similarity index 100% rename from api-reference/tilebox.datasets/Client.datasets.mdx rename to api-reference/python/tilebox.datasets/Client.datasets.mdx diff --git a/api-reference/tilebox.datasets/Client.mdx b/api-reference/python/tilebox.datasets/Client.mdx similarity index 100% rename from api-reference/tilebox.datasets/Client.mdx rename to api-reference/python/tilebox.datasets/Client.mdx diff --git a/api-reference/tilebox.datasets/Collection.delete.mdx b/api-reference/python/tilebox.datasets/Collection.delete.mdx similarity index 89% rename from api-reference/tilebox.datasets/Collection.delete.mdx rename to api-reference/python/tilebox.datasets/Collection.delete.mdx index f70615b..9923782 100644 --- a/api-reference/tilebox.datasets/Collection.delete.mdx +++ b/api-reference/python/tilebox.datasets/Collection.delete.mdx @@ -7,12 +7,12 @@ icon: layer-group def Collection.delete(datapoints: DatapointIDs) -> int ``` -Delete datapoints from the collection. +Delete data points from the collection. -Datapoints are identified and deleted by their ids. +Data points are identified and deleted by their ids. - You need to have write permission on the collection to be able to delete datapoints. 
+ You need to have write permission on the collection to be able to delete data points. ## Parameters @@ -32,7 +32,7 @@ Datapoints are identified and deleted by their ids. ## Returns -The number of datapoints that were deleted. +The number of data points that were deleted. ```python Python diff --git a/api-reference/tilebox.datasets/Collection.find.mdx b/api-reference/python/tilebox.datasets/Collection.find.mdx similarity index 89% rename from api-reference/tilebox.datasets/Collection.find.mdx rename to api-reference/python/tilebox.datasets/Collection.find.mdx index 502df97..3c8e39b 100644 --- a/api-reference/tilebox.datasets/Collection.find.mdx +++ b/api-reference/python/tilebox.datasets/Collection.find.mdx @@ -26,6 +26,8 @@ Find a specific datapoint in a collection by its id. An [`xarray.Dataset`](/sdks/python/xarray) containing the requested data point. +Since it returns only a single data point, the output xarray dataset does not include a `time` dimension. + ## Errors diff --git a/api-reference/tilebox.datasets/Collection.info.mdx b/api-reference/python/tilebox.datasets/Collection.info.mdx similarity index 80% rename from api-reference/tilebox.datasets/Collection.info.mdx rename to api-reference/python/tilebox.datasets/Collection.info.mdx index 3028d52..7a79924 100644 --- a/api-reference/tilebox.datasets/Collection.info.mdx +++ b/api-reference/python/tilebox.datasets/Collection.info.mdx @@ -7,7 +7,7 @@ icon: layer-group def Collection.info() -> CollectionInfo ``` -Fetch metadata about the datapoints in this collection. +Fetch metadata about the data points in this collection. 
## Returns diff --git a/api-reference/tilebox.datasets/Collection.ingest.mdx b/api-reference/python/tilebox.datasets/Collection.ingest.mdx similarity index 76% rename from api-reference/tilebox.datasets/Collection.ingest.mdx rename to api-reference/python/tilebox.datasets/Collection.ingest.mdx index 4b769ba..03b655c 100644 --- a/api-reference/tilebox.datasets/Collection.ingest.mdx +++ b/api-reference/python/tilebox.datasets/Collection.ingest.mdx @@ -13,7 +13,7 @@ def Collection.ingest( Ingest data into a collection. - You need to have write permission on the collection to be able to delete datapoints. + You need to have write permission on the collection to be able to delete data points. ## Parameters @@ -24,20 +24,20 @@ Ingest data into a collection. Supported `IngestionData` data types are: - A `pandas.DataFrame`, mapping the column names to dataset fields. - An `xarray.Dataset`, mapping variables and coordinates to dataset fields. - - `Iterable`, `dict` or `nd-array`: Ingest any object that can be converted to a `pandas.DataFrame` using + - An `Iterable`, `dict` or `nd-array`: ingest any object that can be converted to a `pandas.DataFrame` using it's constructor, equivalent to `ingest(pd.DataFrame(data))`. Datapoint fields are used to generate a deterministic unique `UUID` for each - datapoint in a collection. Duplicate datapoints result in the same ID being generated. - If `allow_existing` is `True`, `ingest` will skip those datapoints, since they already exist. + datapoint in a collection. Duplicate data points result in the same ID being generated. + If `allow_existing` is `True`, `ingest` will skip those data points, since they already exist. If `allow_existing` is `False`, `ingest` will raise an error if any of the generated datapoint IDs already exist. Defaults to `True`. 
## Returns -List of datapoint ids that were ingested, including the IDs of already existing datapoints in case of duplicates and +List of datapoint ids that were ingested, including the IDs of existing data points in case of duplicates and `allow_existing=True`. diff --git a/api-reference/tilebox.datasets/Collection.load.mdx b/api-reference/python/tilebox.datasets/Collection.load.mdx similarity index 100% rename from api-reference/tilebox.datasets/Collection.load.mdx rename to api-reference/python/tilebox.datasets/Collection.load.mdx diff --git a/api-reference/tilebox.datasets/Dataset.collection.mdx b/api-reference/python/tilebox.datasets/Dataset.collection.mdx similarity index 100% rename from api-reference/tilebox.datasets/Dataset.collection.mdx rename to api-reference/python/tilebox.datasets/Dataset.collection.mdx diff --git a/api-reference/tilebox.datasets/Dataset.collections.mdx b/api-reference/python/tilebox.datasets/Dataset.collections.mdx similarity index 100% rename from api-reference/tilebox.datasets/Dataset.collections.mdx rename to api-reference/python/tilebox.datasets/Dataset.collections.mdx diff --git a/api-reference/tilebox.datasets/Dataset.create_collection.mdx b/api-reference/python/tilebox.datasets/Dataset.create_collection.mdx similarity index 100% rename from api-reference/tilebox.datasets/Dataset.create_collection.mdx rename to api-reference/python/tilebox.datasets/Dataset.create_collection.mdx diff --git a/api-reference/tilebox.datasets/Dataset.get_or_create_collection.mdx b/api-reference/python/tilebox.datasets/Dataset.get_or_create_collection.mdx similarity index 100% rename from api-reference/tilebox.datasets/Dataset.get_or_create_collection.mdx rename to api-reference/python/tilebox.datasets/Dataset.get_or_create_collection.mdx diff --git a/api-reference/tilebox.workflows/Client.mdx b/api-reference/python/tilebox.workflows/Client.mdx similarity index 92% rename from api-reference/tilebox.workflows/Client.mdx rename to 
api-reference/python/tilebox.workflows/Client.mdx index 873c7e0..f8902f1 100644 --- a/api-reference/tilebox.workflows/Client.mdx +++ b/api-reference/python/tilebox.workflows/Client.mdx @@ -47,7 +47,7 @@ A client for scheduling automations. def Client.runner(...) -> TaskRunner ``` -A client is also used to instantiate task runners. Check out the [`Client.runner` API reference](/api-reference/tilebox.workflows/Client.runner) for more information. +A client is also used to instantiate task runners. Check out the [`Client.runner` API reference](/api-reference/python/tilebox.workflows/Client.runner) for more information. ```python Python diff --git a/api-reference/tilebox.workflows/Client.runner.mdx b/api-reference/python/tilebox.workflows/Client.runner.mdx similarity index 100% rename from api-reference/tilebox.workflows/Client.runner.mdx rename to api-reference/python/tilebox.workflows/Client.runner.mdx diff --git a/api-reference/tilebox.workflows/ExecutionContext.job_cache.mdx b/api-reference/python/tilebox.workflows/ExecutionContext.job_cache.mdx similarity index 100% rename from api-reference/tilebox.workflows/ExecutionContext.job_cache.mdx rename to api-reference/python/tilebox.workflows/ExecutionContext.job_cache.mdx diff --git a/api-reference/tilebox.workflows/ExecutionContext.submit_subtask.mdx b/api-reference/python/tilebox.workflows/ExecutionContext.submit_subtask.mdx similarity index 100% rename from api-reference/tilebox.workflows/ExecutionContext.submit_subtask.mdx rename to api-reference/python/tilebox.workflows/ExecutionContext.submit_subtask.mdx diff --git a/api-reference/tilebox.workflows/JobCache.__iter__.mdx b/api-reference/python/tilebox.workflows/JobCache.__iter__.mdx similarity index 100% rename from api-reference/tilebox.workflows/JobCache.__iter__.mdx rename to api-reference/python/tilebox.workflows/JobCache.__iter__.mdx diff --git a/api-reference/tilebox.workflows/JobCache.group.mdx b/api-reference/python/tilebox.workflows/JobCache.group.mdx 
similarity index 100% rename from api-reference/tilebox.workflows/JobCache.group.mdx rename to api-reference/python/tilebox.workflows/JobCache.group.mdx diff --git a/api-reference/tilebox.workflows/JobClient.cancel.mdx b/api-reference/python/tilebox.workflows/JobClient.cancel.mdx similarity index 100% rename from api-reference/tilebox.workflows/JobClient.cancel.mdx rename to api-reference/python/tilebox.workflows/JobClient.cancel.mdx diff --git a/api-reference/tilebox.workflows/JobClient.retry.mdx b/api-reference/python/tilebox.workflows/JobClient.retry.mdx similarity index 100% rename from api-reference/tilebox.workflows/JobClient.retry.mdx rename to api-reference/python/tilebox.workflows/JobClient.retry.mdx diff --git a/api-reference/tilebox.workflows/JobClient.submit.mdx b/api-reference/python/tilebox.workflows/JobClient.submit.mdx similarity index 98% rename from api-reference/tilebox.workflows/JobClient.submit.mdx rename to api-reference/python/tilebox.workflows/JobClient.submit.mdx index 7a927d5..1c5039c 100644 --- a/api-reference/tilebox.workflows/JobClient.submit.mdx +++ b/api-reference/python/tilebox.workflows/JobClient.submit.mdx @@ -12,6 +12,8 @@ def JobClient.submit( ) -> Job ``` +Submit a job. 
+ ## Parameters diff --git a/api-reference/tilebox.workflows/JobClient.visualize.mdx b/api-reference/python/tilebox.workflows/JobClient.visualize.mdx similarity index 100% rename from api-reference/tilebox.workflows/JobClient.visualize.mdx rename to api-reference/python/tilebox.workflows/JobClient.visualize.mdx diff --git a/api-reference/tilebox.workflows/Task.mdx b/api-reference/python/tilebox.workflows/Task.mdx similarity index 100% rename from api-reference/tilebox.workflows/Task.mdx rename to api-reference/python/tilebox.workflows/Task.mdx diff --git a/api-reference/tilebox.workflows/TaskRunner.run_all.mdx b/api-reference/python/tilebox.workflows/TaskRunner.run_all.mdx similarity index 100% rename from api-reference/tilebox.workflows/TaskRunner.run_all.mdx rename to api-reference/python/tilebox.workflows/TaskRunner.run_all.mdx diff --git a/api-reference/tilebox.workflows/TaskRunner.run_forever.mdx b/api-reference/python/tilebox.workflows/TaskRunner.run_forever.mdx similarity index 100% rename from api-reference/tilebox.workflows/TaskRunner.run_forever.mdx rename to api-reference/python/tilebox.workflows/TaskRunner.run_forever.mdx diff --git a/authentication.mdx b/authentication.mdx index bb3f587..37c59be 100644 --- a/authentication.mdx +++ b/authentication.mdx @@ -24,6 +24,15 @@ from tilebox.workflows import Client as WorkflowsClient datasets_client = DatasetsClient(token="YOUR_TILEBOX_API_KEY") workflows_client = WorkflowsClient(token="YOUR_TILEBOX_API_KEY") ``` +```go Go +import ( + "github.com/tilebox/tilebox-go/datasets/v1" + "github.com/tilebox/tilebox-go/workflows/v1" +) + +datasetsClient := datasets.NewClient(datasets.WithAPIKey("YOUR_TILEBOX_API_KEY")) +workflowsClient := workflows.NewClient(workflows.WithAPIKey("YOUR_TILEBOX_API_KEY")) +``` diff --git a/changelog.mdx b/changelog.mdx index a64508d..54f2e8d 100644 --- a/changelog.mdx +++ b/changelog.mdx @@ -4,7 +4,6 @@ description: New updates and improvements icon: rss --- - ## Custom Datasets diff 
--git a/console.mdx b/console.mdx index 1109aad..952540b 100644 --- a/console.mdx +++ b/console.mdx @@ -26,17 +26,46 @@ When you click a specific event time in the data point list view, a detailed vie After selecting a dataset, collection, and time range, you can export the current selection as a Python code snippet. This will copy a code snippet like the one below to your clipboard. + ```python Python from tilebox.datasets import Client client = Client() datasets = client.datasets() sentinel2_msi = datasets.open_data.copernicus.sentinel2_msi -data = sentinel2_msi.collection("S2A_S2MSI1C").load( - ("2024-07-12", "2024-07-26"), +data = sentinel2_msi.collection("S2A_S2MSI1C").query( + temporal_extent=("2024-07-12", "2024-07-26"), show_progress=True, ) ``` +```go Go +ctx := context.Background() +client := datasets.NewClient() + +dataset, err := client.Datasets.Get(ctx, "open_data.copernicus.sentinel2_msi") +if err != nil { + log.Fatalf("Failed to get dataset: %v", err) +} + +collection, err := client.Collections.Get(ctx, dataset.ID, "S2A_S2MSI1C") +if err != nil { + log.Fatalf("Failed to get collection: %v", err) +} + +startDate := time.Date(2024, 7, 12, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2024, 7, 26, 0, 0, 0, 0, time.UTC) +timeInterval := query.NewTimeInterval(startDate, endDate) + +var datapoints []*v1.Sentinel2Msi +err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, &datapoints, + datasets.WithTemporalExtent(timeInterval), +) +if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) +} +``` + Paste the snippet into a [notebook](/sdks/python/sample-notebooks) to interactively explore the diff --git a/datasets/concepts/collections.mdx b/datasets/concepts/collections.mdx index 87201be..a69a25e 100644 --- a/datasets/concepts/collections.mdx +++ b/datasets/concepts/collections.mdx @@ -4,21 +4,11 @@ description: Learn about dataset collections icon: layer-group --- -Collections group data points within a dataset. 
They help represent logical groupings of data points that are commonly queried together. For example, if your dataset includes data from a specific instrument on different satellites, you can group the data points from each satellite into a collection. +Collections group data points within a dataset. They help represent logical groupings of data points that are commonly queried together. +For example, if your dataset includes data from a specific instrument on different satellites, you can group the data points from each satellite into a collection. -## Overview - -This section provides a quick overview of the API for listing and accessing collections. Below are some usage examples for different scenarios. - -| Method | Description | -| -------------------------------------------------------------------- | ---------------------------------------------------- | -| [`dataset.collections`](/api-reference/tilebox.datasets/Dataset.collections) | List all available collections for a dataset. | -| [`dataset.create_collection`](/api-reference/tilebox.datasets/Dataset.create_collection) | Create a collection in a dataset. | -| [`dataset.get_or_create_collection`](/api-reference/tilebox.datasets/Dataset.get_or_create_collection) | Get a collection, create it if it doesn't exist. | -| [`dataset.collection`](/api-reference/tilebox.datasets/Dataset.collection) | Access an individual collection by its name. | -| [`collection.info`](/api-reference/tilebox.datasets/Collection.info) | Request information about a collection. | - -Refer to the examples below for common use cases when working with collections. These examples assume that you have already [created a client](/datasets/introduction#creating-a-datasets-client) and [listed the available datasets](/api-reference/tilebox.datasets/Client.datasets). +Refer to the examples below for common use cases when working with collections. +These examples assume that you have already created a client and selected a dataset as shown below. 
```python Python @@ -26,6 +16,27 @@ from tilebox.datasets import Client client = Client() datasets = client.datasets() +dataset = datasets.open_data.copernicus.landsat8_oli_tirs +``` +```go Go +package main + +import ( + "context" + "log" + + "github.com/tilebox/tilebox-go/datasets/v1" +) + +func main() { + ctx := context.Background() + client := datasets.NewClient() + + dataset, err := client.Datasets.Get(ctx, "open_data.copernicus.landsat8_oli_tirs") + if err != nil { + log.Fatalf("Failed to get dataset: %v", err) + } +} ``` @@ -35,10 +46,17 @@ To list the collections for a dataset, use the `collections` method on the datas ```python Python -dataset = datasets.open_data.copernicus.landsat8_oli_tirs collections = dataset.collections() print(collections) ``` +```go Go +collections, err := client.Collections.List(ctx, dataset.ID) +if err != nil { + log.Fatalf("Failed to list collections: %v", err) +} + +log.Println(collections) +``` ```plaintext Output @@ -48,29 +66,35 @@ print(collections) 'L2SP': Collection L2SP: [2015-01-01T07:53:35.391 UTC, 2024-08-12T12:52:03.243 UTC] (191110 data points)} ``` -[dataset.collections](/api-reference/tilebox.datasets/Dataset.collections) returns a dictionary mapping collection names to their corresponding collection objects. Each collection has a unique name within its dataset. +[dataset.collections](/api-reference/python/tilebox.datasets/Dataset.collections) returns a dictionary mapping collection names to their corresponding collection objects. Each collection has a unique name within its dataset. ## Creating collections -To create a collection in a dataset, use [dataset.create_collection()](/api-reference/tilebox.datasets/Dataset.create_collection). This method returns the created collection object. +To create a collection in a dataset, use [dataset.create_collection()](/api-reference/python/tilebox.datasets/Dataset.create_collection). This method returns the created collection object. 
```python Python collection = dataset.create_collection("My-collection") ``` +```go Go +collection, err := client.Collections.Create(ctx, dataset.ID, "My-collection") +``` -Alternatively, you can use [dataset.get_or_create_collection()](/api-reference/tilebox.datasets/Dataset.get_or_create_collection) to get a collection by its name. If the collection does not exist, it will be created. +You can use [dataset.get_or_create_collection()](/api-reference/python/tilebox.datasets/Dataset.get_or_create_collection) to get a collection by its name. If the collection does not exist, it will be created. ```python Python collection = dataset.get_or_create_collection("My-collection") ``` +```go Go +collection, err := client.Collections.GetOrCreate(ctx, dataset.ID, "My-collection") +``` ## Accessing individual collections -Once you have listed the collections for a dataset using [dataset.collections()](/api-reference/tilebox.datasets/Dataset.collections), you can access a specific collection by retrieving it from the resulting dictionary with its name. Use [collection.info()](/api-reference/tilebox.datasets/Collection.info) to get details (name, availability, and count) about it. +Once you have listed the collections for a dataset using [dataset.collections()](/api-reference/python/tilebox.datasets/Dataset.collections), you can access a specific collection by retrieving it from the resulting dictionary with its name. Use [collection.info()](/api-reference/python/tilebox.datasets/Collection.info) in Python or `String()` in Go to get details (name, availability, and count) about it. 
```python Python @@ -79,13 +103,26 @@ terrain_correction = collections["L1GT"] collection_info = terrain_correction.info() print(collection_info) ``` +```go Go +dataset, err := client.Datasets.Get(ctx, "open_data.copernicus.landsat8_oli_tirs") +if err != nil { + log.Fatalf("Failed to get dataset: %v", err) +} + +collection, err := client.Collections.Get(ctx, dataset.ID, "L1GT") +if err != nil { + log.Fatalf("Failed to get collection: %v", err) +} + +log.Println(collection.String()) +``` ```plaintext Output L1GT: [2013-03-25T12:08:43.699 UTC, 2024-08-19T12:57:32.456 UTC] (154288 data points) ``` -You can also access a specific collection directly using the [dataset.collection](/api-reference/tilebox.datasets/Dataset.collection) method on the dataset object. This method allows you to get the collection without having to list all collections first. +You can also access a specific collection directly using the [dataset.collection](/api-reference/python/tilebox.datasets/Dataset.collection) method on the dataset object. This method allows you to get the collection without having to list all collections first. 
```python Python @@ -93,6 +130,19 @@ terrain_correction = dataset.collection("L1GT") collection_info = terrain_correction.info() print(collection_info) ``` +```go Go +dataset, err := client.Datasets.Get(ctx, "open_data.copernicus.landsat8_oli_tirs") +if err != nil { + log.Fatalf("Failed to get dataset: %v", err) +} + +collection, err := client.Collections.Get(ctx, dataset.ID, "L1GT") +if err != nil { + log.Fatalf("Failed to get collection: %v", err) +} + +log.Println(collection.String()) +``` ```plaintext Output @@ -109,6 +159,12 @@ If you attempt to access a collection with a non-existent name, a `NotFoundError ```python Python dataset.collection("Sat-X").info() # raises NotFoundError: 'No such collection Sat-X' ``` +```go Go +collection, err := client.Collections.Get(ctx, dataset.ID, "Sat-X") +if err != nil { + log.Fatal(err) // prints 'failed to get collections: not_found: no such collection' +} +``` ## Next steps diff --git a/datasets/concepts/datasets.mdx b/datasets/concepts/datasets.mdx index 3dfad9b..c94b89b 100644 --- a/datasets/concepts/datasets.mdx +++ b/datasets/concepts/datasets.mdx @@ -4,21 +4,12 @@ description: Tilebox Datasets act as containers for data points. All data points icon: database --- -## Overview - -This section provides a quick overview of the API for listing and accessing datasets. - -| Method | Description | -| -------------------------------------------------------------------- | ---------------------------------------------------- | -| [`client.datasets`](/api-reference/tilebox.datasets/Client.datasets) | List all available datasets. | -| [`client.dataset`](/api-reference/tilebox.datasets/Client.dataset) | Access an individual dataset by its name. | - - You can create your own, custom datasets via the [Tilebox Console](/console). + You can create your own, Custom Datasets via the [Tilebox Console](/console). ## Related Guides - + Learn how to create a Timeseries dataset using the Tilebox Console. 
@@ -31,7 +22,7 @@ This section provides a quick overview of the API for listing and accessing data ## Dataset types Each dataset is of a specific type. Each dataset type comes with a set of required fields for each data point. -The dataset type also determines the query capabilities for a dataset, e.g. whether a dataset supports time-based queries +The dataset type also determines the query capabilities for a dataset, for example, whether a dataset supports time-based queries or additionally also spatially filtered queries. To find out which fields are required for each dataset type check out the documentation for the available dataset types @@ -56,17 +47,16 @@ The required fields of the dataset type, as well as the custom fields specific t **dataset schema**. Once a **dataset schema** is defined, existing fields cannot be removed or edited as soon as data has been ingested into it. -However, you can always add new fields to a dataset, since all fields are always optional. +You can always add new fields to a dataset, since all fields are always optional. The only exception to this rule are empty datasets. If you empty all collections in a dataset, you can freely edit the data schema, since no conflicts with existing data points can occur. - ## Field types -When defining the data schema, you can specify the type of each field. The following field types are supported. +When defining the data schema, you can specify each field's type. The following field types are supported. 
### Primitives @@ -118,6 +108,31 @@ client = Client() datasets = client.datasets() print(datasets) ``` +```go Go +package main + +import ( + "context" + "log" + + "github.com/tilebox/tilebox-go/datasets/v1" +) + +func main() { + ctx := context.Background() + + client := datasets.NewClient() + + allDatasets, err := client.Datasets.List(ctx) + if err != nil { + log.Fatalf("Failed to list datasets: %v", err) + } + + for _, dataset := range allDatasets { + log.Println(dataset) + } +} +``` ```plaintext Output @@ -135,15 +150,15 @@ open_data: Once you have your dataset object, you can use it to [list the available collections](/datasets/concepts/collections) for the dataset. - If you're using an IDE or an interactive environment with auto-complete, you can use it on your client instance to discover the datasets available to you. Type `client.` and trigger auto-complete after the dot to do so. + In python, if you're using an IDE or an interactive environment with auto-complete, you can use it on your client instance to discover the datasets available to you. Type `client.` and trigger auto-complete after the dot to do so. ## Accessing a dataset -Each dataset has an automatically generated *code name* that can be used to access it. The *code name* is the name of the group, followed by a dot, followed by the dataset name. -For example, the *code name* for the Sentinel-2 MSI dataset above, which is part of the `open_data.copernicus` group, the code name is `open_data.copernicus.sentinel2_msi`. +Each dataset has an automatically generated *slug* that can be used to access it. The *slug* is the name of the group, followed by a dot, followed by the dataset *code name*. +For example, the *slug* for the Sentinel-2 MSI dataset, which is part of the `open_data.copernicus` group, is `open_data.copernicus.sentinel2_msi`. -To access a dataset, use the `dataset` method of your client instance and pass the *code name* of the dataset as an argument. 
+To access a dataset, use the `dataset` method of your client instance and pass the *slug* of the dataset as an argument. ```python Python @@ -152,7 +167,9 @@ from tilebox.datasets import Client client = Client() s2_msi_dataset = client.dataset("open_data.copernicus.sentinel2_msi") ``` +```go Go +s2MsiDataset, err := client.Datasets.Get(ctx, "open_data.copernicus.sentinel2_msi") +``` Once you have your dataset object, you can use it to [access available collections](/datasets/concepts/collections) for the dataset. - diff --git a/datasets/delete.mdx b/datasets/delete.mdx index 01e1fed..43b8e26 100644 --- a/datasets/delete.mdx +++ b/datasets/delete.mdx @@ -7,14 +7,6 @@ icon: trash-can import { CodeOutputHeader } from '/snippets/components.mdx'; -## Overview - -This section provides an overview of the API for deleting data from a collection. - -| Method | Description | -| ------ | ----------- | -| [`collection.delete`](/api-reference/tilebox.datasets/Collection.delete) | Delete data points from a collection. | - You need to have write permission on the collection to be able to delete datapoints. @@ -23,7 +15,7 @@ Check out the examples below for common scenarios of deleting data from a collec ## Deleting data by datapoint IDs -To delete data from a collection, use the [delete](/api-reference/tilebox.datasets/Collection.delete) method. This method accepts a list of datapoint IDs to delete. +To delete data from a collection, use the [delete](/api-reference/python/tilebox.datasets/Collection.delete) method. This method accepts a list of datapoint IDs to delete. 
```python Python @@ -41,6 +33,44 @@ n_deleted = collection.delete([ print(f"Deleted {n_deleted} data points.") ``` +```go Go +package main + +import ( + "context" + "log" + "log/slog" + + "github.com/google/uuid" + "github.com/tilebox/tilebox-go/datasets/v1" +) + +func main() { + ctx := context.Background() + + client := datasets.NewClient() + + dataset, err := client.Datasets.Get(ctx, "my_custom_dataset") + if err != nil { + log.Fatalf("Failed to get dataset: %v", err) + } + + collection, err := client.Collections.Get(ctx, dataset.ID, "Sensor-1") + if err != nil { + log.Fatalf("Failed to create collection: %v", err) + } + + datapointIDs := []uuid.UUID{ + uuid.MustParse("0195c87a-49f6-5ffa-e3cb-92215d057ea6"), + uuid.MustParse("0195c87b-bd0e-3998-05cf-af6538f34957"), + } + numDeleted, err := client.Datapoints.DeleteIDs(ctx, collection.ID, datapointIDs) + if err != nil { + log.Fatalf("Failed to delete datapoints: %v", err) + } + slog.Info("Deleted data points", slog.Int64("deleted", numDeleted)) +} +``` @@ -50,23 +80,21 @@ Deleted 2 data points. - `delete` not only takes a list of datapoint IDs as string, but supports a wide range of other useful input types as well. - See the [delete](/api-reference/tilebox.datasets/Collection.delete) API documentation for more details. + In python, `delete` not only takes a list of datapoint IDs as string, but supports a wide range of other useful input types as well. + See the [delete](/api-reference/python/tilebox.datasets/Collection.delete) API documentation for more details. - ### Possible errors - `NotFoundError`: raised if one of the data points is not found in the collection. If any of the data points are not found, - nothing will be deleted. + nothing will be deleted - `ValueError`: raised if one of the specified ids is not a valid UUID - ## Deleting a time interval One common way to delete data is to first load it from a collection and then forward it to the `delete` method. 
For this use case it often is a good idea to query the datapoints with `skip_data=True` to avoid loading the data fields, -since we only need the datapoint IDs. See [fetching only metadata](/datasets/query#fetching-only-metadata) for more details. +since you only need the datapoint IDs. See [fetching only metadata](/datasets/query#fetching-only-metadata) for more details. ```python Python @@ -75,6 +103,28 @@ to_delete = collection.load(("2023-05-01", "2023-06-01"), skip_data=True) n_deleted = collection.delete(datapoints) print(f"Deleted {n_deleted} data points.") ``` +```go Go +collectionID := uuid.MustParse("c5145c99-1843-4816-9221-970f9ce3ac93") +startDate := time.Date(2023, time.May, 1, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2023, time.June, 1, 0, 0, 0, 0, time.UTC) +mai2023 := query.NewTimeInterval(startDate, endDate) + +var toDelete []*v1.Sentinel2Msi +err := client.Datapoints.QueryInto(ctx, + []uuid.UUID{collectionID}, &toDelete, + datasets.WithTemporalExtent(mai2023), + datasets.WithSkipData(), +) +if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) +} + +numDeleted, err := client.Datapoints.Delete(ctx, collectionID, toDelete) +if err != nil { + log.Fatalf("Failed to delete datapoints: %v", err) +} +slog.Info("Deleted data points", slog.Int64("deleted", numDeleted)) +``` @@ -82,3 +132,7 @@ print(f"Deleted {n_deleted} data points.") Deleted 104 data points. ``` + +## Automatic batching + +Tilebox automatically batches the delete requests for you, so you don't have to worry about the maximum request size. diff --git a/datasets/ingest.mdx b/datasets/ingest.mdx index 84712f2..76126c6 100644 --- a/datasets/ingest.mdx +++ b/datasets/ingest.mdx @@ -7,14 +7,6 @@ icon: up-from-bracket import { CodeOutputHeader } from '/snippets/components.mdx'; -## Overview - -This section provides an overview of the API for ingesting and deleting data from a collection. It includes usage examples for many common scenarios. 
- -| Method | Description | -| ----------------------- | ---------------------------------------------------- | -| [`collection.ingest`](/api-reference/tilebox.datasets/Collection.ingest) | Ingest data into a collection. | - You need to have write permission on the collection to be able to ingest data. @@ -23,7 +15,7 @@ Check out the examples below for common scenarios of ingesting data into a colle ## Dataset schema -Tilebox Datasets are strongly-typed. This means you can only ingest data that matches the schema of a dataset. The schema is defined during dataset creation time. +Tilebox Datasets are strongly typed. This means you can only ingest data that matches the schema of a dataset. The schema is defined during dataset creation time. The examples on this page assume that you have access to a [Timeseries dataset](/datasets/types/timeseries) that has the following schema: @@ -49,7 +41,7 @@ The examples on this page assume that you have access to a [Timeseries dataset]( -Once we've defined the schema and created a dataset, we can access it and create a collection to ingest data into. +Once you've defined the schema and created a dataset, you can access it and create a collection to ingest data into. ```python Python @@ -63,13 +55,13 @@ collection = dataset.get_or_create_collection("Measurements") ## Preparing data for ingestion -[`collection.ingest`](/api-reference/tilebox.datasets/Collection.ingest) supports a wide range of input types. Below is an example of using either a `pandas.DataFrame` or an `xarray.Dataset` as input. +[`collection.ingest`](/api-reference/python/tilebox.datasets/Collection.ingest) supports a wide range of input types. Below is an example of using either a `pandas.DataFrame` or an `xarray.Dataset` as input. ### pandas.DataFrame -A [pandas.DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) is a representation of two-dimensional, potentially heterogeneous tabular data. 
It is a powerful tool for working with structured data, and Tilebox supports it as input for `ingest`. +A [pandas.DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) is a representation of two-dimensional, potentially heterogeneous tabular data. It's a powerful tool for working with structured data, and Tilebox supports it as input for `ingest`. -The example below shows how to construct a `pandas.DataFrame` from scratch, that matches the schema of the `MyCustomDataset` dataset and can therefore be ingested into it. +The example below shows how to construct a `pandas.DataFrame` from scratch, that matches the schema of the `MyCustomDataset` dataset and can be ingested into it. ```python Python @@ -103,7 +95,7 @@ print(data) ``` -Once we have the data ready in this format, we can `ingest` it into a collection. +Once you have the data ready in this format, you can `ingest` it into a collection. ```python Python @@ -128,11 +120,11 @@ Measurements: [2025-03-28T11:44:23.000 UTC, 2025-03-28T11:45:19.000 UTC] (2 data ### xarray.Dataset -[xarray.Dataset](/sdks/python/xarray) is the default format in which Tilebox Datasets returns data when +[`xarray.Dataset`](/sdks/python/xarray) is the default format in which Tilebox Datasets returns data when [querying data](/datasets/query) from a collection. Tilebox also supports it as input for ingestion. The example below shows how to construct an `xarray.Dataset` -from scratch, that matches the schema of the `MyCustomDataset` dataset and can therefore be ingested into it. -To learn more about `xarray.Dataset`, visit our dedicated [Xarray documentation page](/sdks/python/xarray). +from scratch, that matches the schema of the `MyCustomDataset` dataset and can then be ingested into it. +To learn more about `xarray.Dataset`, visit Tilebox dedicated [Xarray documentation page](/sdks/python/xarray). 
```python Python @@ -174,14 +166,14 @@ Data variables: - Array fields manifest in xarray using an extra dimension, in this case `n_sensor_history`. Therefore in case + Array fields manifest in xarray using an extra dimension, in this case `n_sensor_history`. In case of different array sizes for each data point, remaining values are filled up with a fill value, depending on the `dtype` of the array. For `float64` this is `np.nan` (not a number). Don't worry - when ingesting data into a Tilebox dataset, Tilebox will automatically skip those padding fill values and not store them in the dataset. -Now that we have the `xarray.Dataset` in the correct format, we can ingest it into the Tilebox dataset collection. +Now that you have the `xarray.Dataset` in the correct format, you can ingest it into the Tilebox dataset collection. ```python Python @@ -199,7 +191,6 @@ OtherMeasurements: [2025-03-28T11:46:13.000 UTC, 2025-03-28T11:46:54.000 UTC] (2 ``` - ## Copying or moving data Since [collection.load](/datasets/query) returns a `xarray.Dataset`, and `ingest` takes such a dataset as input you @@ -228,15 +219,19 @@ OtherMeasurements: [2025-03-28T11:44:23.000 UTC, 2025-03-28T11:46:54.000 UTC] (4 ``` +## Automatic batching + +Tilebox automatically batches the ingestion requests for you, so you don't have to worry about the maximum request size. + ## Idempotency -Tilebox will auto-generate datapoint IDs based on the data of all of its fields - except for the auto-generated +Tilebox will auto-generate datapoint IDs based on the data of all its fields - except for the auto-generated `ingestion_time`, so ingesting the same data twice will result in the same ID being generated. By default, Tilebox will silently skip any data points that are duplicates of existing ones in a collection. This behavior is especially useful when implementing idempotent algorithms. 
That way, re-executions of certain ingestion tasks due to retries or other reasons will never result in duplicate data points. -However, you can instead also request an error to be raised if any of the generated datapoint IDs already exist. +You can instead also request an error to be raised if any of the generated datapoint IDs already exist. This can be done by setting the `allow_existing` parameter to `False`. @@ -279,10 +274,10 @@ formats, such as CSV, [Parquet](https://parquet.apache.org/), [Feather](https:// ### CSV -Comma-separated values (CSV) is a common file format for tabular data. It is widely used in data science. Tilebox +Comma-separated values (CSV) is a common file format for tabular data. It's widely used in data science. Tilebox supports CSV ingestion using the `pandas.read_csv` function. -Let's assume we have a CSV file named `data.csv` with the following content. If you want to follow along, you can +Assume you have a CSV file named `data.csv` with the following content. If you want to follow along, you can download the file [here](https://storage.googleapis.com/tbx-web-assets-2bad228/docs/data-samples/ingestion_data.csv). ```csv ingestion_data.csv @@ -292,7 +287,7 @@ time,value,sensor,precise_time,sensor_history,some_unwanted_column ``` This data already conforms to the schema of the `MyCustomDataset` dataset, except for `some_unwanted_column` which -we want to drop before we ingest it. Here is how this could look like: +you want to drop before you ingest it. Here is what this could look like: ```python Python @@ -329,10 +324,10 @@ collection.ingest(data) ### Feather [Feather](https://arrow.apache.org/docs/python/feather.html) is a file format originating from the Apache Arrow project, -designed for storing tabular data in a fast and memory-efficient way. It is supported by many programming languages, +designed for storing tabular data in a fast and memory-efficient way. It's supported by many programming languages, including Python.
Tilebox supports Feather ingestion using the `pandas.read_feather` function. -The feather file file used in this example [is available here](https://storage.googleapis.com/tbx-web-assets-2bad228/docs/data-samples/ingestion_data.feather). +The feather file used in this example [is available here](https://storage.googleapis.com/tbx-web-assets-2bad228/docs/data-samples/ingestion_data.feather). ```python Python diff --git a/datasets/introduction.mdx b/datasets/introduction.mdx index 5dd8c4c..36c1174 100644 --- a/datasets/introduction.mdx +++ b/datasets/introduction.mdx @@ -5,9 +5,9 @@ description: Tilebox Datasets provides structured and high-performance satellite icon: house --- -Tilebox Datasets ingests and structures metadata for efficient querying, significantly reducing data transfer and storage costs. +Tilebox Datasets ingests and structures metadata for efficient querying, reducing data transfer and storage costs. -Create your own [custom datasets](/datasets/concepts/datasets) and easily set up a private, custom, strongly-typed and highly-available catalogue, or +Create your own [Custom Datasets](/datasets/concepts/datasets) and easily set up a private, custom, strongly typed and highly available catalogue, or explore any of the wide range of [available public open data datasets](/datasets/open-data) available on Tilebox. Learn more about datasets by exploring the following sections: @@ -28,7 +28,7 @@ Learn more about datasets by exploring the following sections: - For a quick reference to API methods or specific parameter meanings, [check out the complete Datasets API Reference](/api-reference/tilebox.datasets/Client). + For a quick reference to API methods or specific parameter meanings, [check out the complete datasets API Reference](/api-reference/python/tilebox.datasets/Client). ## Terminology @@ -37,7 +37,7 @@ Get familiar with some key terms when working with time series datasets. - Data points are the individual entities that form a dataset. 
Each data point has a set of required [fields](/datasets/types/timeseries) determined by the dataset type, and can have additional, custom user-defined fields. + Data points are the individual entities that form a dataset. Each data point has a set of required [fields](/datasets/types/timeseries) determined by the dataset type, and can have custom user-defined fields. Datasets act as containers for data points. All data points in a dataset share the same type and fields. Tilebox supports different types of datasets, currently those are [Timeseries](/datasets/types/timeseries) and [Spatio-temporal](/datasets/types/spatiotemporal) datasets. @@ -51,7 +51,7 @@ Get familiar with some key terms when working with time series datasets. Prerequisites -- You have [installed](/sdks/python/install) the `tilebox-datasets` package. +- You have installed the [Python](/sdks/python/install) `tilebox-datasets` package or the [Go](/sdks/go/install) library. - You have [created](/authentication) a Tilebox API key. After meeting these prerequisites, you can create a client instance to interact with Tilebox Datasets. ```python Python from tilebox.datasets import Client client = Client(token="YOUR_TILEBOX_API_KEY") ``` +```go Go +import ( + "github.com/tilebox/tilebox-go/datasets/v1" +) + +client := datasets.NewClient( + datasets.WithAPIKey("YOUR_TILEBOX_API_KEY"), +) +``` You can also set the `TILEBOX_API_KEY` environment variable to your API key. You can then instantiate the client without passing the `token` argument. Python will automatically use this environment variable for authentication.
@@ -73,6 +82,14 @@ from tilebox.datasets import Client # requires a TILEBOX_API_KEY environment variable client = Client() ``` +```go Go +import ( + "github.com/tilebox/tilebox-go/datasets/v1" +) + +// requires a TILEBOX_API_KEY environment variable +client := datasets.NewClient() +``` @@ -81,13 +98,36 @@ client = Client() ### Exploring datasets -After creating a client instance, you can start exploring available datasets. A straightforward way to do this in an interactive environment is to [list all datasets](/api-reference/tilebox.datasets/Client.datasets) and use the autocomplete feature in your Jupyter notebook. +After creating a client instance, you can start exploring available datasets. A straightforward way to do this in an interactive environment is to [list all datasets](/api-reference/python/tilebox.datasets/Client.datasets) and use the autocomplete feature in your Jupyter notebook. ```python Python datasets = client.datasets() datasets. # trigger autocomplete here to view available datasets ``` +```go Go +package main + +import ( + "context" + "github.com/tilebox/tilebox-go/datasets/v1" + "log" +) + +func main() { + client := datasets.NewClient() + + ctx := context.Background() + allDatasets, err := client.Datasets.List(ctx) + if err != nil { + log.Fatalf("Failed to list datasets: %v", err) + } + + for _, dataset := range allDatasets { + log.Printf("Dataset: %s", dataset.Name) + } +} +``` @@ -105,6 +145,26 @@ datasets. 
# trigger autocomplete here to view available datasets client = Client(token="invalid-key") # runs without error datasets = client.datasets() # raises AuthenticationError ``` +```go Go +package main + +import ( + "context" + "github.com/tilebox/tilebox-go/datasets/v1" + "log" +) + +func main() { + // runs without error + client := datasets.NewClient(datasets.WithAPIKey("invalid-key")) + + // returns an error + _, err := client.Datasets.List(context.Background()) + if err != nil { + log.Fatalf("Failed to list datasets: %v", err) + } +} +``` ## Next steps diff --git a/datasets/open-data.mdx b/datasets/open-data.mdx index 4524270..07b694f 100644 --- a/datasets/open-data.mdx +++ b/datasets/open-data.mdx @@ -4,11 +4,11 @@ description: Learn about the Open data available in Tilebox. icon: star --- -Tilebox not only provides access to your own, private datasets but also to a growing number of public datasets. These datasets are available to all users of Tilebox and are a great way to get started and prototype your applications even before data from your own satellites is available. +On top of access to your own, private datasets, Tilebox provides access to a growing number of public datasets. +These datasets are available to all users of Tilebox and are a great way to get started and prototype your applications even before data from your own satellites is available. - If there is a public dataset you would like to see in Tilebox, - please get in touch. + If there is a dataset you would like to see in Tilebox, you can request it in the [Console open data page](https://console.tilebox.com/datasets/open-data). ## Accessing Open Data through Tilebox @@ -20,7 +20,7 @@ By using the [datasets API](/datasets), you can start prototyping your applicati ## Available datasets - The Tilebox Console contains in-depth descriptions of each dataset. 
Check out the [Sentinel 5P Tropomi](https://console.tilebox.com/datasets/explorer/feb2bcc9-8fdf-4714-8a63-395ee9d3f323?view=documentation) documentation as an example. + The Tilebox Console contains in-depth descriptions of each dataset. Check out the [Sentinel 5P Tropomi](https://console.tilebox.com/datasets/explorer/bb394de4-b47f-4069-bc4c-6e6a2c9f0641?view=documentation) documentation as an example. ### Copernicus Data Space diff --git a/datasets/query.mdx b/datasets/query.mdx index 4c2d254..ffd659e 100644 --- a/datasets/query.mdx +++ b/datasets/query.mdx @@ -5,15 +5,6 @@ description: Learn how to query and load data from Tilebox datasets. icon: server --- -## Overview - -This section provides an overview of the API for loading data from a collection. It includes usage examples for many common scenarios. - -| Method | Description | -| -------------------------------------------------------------------- | ---------------------------------------------------- | -| [`collection.load`](/api-reference/tilebox.datasets/Collection.load) | Query data points from a collection. | -| [`collection.find`](/api-reference/tilebox.datasets/Collection.find) | Find a specific datapoint in a collection by its id. | - Check out the examples below for common scenarios when loading data from collections. 
@@ -25,9 +16,34 @@ datasets = client.datasets() collections = datasets.open_data.copernicus.sentinel1_sar.collections() collection = collections["S1A_IW_RAW__0S"] ``` +```go Go +package main + +import ( + "context" + "log" + + "github.com/tilebox/tilebox-go/datasets/v1" +) + +func main() { + ctx := context.Background() + client := datasets.NewClient() + + dataset, err := client.Datasets.Get(ctx, "open_data.copernicus.sentinel1_sar") + if err != nil { + log.Fatalf("Failed to get dataset: %v", err) + } + + collection, err := client.Collections.Get(ctx, dataset.ID, "S1A_IW_RAW__0S") + if err != nil { + log.Fatalf("Failed to get collection: %v", err) + } +} +``` -To load data points from a dataset collection, use the [load](/api-reference/tilebox.datasets/Collection.load) method. It requires a `time_or_interval` parameter to specify the time or time interval for loading. +To load data points from a dataset collection, use the [load](/api-reference/python/tilebox.datasets/Collection.load) method. It requires a `time_or_interval` parameter to specify the time or time interval for loading. 
## Filtering by time @@ -40,9 +56,29 @@ To load data for a specific time interval, use a `tuple` in the form `(start, en interval = ("2017-01-01", "2023-01-01") data = collection.load(interval, show_progress=True) ``` +```go Go +startDate := time.Date(2017, time.January, 1, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2023, time.January, 1, 0, 0, 0, 0, time.UTC) +interval := query.NewTimeInterval(startDate, endDate) + +var datapoints []*v1.Sentinel1Sar +err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, + &datapoints, + datasets.WithTemporalExtent(interval), +) +if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) +} + +log.Printf("Queried %d datapoints", len(datapoints)) +``` -```plaintext Output +Output + + +```plaintext Python Size: 725MB Dimensions: (time: 1109597, latlon: 2) Coordinates: @@ -56,12 +92,18 @@ Data variables: (12/30) satellite (time) object 9MB 'SENTINEL-1' ... 'SENTINEL-1' ... ... ``` +```plaintext Go +Queried 1109597 datapoints +``` + The `show_progress` parameter is optional and can be used to display a [tqdm](https://tqdm.github.io/) progress bar while loading data. -A time interval specified as a tuple is interpreted as a half-closed interval. This means the start time is inclusive, and the end time is exclusive. For instance, using an end time of `2023-01-01` includes data points up to `2022-12-31 23:59:59.999`, but excludes those from `2023-01-01 00:00:00.000`. This behavior mimics the Python `range` function and is useful for chaining time intervals. +A time interval specified as a tuple is interpreted as a half-closed interval. This means the start time is inclusive, and the end time is exclusive. +For instance, using an end time of `2023-01-01` includes data points up to `2022-12-31 23:59:59.999`, but excludes those from `2023-01-01 00:00:00.000`. +This behavior mimics the Python `range` function and is useful for chaining time intervals. 
```python Python @@ -76,6 +118,28 @@ for year in [2017, 2018, 2019, 2020, 2021, 2022]: # to the result of the single request in the code example above. data = xr.concat(data, dim="time") ``` +```go Go +var datapoints []*v1.Sentinel1Sar + +for year := 2017; year <= 2022; year++ { + startDate := time.Date(year, time.January, 1, 0, 0, 0, 0, time.UTC) + interval := query.NewTimeInterval(startDate, startDate.AddDate(1, 0, 0)) + + var partialDatapoints []*v1.Sentinel1Sar + err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, + &partialDatapoints, + datasets.WithTemporalExtent(interval), + ) + if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) + } + + // Concatenate the data into a single dataset, which is equivalent + // to the result of the single request in the code example above. + datapoints = append(datapoints, partialDatapoints...) +} +``` Above example demonstrates how to split a large time interval into smaller chunks while loading data in separate requests. Typically, this is not necessary as the datasets client auto-paginates large intervals. 
@@ -108,6 +172,33 @@ print(interval2.to_half_open()) # Query data for a time interval data = collection.load(interval1, show_progress=True) ``` +```go Go +interval1 := query.TimeInterval{ + Start: time.Date(2017, time.January, 1, 0, 0, 0, 0, time.UTC), + End: time.Date(2023, time.January, 1, 0, 0, 0, 0, time.UTC), + EndInclusive: false, +} + +interval2 := query.TimeInterval{ + Start: time.Date(2017, time.January, 1, 0, 0, 0, 0, time.UTC), + End: time.Date(2022, time.December, 31, 23, 59, 59, 999999999, time.UTC), + EndInclusive: true, +} + +log.Println("Inclusivity is indicated by interval notation: ( and [") +log.Println(interval1.String()) +log.Println(interval2.String()) +log.Println("They are equivalent:", interval1.Equal(&interval2)) +log.Println(interval2.ToHalfOpen().String()) + +// Query data for a time interval +var datapoints []*v1.Sentinel1Sar +err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, + &datapoints, + datasets.WithTemporalExtent(interval1), +) +``` ```plaintext Output @@ -133,9 +224,24 @@ Here's how to load a data point at a specific millisecond from a [collection](/d data = collection.load("2024-08-01 00:00:01.362") print(data) ``` + ```go Go + temporalExtent := query.NewPointInTime(time.Date(2024, time.August, 1, 0, 0, 1, 362000000, time.UTC)) + + var datapoints []*v1.Sentinel1Sar + err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, &datapoints, + datasets.WithTemporalExtent(temporalExtent), + ) + + log.Printf("Queried %d datapoints", len(datapoints)) + log.Printf("First datapoint time: %s", datapoints[0].GetTime().AsTime()) + ``` -```plaintext Output +Output + + +```plaintext Python Size: 721B Dimensions: (time: 1, latlon: 2) Coordinates: @@ -149,6 +255,11 @@ Data variables: (12/30) satellite (time) object 8B 'SENTINEL-1' ... ... ``` +```plaintext Go +Queried 1 datapoints +First datapoint time: 2024-08-01 00:00:01.362 +0000 UTC +``` + Tilebox uses millisecond precision for timestamps. 
To load all data points for a specific second, it's a [time interval](/datasets/query#time-interval) request. Refer to the examples below for details. @@ -156,7 +267,7 @@ Data variables: (12/30) The output of the `load` method is an `xarray.Dataset` object. To learn more about Xarray, visit the dedicated [Xarray page](/sdks/python/xarray). -### Time iterables +### Time iterables (Python only) You can specify a time interval by using an iterable of `TimeScalar`s as the `time_or_interval` parameter. This is especially useful when you want to use the output of a previous `load` call as input for another load. Here's how that works. @@ -205,11 +316,34 @@ tokyo_time = pytz.timezone('Asia/Tokyo').localize( ) print(tokyo_time) data = collection.load(tokyo_time) -print(data) # time is in UTC since API always returns UTC timestamps +print(data) +``` +```go Go +// Tokyo has a UTC+9 hours offset, so this is the same as +// 2017-01-01 02:45:25.679 UTC +location, _ := time.LoadLocation("Asia/Tokyo") +tokyoTime := query.NewPointInTime(time.Date(2017, 1, 1, 11, 45, 25, 679000000, location)) +log.Println(tokyoTime) + +var datapoints []*v1.Sentinel1Sar +err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, &datapoints, + datasets.WithTemporalExtent(tokyoTime), +) +if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) +} + +log.Printf("Queried %d datapoints", len(datapoints)) +// time is in UTC since API always returns UTC timestamps +log.Printf("First datapoint time: %s", datapoints[0].GetTime().AsTime()) ``` -```plaintext Output +Output + + +```plaintext Python 2017-01-01 11:45:25.679000+09:00 Size: 725B Dimensions: (time: 1, latlon: 2) @@ -221,29 +355,107 @@ Coordinates: Data variables: ... 
``` +```plaintext Go +[2017-01-01 11:45:25.679 +0900 JST, 2017-01-01 11:45:25.679 +0900 JST] +Queried 1 datapoints +First datapoint time: 2017-01-01 02:45:25.679 +0000 UTC +``` + ## Filtering by area of interest -[Spatio-temporal](/datasets/types/spatiotemporal) also come with spatial filtering capabilities. When querying, you can no longer specify a time interval, but additionally also specify a bounding box or a polygon as an area of interest to filter by. +[Spatio-temporal datasets](/datasets/types/spatiotemporal) also come with spatial filtering capabilities. When querying, you can specify a time interval, and additionally also specify a bounding box or a polygon as an area of interest to filter by. - - Spatio-temporal datasets - including spatial filtering capabilities - are currently in development and not available yet. Stay tuned for updates! - +Here is how to query Sentinel-2 `S2A_S2MSI2A` data over Colorado for April 2025. + + +```python Python +from shapely import MultiPolygon +from tilebox.datasets import Client + +area = MultiPolygon( + [ + (((-109.10, 40.98), (-102.01, 40.95), (-102.06, 36.82), (-109.06, 36.96), (-109.10, 40.98)),), + ] +) + +client = Client() +datasets = client.datasets() +sentinel2_msi = datasets.open_data.copernicus.sentinel2_msi +data = sentinel2_msi.collection("S2A_S2MSI2A").query( + temporal_extent=("2025-04-01", "2025-05-02"), + spatial_extent=area, + show_progress=True, +) +``` +```go Go +ctx := context.Background() +client := datasets.NewClient() + +dataset, err := client.Datasets.Get(ctx, "open_data.copernicus.sentinel2_msi") +if err != nil { + log.Fatalf("Failed to get dataset: %v", err) +} + +collection, err := client.Collections.Get(ctx, dataset.ID, "S2A_S2MSI2A") +if err != nil { + log.Fatalf("Failed to get collection: %v", err) +} + +startDate := time.Date(2025, 4, 1, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2025, 5, 2, 0, 0, 0, 0, time.UTC) +timeInterval := query.NewTimeInterval(startDate, endDate) +area := orb.MultiPolygon{
+ { + {{-109.10, 40.98}, {-102.01, 40.95}, {-102.06, 36.82}, {-109.06, 36.96}, {-109.10, 40.98}}, + }, +} + +var datapoints []*v1.Sentinel2Msi +err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, &datapoints, + datasets.WithTemporalExtent(timeInterval), + datasets.WithSpatialExtent(area), +) +if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) +} +``` + ## Fetching only metadata Sometimes, it may be useful to load only dataset metadata fields without the actual data fields. This can be done by setting the `skip_data` parameter to `True`. For example, when only checking if a datapoint exists, you may want to use `skip_data=True` to avoid loading the data fields. -If this flag is set, the response will only include the required fields for the given dataset type, but no additional custom data fields. +If this flag is set, the response will only include the required fields for the given dataset type, but no custom data fields. ```python Python data = collection.load("2024-08-01 00:00:01.362", skip_data=True) print(data) ``` +```go Go +temporalExtent := query.NewPointInTime(time.Date(2024, time.August, 1, 0, 0, 1, 362000000, time.UTC)) + +var datapoints []*v1.Sentinel1Sar +err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, &datapoints, + datasets.WithTemporalExtent(temporalExtent), + datasets.WithSkipData(), +) +if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) +} + +log.Printf("Queried %d datapoints", len(datapoints)) +log.Printf("First datapoint time: %s", datapoints[0].GetTime().AsTime()) +``` -```plaintext Output +Output + + +```plaintext Python Size: 160B Dimensions: (time: 1) Coordinates: @@ -253,6 +465,11 @@ Coordinates: Data variables: *empty* ``` +```plaintext Go +Queried 1 datapoints +First datapoint time: 2024-08-01 00:00:01.362 +0000 UTC +``` + ## Empty response @@ -264,18 +481,39 @@ The `load` method always returns an `xarray.Dataset` object, even if there are n data = 
collection.load(time_with_no_data_points) print(data) ``` +```go Go +timeWithNoDatapoints := query.NewPointInTime(time.Date(1997, time.February, 6, 10, 21, 0, 0, time.UTC)) + +var datapoints []*v1.Sentinel1Sar +err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, &datapoints, + datasets.WithTemporalExtent(timeWithNoDatapoints), +) +if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) +} + +log.Printf("Queried %d datapoints", len(datapoints)) +``` -```plaintext Output +Output + + +```plaintext Python Size: 0B Dimensions: () Data variables: *empty* ``` +```plaintext Go +Queried 0 datapoints +``` + ## By datapoint ID -If you know the ID of the data point you want to load, you can use [collection.find](/api-reference/tilebox.datasets/Collection.find). +If you know the ID of the data point you want to load, you can use [find](/api-reference/python/tilebox.datasets/Collection.find). This method always returns a single data point or raises an exception if no data point with the specified ID exists. @@ -285,12 +523,23 @@ This method always returns a single data point or raises an exception if no data datapoint = collection.find(datapoint_id) print(datapoint) ``` - ```go Golang - fmt.Println("test") - ``` +```go Go +datapointID := uuid.MustParse("01910b3c-8552-424d-e116-81d0c3402ccc") + +var datapoint v1.Sentinel1Sar +err = client.Datapoints.GetInto(ctx, + []uuid.UUID{collection.ID}, datapointID, &datapoint, +) +if err != nil { + log.Fatalf("Failed to query datapoint: %v", err) +} + +fmt.Println(protojson.Format(&datapoint)) +``` -
Output
+Output + ```plaintext Python Size: 725B @@ -306,18 +555,47 @@ Data variables: (12/30) satellite object 8B 'SENTINEL-1' ... ... ``` -```plaintext Golang -Test ... +```plaintext Go +{ + "time": "2024-08-01T00:00:01.362Z", + "id": { + "uuid": "AZELPIVSQk3hFoHQw0AszA==" + }, + "ingestionTime": "2025-04-26T20:03:12.682144768Z", + "geometry": { + "wkb": "AQMAACDmEAAAAQAAAAUAAAAAAAAAAExWwA3gLZCg+DNAXrpJDAK3VcBX7C+7Jz80QBB6Nqs+z1XA3GgAb4E0NkAgY+5aQmZWwOauJeSD7jVAAAAAAABMVsAN4C2QoPgzQA==" + }, + "granuleName": "S1A_IW_RAW__0SDV_20240801T000001_20240801T000033_055012_06B3B6_2345.SAFE", + "processingLevel": "PROCESSING_LEVEL_L0", + "productType": "IW_RAW__0S", + "copernicusId": { + "uuid": "qqUjpbjDTem9ET1YesYsPA==" + }, + "platform": "S1A", + "orbitNumber": "55012", + "relativeOrbitNumber": "165", + "processingBaseline": "NaN", + "stopTime": "2024-08-01T00:00:33.762Z", + "centroid": { + "wkb": "AQEAAAAlta8FMQ5WwAhqkiM6FzVA" + }, + "published": "2024-08-01T07:05:56.740Z", + "updated": "2024-08-01T07:06:13.431Z", + "location": "/eodata/Sentinel-1/SAR/IW_RAW__0S/2024/08/01/S1A_IW_RAW__0SDV_20240801T000001_20240801T000033_055012_06B3B6_2345.SAFE", + "fileSize": "1668437350", + "resolution": "0", + "flightDirection": "FLIGHT_DIRECTION_ASCENDING", + "polarization": "POLARIZATION_VV_VH", + "acquisitionMode": "ACQUISITION_MODE_IW" +} ``` -Since `find` returns only a single data point, the output dataset does not include a `time` dimension. - - You can also set the `skip_data` parameter when calling `find` to load only the metadata of the data point, same as for `load`. + You can also set the `skip_data` parameter when calling `find` to query only the required fields of the data point, same as for `load`. 
-### Possible errors +## Automatic pagination -- `NotFoundError`: raised if no data point with the given ID is found in the collection -- `ValueError`: raised if the specified `datapoint_id` is not a valid UUID +Querying large time intervals can return a large number of data points. +Tilebox automatically handles pagination for you by sending paginated requests to the server. diff --git a/datasets/types/spatiotemporal.mdx b/datasets/types/spatiotemporal.mdx index 784e860..10acb1f 100644 --- a/datasets/types/spatiotemporal.mdx +++ b/datasets/types/spatiotemporal.mdx @@ -5,7 +5,7 @@ icon: earth-europe --- - Spatio-temporal datasets are currently in development and not available yet. Stay tuned for updates! + Spatio-temporal datasets are currently in development and not available yet. Stay tuned for updates Each spatio-temporal dataset comes with a set of required and auto-generated fields for each data point. @@ -19,14 +19,13 @@ While the specific data fields between different time series datasets can vary,
- For indexing and querying, Tilebox truncates timestamps to millisecond precision. However, Timeseries datasets may contain arbitrary custom `Timestamp` fields that store timestamps up to a nanosecond precision. + For indexing and querying, Tilebox truncates timestamps to millisecond precision. But Timeseries datasets may contain arbitrary custom `Timestamp` fields that store timestamps up to a nanosecond precision. A location on the earth's surface associated with each data point. Supported geometry types are `Polygon`, `MultiPolygon`, `Point` and `MultiPoint`. - ## Auto-generated fields @@ -44,7 +43,7 @@ While the specific data fields between different time series datasets can vary, ## Creating a spatio-temporal dataset To create a spatio-temporal dataset, use the [Tilebox Console](/console) and select `Spatio-temporal Dataset` as the dataset type. The required and auto-generated fields -outlined above will be automatically added to the dataset schema. +already outlined will be automatically added to the dataset schema. ## Spatio-temporal queries diff --git a/datasets/types/timeseries.mdx b/datasets/types/timeseries.mdx index 5beca7b..77c2034 100644 --- a/datasets/types/timeseries.mdx +++ b/datasets/types/timeseries.mdx @@ -15,10 +15,9 @@ While the specific data fields between different time series datasets can vary, - For indexing and querying, Tilebox truncates timestamps to millisecond precision. However, Timeseries datasets may contain arbitrary custom `Timestamp` fields that store timestamps up to a nanosecond precision. + For indexing and querying, Tilebox truncates timestamps to millisecond precision. But Timeseries datasets may contain arbitrary custom `Timestamp` fields that store timestamps up to a nanosecond precision. 
- ## Auto-generated fields @@ -36,7 +35,7 @@ While the specific data fields between different time series datasets can vary, ## Creating a timeseries dataset To create a timeseries dataset, use the [Tilebox Console](/console) and select `Timeseries Dataset` as the dataset type. The required and auto-generated fields -outlined above will be automatically added to the dataset schema. +already outlined will be automatically added to the dataset schema. ## Time-based queries diff --git a/guides/datasets/create.mdx b/guides/datasets/create.mdx index f3ab991..5269778 100644 --- a/guides/datasets/create.mdx +++ b/guides/datasets/create.mdx @@ -47,7 +47,7 @@ This page guides you through the process of creating a dataset in Tilebox using Specify the fields for your dataset. Each field has these properties: - - `Name` is the name of the field (it should be snake_case). + - `Name` is the name of the field (it should be `snake_case`). - `Type` and `Array` let you specify the field data type and whether it's an array. See below for an explanation of the available data. - `Description` is an optional brief description of the field. You can use it to provide more context and details about the data. - `Example value` is an optional example for this field. It can be useful for documentation purposes. @@ -64,7 +64,7 @@ This page guides you through the process of creating a dataset in Tilebox using ## Automatic dataset schema documentation -By specifying the fields for your dataset, including the data type, description and an example value for each one, Tilebox +By specifying the fields for your dataset, including the data type, description, and an example value for each one, Tilebox is capable of automatically generating a documentation page for your dataset schema. 
@@ -72,7 +72,7 @@ is capable of automatically generating a documentation page for your dataset sch Dataset schema overview -## Adding additional documentation +## Adding extra documentation You can also add custom documentation to your dataset, providing more context and details about the data included data. This documentation supports rich formatting, including links, tables, code snippets, and more. diff --git a/guides/datasets/ingest.mdx b/guides/datasets/ingest.mdx index a2fa4bc..1b3e526 100644 --- a/guides/datasets/ingest.mdx +++ b/guides/datasets/ingest.mdx @@ -13,8 +13,8 @@ import { CodeOutputHeader } from '/snippets/components.mdx'; This page guides you through the process of ingesting data into a Tilebox dataset. Starting from an existing -dataset available as file in the [GeoParquet](https://geoparquet.org/) format, we'll walk you through the process of -ingestion that data into Tilebox as a [Timeseries](/datasets/types/timeseries) dataset. +dataset available as file in the [GeoParquet](https://geoparquet.org/) format, you'll go through the process of +ingesting that data into Tilebox as a [Timeseries](/datasets/types/timeseries) dataset. ## Related documentation @@ -35,7 +35,7 @@ from here: [modis_MCD12Q1.geoparquet](https://storage.googleapis.com/tbx-web-ass ## Installing the necessary packages This example uses a couple of python packages for reading parquet files and for visualizing the dataset. Install the -required packages using your preferred package manager. For new projects, we recommend using [uv](https://docs.astral.sh/uv/). +required packages using your preferred package manager. For new projects, Tilebox recommend using [uv](https://docs.astral.sh/uv/). ```bash uv @@ -93,8 +93,8 @@ modis_data.head(1000).explore(width=800, height=600) ## Create a Tilebox dataset -Now we'll create a [Timeseries](/datasets/types/timeseries) dataset with the same schema as the given MODIS dataset. 
-To do so, we'll use the [Tilebox Console](/console), navigate to `My Datasets` and click `Create Dataset`. We then select +Now you'll create a [Timeseries](/datasets/types/timeseries) dataset with the same schema as the given MODIS dataset. +To do so, you'll use the [Tilebox Console](/console), navigate to `My Datasets` and click `Create Dataset`. Then select `Timeseries Dataset` as the dataset type. @@ -102,12 +102,12 @@ To do so, we'll use the [Tilebox Console](/console), navigate to `My Datasets` a Step by step guide. -Now, to match the given MODIS dataset, we'll specify the following fields: +Now, to match the given MODIS dataset, you'll specify the following fields: | Field | Type | Note | | --- | --- | --- | | granule_name | string | MODIS granule name | -| geometry | Geometry | Tile boundary coords of the granule | +| geometry | Geometry | Tile boundary coordinates of the granule | | end_time | Timestamp | Measurement end time | | horizontal_tile_number | int64 | Horizontal modis tile number (0-35) | | vertical_tile_number | int64 | Vertical modis tile number (0-17) | @@ -128,7 +128,7 @@ In the console, this will look like the following: ## Access the dataset from Python -Our newly created dataset is now available. Let's access it from Python. For this, we'll need to know the dataset slug, +Your newly created dataset is now available. You can access it from Python. For this, you'll need to know the dataset slug, which was assigned automatically based on the specified `code_name`. To find out the slug, navigate to the dataset overview in the console. @@ -137,7 +137,7 @@ in the console. Explore the MODIS dataset -We can now instantiate the dataset client and access the dataset. +You can now instantiate the dataset client and access the dataset. ```python Python @@ -150,7 +150,7 @@ dataset = client.dataset("tilebox.modis") # replace with your dataset slug ## Create a collection -Next, we'll create a collection to insert our data into. 
+Next, you'll create a collection to insert your data into. ```python Python @@ -160,7 +160,7 @@ collection = dataset.get_or_create_collection("MCD12Q1") ## Ingest the data -Now, we'll finally ingest the MODIS data into the collection. +Now, you'll finally ingest the MODIS data into the collection. ```python Python @@ -177,7 +177,7 @@ Successfully ingested 7245 datapoints! ## Query the newly ingested data -We can now query the newly ingested data. Let's query a subset of the data for a specific time range. +You can now query the newly ingested data. You can query a subset of the data for a specific time range. Since the data is now stored directly in the Tilebox dataset, you can query and access it from anywhere. @@ -217,7 +217,6 @@ Data variables: (12/14) For more information on accessing and querying data, check out [querying data](/datasets/query). - ## View the data in the console You can also view your data in the Console, by navigate to the dataset, selecting the collection and then clicking @@ -230,7 +229,7 @@ on one of the data points. ## Next steps -Congrats! You've successfully ingested data into Tilebox. You can now explore the data in the console and use it for +Congrats. You've successfully ingested data into Tilebox. You can now explore the data in the console and use it for further processing and analysis. 
diff --git a/mint.json b/mint.json index 2fe2d48..adc4841 100644 --- a/mint.json +++ b/mint.json @@ -167,9 +167,10 @@ }, { "group": "Go", - "icon": "golang", "pages": [ - "sdks/go/introduction" + "sdks/go/install", + "sdks/go/examples", + "sdks/go/protobuf" ] }, { @@ -180,38 +181,94 @@ ] }, { - "group": "tilebox.datasets", + "group": "Python", "pages": [ - "api-reference/tilebox.datasets/Client", - "api-reference/tilebox.datasets/Client.datasets", - "api-reference/tilebox.datasets/Client.dataset", - "api-reference/tilebox.datasets/Dataset.collections", - "api-reference/tilebox.datasets/Dataset.collection", - "api-reference/tilebox.datasets/Dataset.create_collection", - "api-reference/tilebox.datasets/Dataset.get_or_create_collection", - "api-reference/tilebox.datasets/Collection.delete", - "api-reference/tilebox.datasets/Collection.find", - "api-reference/tilebox.datasets/Collection.info", - "api-reference/tilebox.datasets/Collection.ingest", - "api-reference/tilebox.datasets/Collection.load" + { + "group": "tilebox.datasets", + "pages": [ + "api-reference/python/tilebox.datasets/Client", + "api-reference/python/tilebox.datasets/Client.datasets", + "api-reference/python/tilebox.datasets/Client.dataset", + "api-reference/python/tilebox.datasets/Dataset.collections", + "api-reference/python/tilebox.datasets/Dataset.collection", + "api-reference/python/tilebox.datasets/Dataset.create_collection", + "api-reference/python/tilebox.datasets/Dataset.get_or_create_collection", + "api-reference/python/tilebox.datasets/Collection.delete", + "api-reference/python/tilebox.datasets/Collection.find", + "api-reference/python/tilebox.datasets/Collection.info", + "api-reference/python/tilebox.datasets/Collection.ingest", + "api-reference/python/tilebox.datasets/Collection.load" + ] + }, + { + "group": "tilebox.workflows", + "pages": [ + "api-reference/python/tilebox.workflows/Client", + "api-reference/python/tilebox.workflows/Task", + 
"api-reference/python/tilebox.workflows/Client.runner", + "api-reference/python/tilebox.workflows/TaskRunner.run_all", + "api-reference/python/tilebox.workflows/TaskRunner.run_forever", + "api-reference/python/tilebox.workflows/ExecutionContext.submit_subtask", + "api-reference/python/tilebox.workflows/ExecutionContext.job_cache", + "api-reference/python/tilebox.workflows/JobCache.group", + "api-reference/python/tilebox.workflows/JobCache.__iter__", + "api-reference/python/tilebox.workflows/JobClient.submit", + "api-reference/python/tilebox.workflows/JobClient.retry", + "api-reference/python/tilebox.workflows/JobClient.cancel", + "api-reference/python/tilebox.workflows/JobClient.visualize" + ] + } ] }, { - "group": "tilebox.workflows", + "group": "Go", "pages": [ - "api-reference/tilebox.workflows/Client", - "api-reference/tilebox.workflows/Task", - "api-reference/tilebox.workflows/Client.runner", - "api-reference/tilebox.workflows/TaskRunner.run_all", - "api-reference/tilebox.workflows/TaskRunner.run_forever", - "api-reference/tilebox.workflows/ExecutionContext.submit_subtask", - "api-reference/tilebox.workflows/ExecutionContext.job_cache", - "api-reference/tilebox.workflows/JobCache.group", - "api-reference/tilebox.workflows/JobCache.__iter__", - "api-reference/tilebox.workflows/JobClient.submit", - "api-reference/tilebox.workflows/JobClient.retry", - "api-reference/tilebox.workflows/JobClient.cancel", - "api-reference/tilebox.workflows/JobClient.visualize" + { + "group": "datasets", + "pages": [ + "api-reference/go/datasets/Get", + "api-reference/go/datasets/List", + "api-reference/go/datasets/Collections.Create", + "api-reference/go/datasets/Collections.Get", + "api-reference/go/datasets/Collections.GetOrCreate", + "api-reference/go/datasets/Collections.List", + "api-reference/go/datasets/Datapoints.GetInto", + "api-reference/go/datasets/Datapoints.Query", + "api-reference/go/datasets/Datapoints.QueryInto", + "api-reference/go/datasets/Datapoints.Ingest", + 
"api-reference/go/datasets/Datapoints.Delete", + "api-reference/go/datasets/Datapoints.DeleteIDs", + "api-reference/go/datasets/CollectAs", + "api-reference/go/datasets/Collect", + "api-reference/go/datasets/As" + ] + }, + { + "group": "workflows", + "pages": [ + "api-reference/go/workflows/Task", + "api-reference/go/workflows/GetCurrentCluster", + "api-reference/go/workflows/SubmitSubtask", + "api-reference/go/workflows/SubmitSubtasks", + "api-reference/go/workflows/WithTaskSpan", + "api-reference/go/workflows/WithTaskSpanResult", + "api-reference/go/workflows/NewTaskRunner", + "api-reference/go/workflows/TaskRunner.GetRegisteredTask", + "api-reference/go/workflows/TaskRunner.RegisterTasks", + "api-reference/go/workflows/TaskRunner.Run", + "api-reference/go/workflows/Clusters.Create", + "api-reference/go/workflows/Clusters.Get", + "api-reference/go/workflows/Clusters.Delete", + "api-reference/go/workflows/Clusters.List", + "api-reference/go/workflows/Jobs.Submit", + "api-reference/go/workflows/Jobs.Get", + "api-reference/go/workflows/Jobs.Retry", + "api-reference/go/workflows/Jobs.Cancel", + "api-reference/go/workflows/Jobs.List", + "api-reference/go/workflows/Collect" + + ] + } ] } ], diff --git a/quickstart.mdx b/quickstart.mdx index 0cc51fb..3b946bb 100644 --- a/quickstart.mdx +++ b/quickstart.mdx @@ -124,4 +124,220 @@ If you prefer to work locally, follow these steps to get started. + +## Start with Examples + +Explore the provided [Examples](/sdks/go/examples) to begin your journey with Tilebox. These examples offer a step-by-step guide to using the API and showcase many features supported by Tilebox Go clients. You can also use these examples as a foundation for your own projects. + +## Start on Your Device + +If you prefer to work locally, follow these steps to get started. + + + + Add the Tilebox library in your project. 
+ + ```bash Shell + go get github.com/tilebox/tilebox-go + ``` + + Install [tilebox-generate](https://github.com/tilebox/tilebox-generate) command-line tool on your machine. + It's used to generate Go structs for Tilebox datasets. + + ```bash Shell + go install github.com/tilebox/tilebox-generate@latest + ``` + + + Create an API key by logging into the [Tilebox Console](https://console.tilebox.com), navigating to [Account -> API Keys](https://console.tilebox.com/account/api-keys), and clicking the "Create API Key" button. + + + Tilebox Console + Tilebox Console + + + Copy the API key and keep it somewhere safe. You will need it to authenticate your requests. + + + Create a cluster by logging into the [Tilebox Console](https://console.tilebox.com), navigating to [Workflows -> Clusters](https://console.tilebox.com/workflows/clusters), and clicking the "Create cluster" button. + + + Tilebox Console + Tilebox Console + + + Copy the cluster slug, you will need it to run your workflows. + + + Run [tilebox-generate](https://github.com/tilebox/tilebox-generate) in the root directory of your Go project. + It generates the dataset type for Sentinel-2 MSI dataset. It will generate a `./protogen/tilebox/v1/sentinel2_msi.pb.go` file. + + ```bash Shell + tilebox-generate --dataset open_data.copernicus.sentinel2_msi --tilebox-api-key $TILEBOX_API_KEY + ``` + + + Use the datasets client to query data from a dataset. 
+ + ```go Go + package main + + import ( + "context" + "log" + "log/slog" + "time" + + "github.com/google/uuid" + "github.com/paulmach/orb" + "github.com/paulmach/orb/encoding/wkt" + "github.com/tilebox/tilebox-go/datasets/v1" + "github.com/tilebox/tilebox-go/query" + ) + + func main() { + ctx := context.Background() + client := datasets.NewClient() + + // select a dataset + dataset, err := client.Datasets.Get(ctx, "open_data.copernicus.sentinel2_msi") + if err != nil { + log.Fatalf("Failed to get dataset: %v", err) + } + + // select a collection + collection, err := client.Collections.Get(ctx, dataset.ID, "S2A_S2MSI1C") + if err != nil { + log.Fatalf("Failed to get collection: %v", err) + } + + // load data from a collection in a given time range and spatial extent + colorado := orb.Polygon{ + {{-109.05, 37.09}, {-102.06, 37.09}, {-102.06, 41.59}, {-109.05, 41.59}, {-109.05, 37.09}}, + } + startDate := time.Date(2025, time.March, 1, 0, 0, 0, 0, time.UTC) + endDate := time.Date(2025, time.April, 1, 0, 0, 0, 0, time.UTC) + march2025 := query.NewTimeInterval(startDate, endDate) + + // You have to use tilebox-generate to generate the dataset type + var datapointsOverColorado []*v1.Sentinel2Msi + err = client.Datapoints.QueryInto(ctx, + []uuid.UUID{collection.ID}, &datapointsOverColorado, + datasets.WithTemporalExtent(march2025), + datasets.WithSpatialExtent(colorado), + ) + if err != nil { + log.Fatalf("Failed to query datapoints: %v", err) + } + + slog.Info("Found datapoints over Colorado in March 2025", slog.Int("count", len(datapointsOverColorado))) + slog.Info("First datapoint over Colorado", + slog.String("id", datapointsOverColorado[0].GetId().AsUUID().String()), + slog.Time("event time", datapointsOverColorado[0].GetTime().AsTime()), + slog.Time("ingestion time", datapointsOverColorado[0].GetIngestionTime().AsTime()), + slog.String("geometry", wkt.MarshalString(datapointsOverColorado[0].GetGeometry().AsGeometry())), + slog.String("granule name", 
datapointsOverColorado[0].GetGranuleName()), + slog.String("processing level", datapointsOverColorado[0].GetProcessingLevel().String()), + slog.String("product type", datapointsOverColorado[0].GetProductType()), + // and so on... + ) + } + ``` + + + Use the workflows client to create a task and submit it as a job. + + ```go Go + package main + + import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/workflows/v1" + ) + + type HelloTask struct { + Greeting string + Name string + } + + func (t *HelloTask) Execute(ctx context.Context) error { + slog.InfoContext(ctx, "Hello from the main task!", slog.String("Greeting", t.Greeting), slog.String("Name", t.Name)) + + err := workflows.SubmitSubtasks(ctx, &HelloSubtask{Name: t.Name}) + if err != nil { + return err + } + + return nil + } + + type HelloSubtask struct { + Name string + } + + func (t *HelloSubtask) Execute(context.Context) error { + slog.Info("Hello from the subtask!", slog.String("Name", t.Name)) + return nil + } + + func main() { + ctx := context.Background() + + // Replace with your actual cluster and token + clusterSlug := "YOUR_COMPUTE_CLUSTER" + client := workflows.NewClient() + + job, err := client.Jobs.Submit(ctx, "hello-world", clusterSlug, + []workflows.Task{ + &HelloTask{ + Greeting: "Greetings", + Name: "Tilebox", + }, + }, + ) + if err != nil { + slog.ErrorContext(ctx, "Failed to submit job", slog.Any("error", err)) + return + } + + slog.InfoContext(ctx, "Job submitted", slog.String("job_id", job.ID.String())) + + runner, err := client.NewTaskRunner( + workflows.WithCluster(clusterSlug), + ) + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = runner.RegisterTasks( + &HelloTask{}, + &HelloSubtask{}, + ) + if err != nil { + slog.Error("failed to register task", slog.Any("error", err)) + return + } + + runner.Run(context.Background()) + } + ``` + + + Review the following guides to learn more about the modules that make up Tilebox: 
+ + + + Learn how to create a Timeseries dataset using the Tilebox Console. + + + Learn how to ingest an existing CSV dataset into a Timeseries dataset collection. + + + + + + diff --git a/sdks/go/examples.mdx b/sdks/go/examples.mdx new file mode 100644 index 0000000..71e3ab0 --- /dev/null +++ b/sdks/go/examples.mdx @@ -0,0 +1,62 @@ +--- +title: Examples +description: Examples maintained to use and learn from. +icon: notebook +--- + +To quickly become familiar with the Go client, you can explore some standalone examples. + +You can access the examples on [ GitHub](https://github.com/tilebox/tilebox-go/tree/main/examples). +More examples can be found throughout the docs. + +## Workflows examples + + + + How to use Tilebox Workflows to submit and execute a simple task. + + [ Open in + Github](https://github.com/tilebox/tilebox-go/tree/main/examples/workflows/helloworld) + + + + How to submit a task and run a workflow using protobuf messages. + + [ Open in + Github](https://github.com/tilebox/tilebox-go/blob/main/examples/workflows/protobuf-task) + + + + How to set up tracing and logging for workflows using [Axiom](https://axiom.co/) observability platform. + + [ Open in + Github](https://github.com/tilebox/tilebox-go/tree/main/examples/workflows/axiom) + + + + How to set up tracing and logging for workflows using [OpenTelemetry](https://opentelemetry.io/). + + [ Open in + Github](https://github.com/tilebox/tilebox-go/blob/main/examples/workflows/opentelemetry) + + + + +## Datasets examples + + + + How to query datapoints from a Tilebox dataset. + + [ Open in + Github](https://github.com/tilebox/tilebox-go/blob/main/examples/datasets/query/main.go) + + + + How to create a collection, ingest datapoints, and then delete them. 
+ + [ Open in + Github](https://github.com/tilebox/tilebox-go/blob/main/examples/datasets/ingest/main.go) + + + diff --git a/sdks/go/install.mdx b/sdks/go/install.mdx new file mode 100644 index 0000000..317d441 --- /dev/null +++ b/sdks/go/install.mdx @@ -0,0 +1,32 @@ +--- +title: Installation +description: Install the Tilebox Go library +icon: download +--- + +## Package Overview + +Tilebox offers a Go SDK for accessing Tilebox services. It additionally includes a command-line tool (tilebox-generate) that can be installed separately. + + + + Datasets and workflows client for Tilebox + + + Command-line tool to generate Tilebox datasets types for Go + + + +## Installation + +Add `tilebox-go` to your project. + +```bash Shell +go get github.com/tilebox/tilebox-go +``` + +Install `tilebox-generate` command-line tool on your machine. + +```bash Shell +go install github.com/tilebox/tilebox-generate@latest +``` diff --git a/sdks/go/introduction.mdx b/sdks/go/introduction.mdx deleted file mode 100644 index 8cf4678..0000000 --- a/sdks/go/introduction.mdx +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Introduction -description: Learn about the Tilebox Go SDK -icon: wrench ---- - - - The Tilebox Go SDK is currently in development. Stay tuned for updates! - diff --git a/sdks/go/protobuf.mdx b/sdks/go/protobuf.mdx new file mode 100644 index 0000000..6a35e9f --- /dev/null +++ b/sdks/go/protobuf.mdx @@ -0,0 +1,231 @@ +--- +title: Protobuf +description: Overview of protobuf, common use cases, and implementation details. +icon: chart-bar +--- + +Tilebox uses [Protocol Buffers](https://protobuf.dev/), with a custom generation tool, combined with standard Go data structures. + +[Protocol Buffers](https://protobuf.dev/) (often referred to as `protobuf`) is a schema definition language with an efficient binary format and native language support for lots of languages, including Go. +Protocol buffers are open source since 2008 and are maintained by Google. 
+ +## tilebox-generate + +Protobuf schemas are typically defined in a `.proto` file, and then converted to a native Go struct using the protobuf compiler. +Tilebox datasets already define a protobuf schema as well, and automate the generation of Go structs for existing datasets through a quick `tilebox-generate` command-line tool. + +See [Installation](/sdks/go/install) for more details on how to install `tilebox-generate`. + +```sh +tilebox-generate --dataset open_data.copernicus.sentinel1_sar +``` + +The preceding command will generate a `./protogen/tilebox/v1/sentinel1_sar.pb.go` file. More flags can be set to change the default output folders, package name, etc. + +This file contains everything needed to work with the [Sentinel-1 SAR](https://console.tilebox.com/datasets/explorer/e27e6a58-c149-4379-9fdf-9d43903cba74) dataset. +It's recommended to check the generated files you use into your version control system. + +If you open this file, you will see that it starts with `// Code generated by protoc-gen-go. DO NOT EDIT.`. +It means that the file was generated by the `protoc-gen-go` tool, which is part of the protobuf compiler. +After editing a dataset, you can call the generate command again to ensure that the changes are reflected in the generated file. + +The file contains a `Sentinel1Sar` struct, which is a Go struct that represents a datapoint in the dataset. + +```go Go +type Sentinel1Sar struct { + xxx_hidden_GranuleName *string `protobuf:"bytes,1,opt,name=granule_name,json=granuleName"` + xxx_hidden_ProcessingLevel v1.ProcessingLevel `protobuf:"varint,2,opt,name=processing_level,json=processingLevel,enum=datasets.v1.ProcessingLevel"` + // more fields +} +``` + +Notice that the fields are private (starting with a lowercase letter), so they are not accessible. +Protobuf hides the fields and provides getters and setters to access them. + +## Protobuf 101 + +### Initializing a message + +Here is how to initialize a `v1.Sentinel1Sar` message. 
+```go Go +import ( + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" +) + +datapoint := v1.Sentinel1Sar_builder{ + Time: timestamppb.New(time.Now()), + GranuleName: proto.String("S1A_EW_GRDH_1SSH_20141004T020507_20141004T020611_002673_002FAF_8645_COG.SAFE"), + ProductType: proto.String("EW_GRDH_1S-COG"), + FileSize: proto.Int64(488383473), +}.Build() +``` + +Protobuf fields are private, so protobuf provides a builder pattern to create a message. + +`proto.String` is a helper function that converts `string` to `*string`. +This allows protobuf to differentiate between a field that is set to an empty string and a field that is not set (nil). +An exhaustive list of those helper functions can be found [here](https://github.com/golang/protobuf/blob/master/proto/wrappers.go). + +Only primitives have a `proto.XXX` helper function. +Complex types such as timestamps, durations, UUIDs, and geometries have a [constructor function](#constructors). + +### Getters and setters + +Protobuf provides methods to get, set, clear and check if a field is set. + +```go Go +fmt.Println(datapoint.GetGranuleName()) + +datapoint.SetGranuleName("my amazing granule") + +datapoint.ClearGranuleName() + +if datapoint.HasGranuleName() { + fmt.Println("Granule name is set") +} +``` + +Getters for primitive types will return a Go native type (for example, int64, string, etc.). +Values returned by getters for complex types such as timestamps, durations, UUIDs, and geometries can also be converted to more standard types using [AsXXX](#asxxx-methods) methods. + +## Well known types + +Besides Go primitives, Tilebox supports some well known types: + +- Duration: A duration of time. See [Duration](https://protobuf.dev/reference/protobuf/google.protobuf/#duration) for more information. +- Timestamp: A point in time. See [Timestamp](https://protobuf.dev/reference/protobuf/google.protobuf/#timestamp) for more information. 
+- UUID: A [universally unique identifier (UUID)](https://en.wikipedia.org/wiki/Universally_unique_identifier). +- Geometry: Geospatial geometries of type Point, LineString, Polygon or MultiPolygon. + +They have a couple of useful methods to work with them. + +### Constructors + +```go Go +import ( + "github.com/paulmach/orb" + datasetsv1 "github.com/tilebox/tilebox-go/protogen/go/datasets/v1" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/timestamppb" +) + +timestamppb.New(time.Now()) +durationpb.New(10 * time.Second) +datasetsv1.NewUUID(uuid.New()) +datasetsv1.NewGeometry(orb.Point{1, 2}) +``` + +### `CheckValid` method + +`CheckValid` returns an error if the field is invalid. + +```go Go +err := datapoint.GetTime().CheckValid() +if err != nil { + fmt.Println(err) +} +``` + +### `IsValid` method + +`IsValid` reports whether the field is valid. It's equivalent to `CheckValid() == nil`. + +```go Go +if datapoint.GetTime().IsValid() { + fmt.Println("Valid") +} +``` + +### `AsXXX` methods + +`AsXXX` methods convert the field to a more user-friendly type. + +- `AsUUID` will convert a `datasetsv1.UUID` field to a [uuid.UUID](https://pkg.go.dev/github.com/google/uuid#UUID) type +- `AsTime` will convert a `timestamppb.Timestamp` field to a [time.Time](https://pkg.go.dev/time#Time) type +- `AsDuration` will convert a `durationpb.Duration` field to a [time.Duration](https://pkg.go.dev/time#Duration) type +- `AsGeometry` will convert a `datasetsv1.Geometry` field to an [orb.Geometry](https://github.com/paulmach/orb?tab=readme-ov-file#shared-geometry-interface) interface + +```go Go +datapoint.GetId().AsUUID() // uuid.UUID +datapoint.GetTime().AsTime() // time.Time +datapoint.GetDuration().AsDuration() // time.Duration +datapoint.GetGeometry().AsGeometry() // orb.Geometry +``` + +These methods perform conversion on a best-effort basis. Type validity must be checked beforehand using `IsValid` or `CheckValid` methods. 
+ +## Common data operations + +Datapoints are contained in a standard Go slice so all the usual [slice operations](https://gobyexample.com/slices) and [slice functions](https://pkg.go.dev/slices) can be used. + +The usual pattern to iterate over data in Go is by using a `for` loop. + +As an example, here is how to extract the `copernicus_id` fields from the datapoints. + +```go Go +// assuming datapoints has been filled using `client.Datapoints.QueryInto` method +var datapoints []*v1.Sentinel1Sar + +copernicusIDs := make([]uuid.UUID, len(datapoints)) +for i, dp := range datapoints { + copernicusIDs[i] = dp.GetCopernicusId().AsUUID() +} +``` + +Here is an example of filtering out datapoints that were published before January 2000 or are not from the Sentinel-1C platform. + +```go Go +jan2000 := time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC) +// slice of length of 0, but preallocate a capacity of len(datapoints) +s1cDatapoints := make([]*v1.Sentinel1Sar, 0, len(datapoints)) + +for _, dp := range datapoints { + if dp.GetPublished().AsTime().Before(jan2000) { + continue + } + if dp.GetPlatform() != "S1C" { + continue + } + + s1cDatapoints = append(s1cDatapoints, proto.CloneOf(dp)) // Copy the underlying data +} +``` + +## Converting to JSON + +Protobuf messages can be converted to JSON without loss of information. This is useful for interoperability with other systems that don't use protobuf. 
+A guide on the protoJSON format can be found here: https://protobuf.dev/programming-guides/json/ + +```go Go +originalDatapoint := datapoints[0] + +// Convert proto.Message to JSON as bytes +jsonDatapoint, err := protojson.Marshal(originalDatapoint) +if err != nil { + log.Fatalf("Failed to marshal datapoint: %v", err) +} +fmt.Println(string(jsonDatapoint)) +``` + +```plaintext Output +{"time":"2001-01-01T00:00:00Z","id":{"uuid":"AOPHpzQAAmV2MZ4+Zv+JGg=="},"ingestionTime":"2025-03-25T10:26:10.577385176Z","granuleName":"MCD12Q1.A2001001.h02v08.061.2022146033342.hdf","geometry":{"wkb":"AQMAAAABAAAABQAAAFIi9vf7TmTAXsX3////I0Bexff///9jwAAAAAAAAAAACUn4//+/YsAAAAAAAAAAAC7AdjgMCmPAXsX3////I0BSIvb3+05kwF7F9////yNA"},"endTime":"2001-12-31T23:59:59Z","horizontalTileNumber":"2","verticalTileNumber":"8","tileId":"51002008","fileSize":"176215","checksum":"771212892","checksumType":"CKSUM","dayNightFlag":"Day","publishedAt":"2022-06-23T10:58:13.895Z"} +``` + +It can also be converted back to a `proto.Message`. + +```go Go +// Convert JSON bytes to proto.Message +unmarshalledDatapoint := &v1.Sentinel1Sar{} +err = protojson.Unmarshal(jsonDatapoint, unmarshalledDatapoint) +if err != nil { + log.Fatalf("Failed to unmarshal datapoint: %v", err) +} + +fmt.Println("Are both equal?", proto.Equal(unmarshalledDatapoint, originalDatapoint)) +``` + +```plaintext Output +Are both equal? true +``` diff --git a/sdks/introduction.mdx b/sdks/introduction.mdx index a2b082c..d6ef858 100644 --- a/sdks/introduction.mdx +++ b/sdks/introduction.mdx @@ -20,7 +20,7 @@ The following language SDKs are currently available for Tilebox. Select one to l Tilebox Go diff --git a/sdks/python/install.mdx b/sdks/python/install.mdx index 2761838..a93461f 100644 --- a/sdks/python/install.mdx +++ b/sdks/python/install.mdx @@ -22,7 +22,7 @@ Tilebox offers a Python SDK for accessing Tilebox services. The SDK includes sep Install the Tilebox python packages using your preferred package manager. 
- For new projects we recommend using [uv](https://docs.astral.sh/uv/). + For new projects Tilebox recommends using [uv](https://docs.astral.sh/uv/). diff --git a/sdks/python/sample-notebooks.mdx b/sdks/python/sample-notebooks.mdx index 10b9ce7..3737115 100644 --- a/sdks/python/sample-notebooks.mdx +++ b/sdks/python/sample-notebooks.mdx @@ -14,8 +14,9 @@ You can access the sample notebooks on [ Google Driv Right click a notebook in Google Drive and select `Open with -> Google Colaboratory` to open it directly in the browser using [Google Colab](https://colab.research.google.com/). -### Notebook overview +More examples can be found throughout the docs. +### Notebook overview {/* @@ -33,8 +34,8 @@ You can access the sample notebooks on [ Google Driv - This notebook demonstrates how to ingest data into a Custom Dataset. In this case we are using a sample dataset from the [MODIS instrument](https://lpdaac.usgs.gov/products/mcd12q1v006/) which - we have prepared. + This notebook demonstrates how to ingest data into a Custom Dataset. In this case it's using a sample dataset from the [MODIS instrument](https://lpdaac.usgs.gov/products/mcd12q1v006/) which + is already prepared. [ Open in Colab](https://colab.research.google.com/drive/1QS-srlWPMJg4csc0ycn36yCX9U6mvIpW) diff --git a/snippets/components.mdx b/snippets/components.mdx index 53267c8..968f53c 100644 --- a/snippets/components.mdx +++ b/snippets/components.mdx @@ -11,7 +11,6 @@ export const HeroCard = ({ children, title, description, href }) => { ); }; - export const CodeOutputHeader = () => { return ( 
diff --git a/storage/clients.mdx b/storage/clients.mdx index 62a5fc5..b51a5bd 100644 --- a/storage/clients.mdx +++ b/storage/clients.mdx @@ -4,10 +4,15 @@ description: Learn about the different storage clients available in Tilebox to a icon: hard-drive --- -Tilebox does not host the actual open data satellite products but instead relies on publicly accessible storage providers for data access. Instead Tilebox ingests available metadata as [datasets](/datasets/concepts/datasets) to enable high performance querying and structured access of the data as [xarray.Dataset](/sdks/python/xarray). +Tilebox does not host the actual open data satellite products but instead relies on publicly accessible storage providers for data access. +Tilebox ingests available metadata as [datasets](/datasets/concepts/datasets) to enable high performance querying and structured access of the data as [xarray.Dataset](/sdks/python/xarray). Below is a list of the storage providers currently supported by Tilebox. + + This feature is only available in the Python SDK. + + ## Copernicus Data Space The [Copernicus Data Space](https://dataspace.copernicus.eu/) is an open ecosystem that provides free instant access to data and services from the Copernicus Sentinel missions. Check out the [ASF Open Data datasets](/datasets/open-data#copernicus-data-space) that are available in Tilebox. @@ -25,7 +30,7 @@ from tilebox.datasets import Client from tilebox.storage import CopernicusStorageClient # Creating clients -client = Client(token="YOUR_TILEBOX_API_KEY") +client = Client() datasets = client.datasets() storage_client = CopernicusStorageClient( access_key="YOUR_ACCESS_KEY", @@ -70,7 +75,7 @@ Contents: For cases where only a subset of the available file objects for a product is needed, you may restrict your download to just that subset. First, list available objects using `list_objects`, filter them, and then download using `download_objects`. 
-For example, a Sentinel-2 L2A product includes many files such as metadata, different bands in various resolutions, masks, and quicklook images. The following example shows how to download only specific files from a Sentinel-2 L2A product. +For example, a Sentinel-2 L2A product includes many files such as metadata, different bands in multiple resolutions, masks, and quicklook images. The following example shows how to download only specific files from a Sentinel-2 L2A product. ```python Python {4, 15} collection = datasets.open_data.copernicus.sentinel2_msi.collections()["S2A_S2MSI2A"] @@ -118,7 +123,7 @@ from tilebox.datasets import Client from tilebox.storage import ASFStorageClient # Creating clients -client = Client(token="YOUR_TILEBOX_API_KEY") +client = Client() datasets = client.datasets() storage_client = ASFStorageClient( user="YOUR_ASF_USER", @@ -191,7 +196,7 @@ from tilebox.datasets import Client from tilebox.storage import UmbraStorageClient # Creating clients -client = Client(token="YOUR_TILEBOX_API_KEY") +client = Client() datasets = client.datasets() storage_client = UmbraStorageClient(cache_directory=Path("./data")) diff --git a/vale/styles/config/vocabularies/docs/accept.txt b/vale/styles/config/vocabularies/docs/accept.txt index 79654fe..2e5547a 100644 --- a/vale/styles/config/vocabularies/docs/accept.txt +++ b/vale/styles/config/vocabularies/docs/accept.txt @@ -1,5 +1,9 @@ # (?i) makes the regex case-insensitive, see https://vale.sh/docs/topics/vocab/#case-sensitivity +Assistance +Custom Dataset +Custom Datasets +tilebox-generate Tilebox (?i)Xarray NumPy @@ -10,6 +14,8 @@ Datalore Colab Tropomi georeferenced +Geospatial +Geopandas Fortran Zarr datetime @@ -17,7 +23,8 @@ accessor Pipenv Opendata APIs -datapoint +(?i)datapoint +(?i)datapoints dataclass subtask subtasks @@ -37,7 +44,7 @@ subtasks subtask parallelizable deserializing -idempotency +(?i)idempotency SDKs SDK coroutines @@ -47,3 +54,12 @@ coroutines (?i)pushbroom rollout (?i)automations 
+(?i)protobuf
+UUIDs
+(?i)getters
+(?i)ingest
+(?i)Timeseries
+point in time
+bool
+boolean
+(?i)modis diff --git a/workflows/caches.mdx b/workflows/caches.mdx index 99c102b..87357e4 100644 --- a/workflows/caches.mdx +++ b/workflows/caches.mdx @@ -148,7 +148,7 @@ Caches are isolated per job, meaning that each job's cache data is only accessib ## Storing and Retrieving Data -The job cache can be accessed via the `ExecutionContext` passed to a tasks `execute` function. This [`job_cache`](/api-reference/tilebox.workflows/ExecutionContext.job_cache) object provides methods to handle data storage and retrieval from the cache. The specifics of data storage depend on the chosen cache backend. +The job cache can be accessed via the `ExecutionContext` passed to a task's `execute` function. This [`job_cache`](/api-reference/python/tilebox.workflows/ExecutionContext.job_cache) object provides methods to handle data storage and retrieval from the cache. The specifics of data storage depend on the chosen cache backend. The cache API is designed to be simple and can handle all types of data, supporting binary data in the form of `bytes`, identified by `str` cache keys. This allows for storing many different data types, such as pickled Python objects, serialized JSON, UTF-8, or binary data. 
diff --git a/workflows/concepts/clusters.mdx b/workflows/concepts/clusters.mdx index 7fbfc30..dd9d831 100644 --- a/workflows/concepts/clusters.mdx +++ b/workflows/concepts/clusters.mdx @@ -36,6 +36,12 @@ To manage clusters, first instantiate a cluster client using the `clusters` meth client = Client() clusters = client.clusters() ``` + ```go Go + import "github.com/tilebox/tilebox-go/workflows/v1" + + client := workflows.NewClient() + clusterClient := client.Clusters + ``` ### Creating a Cluster @@ -47,11 +53,20 @@ To create a cluster, use the `create` method on the cluster client and provide a cluster = clusters.create("testing") print(cluster) ``` + ```go Go + cluster := client.Clusters.Create("testing") + fmt.Println(cluster) + ``` -```plaintext Output + +```plaintext Python Cluster(slug='testing-CvufcSxcC9SKfe', display_name='testing') ``` +```go Go +&{testing-CvufcSxcC9SKfe testing} +``` + ### Cluster Slug @@ -66,12 +81,29 @@ To list all available clusters, use the `all` method: all_clusters = clusters.all() print(all_clusters) ``` + ```go Go + clusters, err := client.Clusters.List(ctx) + if err != nil { + slog.Error("failed to list clusters", slog.Any("error", err)) + return + } + + for _, cluster := range clusters { + fmt.Println(cluster) + } + ``` -```plaintext Output + +```plaintext Python [Cluster(slug='testing-CvufcSxcC9SKfe', display_name='testing'), Cluster(slug='production-EifhUozDpwAJDL', display_name='Production')] ``` +```go Go +&{testing-CvufcSxcC9SKfe testing} +&{production-EifhUozDpwAJDL Production} +``` + ### Fetching a Specific Cluster @@ -82,11 +114,24 @@ To fetch a specific cluster, use the `find` method and pass the cluster's slug: cluster = clusters.find("testing-CvufcSxcC9SKfe") print(cluster) ``` + ```go Go + cluster, err := client.Clusters.Get(ctx, "testing-CvufcSxcC9SKfe") + if err != nil { + slog.Error("failed to get cluster", slog.Any("error", err)) + return + } + fmt.Println(cluster) + ``` -```plaintext Output + +```plaintext Python 
Cluster(slug='testing-CvufcSxcC9SKfe', display_name='testing') ``` +```go Go +&{testing-CvufcSxcC9SKfe testing} +``` + ### Deleting a Cluster @@ -96,6 +141,9 @@ To delete a cluster, use the `delete` method and pass the cluster's slug: ```python Python clusters.delete("testing-CvufcSxcC9SKfe") ``` + ```go Go + err := client.Clusters.Delete(ctx, "testing-CvufcSxcC9SKfe") + ``` ## Jobs Across Different Clusters @@ -127,10 +175,63 @@ class DummyTask(Task): client = Client() job_client = client.jobs() job = job_client.submit( + "my-job", MultiClusterWorkflow(), cluster="testing-CvufcSxcC9SKfe", ) ``` +```go Go +package main + +import ( + "context" + + "github.com/tilebox/tilebox-go/workflows/v1" + "github.com/tilebox/tilebox-go/workflows/v1/subtask" +) + +type MultiClusterWorkflow struct{} + +func (t *MultiClusterWorkflow) Execute(ctx context.Context) error { + // this submits a task to the same cluster as the one currently executing this task + sameCluster, err := workflows.SubmitSubtask(ctx, &DummyTask{}) + if err != nil { + return err + } + + otherCluster, err := workflows.SubmitSubtask( + ctx, + &DummyTask{}, + // this task runs only on a task runner in the "other-cluster" cluster + subtask.WithClusterSlug("other-cluster-As3dcSb3D9SAdK"), + // dependencies can be specified across clusters + subtask.WithDependencies(sameCluster), + ) + if err != nil { + return err + } + + _ = otherCluster + return nil +} + +type DummyTask struct{} + +func main() { + ctx := context.Background() + client := workflows.NewClient() + + // submit a job to the "testing" cluster + _, _ = client.Jobs.Submit( + ctx, + "my-job", + "testing-CvufcSxcC9SKfe", + []workflows.Task{ + &MultiClusterWorkflow{}, + }, + ) +} +``` This workflow requires at least two task runners to complete. One must be in the "testing" cluster, and the other must be in the "other-cluster" cluster. 
If no task runners are available in the "other-cluster," the task submitted to that cluster will remain queued until a task runner is available. It won't execute on a task runner in the "testing" cluster, even if the task runner has the `DummyTask` registered. diff --git a/workflows/concepts/jobs.mdx b/workflows/concepts/jobs.mdx index e23b5fc..639d4ff 100644 --- a/workflows/concepts/jobs.mdx +++ b/workflows/concepts/jobs.mdx @@ -22,9 +22,15 @@ from tilebox.workflows import Client client = Client() job_client = client.jobs() ``` +```go Go +import "github.com/tilebox/tilebox-go/workflows/v1" + +client := workflows.NewClient() +jobClient := client.Jobs +``` -After obtaining a job client, submit a job using the [submit](/api-reference/tilebox.workflows/JobClient.submit) method. You need to provide a name for the job, an instance of the root [task](/workflows/concepts/tasks), and a [cluster](/workflows/concepts/clusters) to execute the root task on. +After obtaining a job client, submit a job using the [submit](/api-reference/python/tilebox.workflows/JobClient.submit) method. You need to provide a name for the job, an instance of the root [task](/workflows/concepts/tasks), and a [cluster](/workflows/concepts/clusters) to execute the root task on. ```python Python @@ -34,6 +40,25 @@ from my_workflow import MyTask cluster = "dev-cluster" job = job_client.submit('my-job', MyTask("some", "parameters"), cluster) ``` +```go Go +cluster, err := client.Clusters.Get(ctx, "dev-cluster") +if err != nil { + slog.Error("failed to get cluster", slog.Any("error", err)) + return +} + +job, err := client.Jobs.Submit(ctx, "my-job", cluster, + []workflows.Task{ + &MyTask{ + Some: "parameters", + }, + }, +) +if err != nil { + slog.Error("Failed to submit job", slog.Any("error", err)) + return +} +``` Once a job is submitted, it's immediately scheduled for execution. 
The root task will be picked up and executed as soon as an [eligible task runner](/workflows/concepts/task-runners#task-selection) is available. @@ -49,6 +74,14 @@ from my_workflow import MyFlakyTask cluster = "dev-cluster" job = job_client.submit('my-job', MyFlakyTask(), cluster, max_retries=5) ``` +```go Go +myJob, err := client.Jobs.Submit(ctx, "my-job", cluster, + []workflows.Task{ + &MyFlakyTask{}, + }, + job.WithMaxRetries(5), +) +``` In this example, if `MyFlakyTask` fails, it will be retried up to five times before being marked as failed. @@ -67,6 +100,25 @@ print(job.id) # 018dd029-58ca-74e5-8b58-b4f99d610f9a # Later, in another process or machine, retrieve job info job = job_client.find("018dd029-58ca-74e5-8b58-b4f99d610f9a") ``` +```go Go +myJob, err := client.Jobs.Submit(ctx, "my-job", cluster, + []workflows.Task{ + &helloworld.HelloTask{ + Some: "parameters", + }, + }, +) +if err != nil { + slog.Error("Failed to submit job", slog.Any("error", err)) + return +} + +// 018dd029-58ca-74e5-8b58-b4f99d610f9a +slog.Info("Job submitted", slog.String("job_id", myJob.ID.String())) + +// Later, in another process or machine, retrieve job info +job, err := client.Jobs.Get(ctx, uuid.MustParse("018dd029-58ca-74e5-8b58-b4f99d610f9a")) +``` @@ -78,7 +130,11 @@ job = job_client.find("018dd029-58ca-74e5-8b58-b4f99d610f9a") Visualizing the execution of a job can be helpful. The Tilebox workflow orchestrator tracks all tasks in a job, including [sub-tasks](/workflows/concepts/tasks#task-composition-and-subtasks) and [dependencies](/workflows/concepts/tasks#dependencies). This enables the visualization of the execution of a job as a graph diagram. -`display` is designed for use in an [interactive environment](/sdks/python/sample-notebooks#interactive-environments) such as a Jupyter notebook. In non-interactive environments, use [visualize](/api-reference/tilebox.workflows/JobClient.visualize), which returns the rendered diagram as an SVG string. 
+`display` is designed for use in an [interactive environment](/sdks/python/sample-notebooks#interactive-environments) such as a Jupyter notebook. In non-interactive environments, use [visualize](/api-reference/python/tilebox.workflows/JobClient.visualize), which returns the rendered diagram as an SVG string. + + + + Visualization isn't supported in Go yet. @@ -148,6 +204,45 @@ class SubTask(Task): job = job_client.submit('custom-display-names', RootTask(3), "dev-cluster") job_client.display(job) ``` +```go Go +type RootTask struct { + NumSubtasks int +} + +func (t *RootTask) Execute(ctx context.Context) error { + err := workflows.SetTaskDisplay(ctx, fmt.Sprintf("Root(%d)", t.NumSubtasks)) + if err != nil { + return fmt.Errorf("failed to set task display: %w", err) + } + + for i := range t.NumSubtasks { + _, err := workflows.SubmitSubtask(ctx, &SubTask{Index: i}) + if err != nil { + return fmt.Errorf("failed to submit subtask: %w", err) + } + } + return nil +} + +type SubTask struct { + Index int +} + +func (t *SubTask) Execute(ctx context.Context) error { + err := workflows.SetTaskDisplay(ctx, fmt.Sprintf("Leaf Nr. 
%d", t.Index)) + if err != nil { + return fmt.Errorf("failed to set task display: %w", err) + } + return nil +} + +// in main +job, err := client.Jobs.Submit(ctx, "custom-display-names", cluster, + []workflows.Task{&RootTask{ + NumSubtasks: 3, + }}, +) +``` @@ -167,6 +262,18 @@ job = job_client.submit('my-job', MyTask(), "dev-cluster") # After a short while, the job gets canceled job_client.cancel(job) ``` +```go Go +job, err := client.Jobs.Submit(ctx, "my-job", cluster, + []workflows.Task{&MyTask{}}, +) +if err != nil { + slog.Error("Failed to submit job", slog.Any("error", err)) + return +} + +// After a short while, the job gets canceled +err = client.Jobs.Cancel(ctx, job.ID) +``` @@ -209,6 +316,72 @@ class PrintMovieStats(Task): context.current_task.display = response["Title"] print(f"{response['Title']} was released on {response['Released']}") ``` +```go Go +package movie + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + + "github.com/tilebox/tilebox-go/workflows/v1" +) + +type MoviesStats struct { + Titles []string +} + +func (t *MoviesStats) Execute(ctx context.Context) error { + for _, title := range t.Titles { + _, err := workflows.SubmitSubtask(ctx, &PrintMovieStats{Title: title}) + if err != nil { + return fmt.Errorf("failed to submit subtask: %w", err) + } + } + return nil +} + +type Movie struct { + Title *string `json:"Title"` + Released *string `json:"Released"` +} + +type PrintMovieStats struct { + Title string +} + +func (t *PrintMovieStats) Execute(ctx context.Context) error { + apiURL := fmt.Sprintf("http://www.omdbapi.com/?t=%s&apikey=%s", url.QueryEscape(t.Title), "") + response, err := http.Get(apiURL) + if err != nil { + return fmt.Errorf("failed to fetch movie: %w", err) + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + var movie Movie + err = json.Unmarshal(body, &movie) + if err != nil { + return 
fmt.Errorf("failed to unmarshal response: %w", err) + } + + // set the display name of the task to the title of the movie: + err = workflows.SetTaskDisplay(ctx, *movie.Title) + if err != nil { + return fmt.Errorf("failed to set task display: %w", err) + } + + fmt.Printf("%s was released on %s\n", *movie.Title, *movie.Released) + return nil +} +``` Submitting the workflow as a job reveals a bug in the `PrintMovieStats` task. @@ -224,6 +397,18 @@ job = job_client.submit('movies-stats', MoviesStats([ job_client.display(job) ``` +```go Go +job, err := client.Jobs.Submit(ctx, "movies-stats", cluster, + []workflows.Task{&MoviesStats{ + Titles: []string{ + "The Matrix", + "Shrek 2", + "Tilebox - The Movie", + "The Avengers", + }, + }}, +) +``` @@ -259,6 +444,47 @@ class PrintMovieStats(Task): context.current_task.display = f"NotFound: {self.title}" print(f"Could not find the release date for {self.title}") ``` +```go Go +type PrintMovieStats struct { + Title string +} + +func (t *PrintMovieStats) Execute(ctx context.Context) error { + url2 := fmt.Sprintf("http://www.omdbapi.com/?t=%s&apikey=%s", url.QueryEscape(t.Title), "") + response, err := http.Get(url2) + if err != nil { + return fmt.Errorf("failed to fetch movie: %w", err) + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + var movie Movie + err = json.Unmarshal(body, &movie) + if err != nil { + return fmt.Errorf("failed to unmarshal response: %w", err) + } + + if movie.Released != nil && movie.Title != nil { + err := workflows.SetTaskDisplay(ctx, *movie.Title) + if err != nil { + return fmt.Errorf("failed to set task display: %w", err) + } + fmt.Printf("%s was released on %s\n", *movie.Title, *movie.Released) + } else { + err := workflows.SetTaskDisplay(ctx, fmt.Sprintf("NotFound: %s", t.Title)) + if err != nil { + return fmt.Errorf("failed to set task display: %w", err) + } + fmt.Printf("Could not find the 
release date for %s\n", t.Title) + } + + return nil +} +``` With this fix, and after redeploying the task runners with the updated `PrintMovieStats` implementation, you can retry the job: @@ -268,6 +494,9 @@ With this fix, and after redeploying the task runners with the updated `PrintMov job_client.retry(job) job_client.display(job) ``` +```go Go +err = client.Jobs.Retry(ctx, job.ID) +``` diff --git a/workflows/concepts/task-runners.mdx b/workflows/concepts/task-runners.mdx index da79c80..37de22c 100644 --- a/workflows/concepts/task-runners.mdx +++ b/workflows/concepts/task-runners.mdx @@ -50,13 +50,55 @@ def main(): if __name__ == "__main__": main() ``` +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/workflows/v1" + // your own workflow: + "github.com/my_org/myworkflow" +) + +func main() { + // 1. connect to the Tilebox Workflows API + client := workflows.NewClient() + + // 2. select a cluster to join + runner, err := client.NewTaskRunner("dev-cluster") + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + // 3. register tasks + err = runner.RegisterTasks( + &myworkflow.MyTask{}, + &myworkflow.OtherTask{}, + ) + if err != nil { + slog.Error("failed to register task", slog.Any("error", err)) + return + } + + // 4. listen for new tasks to execute + runner.Run(context.Background()) +} +``` -To start the task runner locally, run it as a Python script: +To start the task runner locally, run it as a script: -```bash + +```bash Python > python task_runner.py ``` +```bash Go +> go run . 
+``` + ## Task Selection @@ -146,12 +188,56 @@ Here's an example of a distributed workflow: def execute(self, context: ExecutionContext) -> None: pass ``` +```go Go +package distributed + +import ( + "context" + "fmt" + "github.com/tilebox/tilebox-go/workflows/v1" + "github.com/tilebox/tilebox-go/workflows/v1/subtask" +) + +type DistributedWorkflow struct{} + +func (t *DistributedWorkflow) Execute(ctx context.Context) error { + downloadTask, err := workflows.SubmitSubtask(ctx, &DownloadData{}) + if err != nil { + return fmt.Errorf("failed to submit download subtask: %w", err) + } + + _, err = workflows.SubmitSubtask(ctx, &ProcessData{}, subtask.WithDependencies(downloadTask)) + if err != nil { + return fmt.Errorf("failed to submit process subtask: %w", err) + } + return nil +} + +// DownloadData Download a dataset and store it in a shared internal bucket. +// Requires a good network connection for high download bandwidth. +type DownloadData struct{} + +func (t *DownloadData) Execute(ctx context.Context) error { + return nil +} + +// ProcessData Perform compute-intensive processing of a dataset. +// The dataset must be available in an internal bucket. +// Requires access to a GPU for optimal performance. +type ProcessData struct{} + +func (t *ProcessData) Execute(ctx context.Context) error { + return nil +} +``` To achieve distributed execution for this workflow, no single task runner capable of executing all three of the tasks is set up. Instead, two task runners, each capable of executing one of the tasks are set up: one in a high-speed network environment and the other with GPU access. When the distributed workflow runs, the first task runner picks up the `DownloadData` task, while the second picks up the `ProcessData` task. The `DistributedWorkflow` does not require specific hardware, so it can be registered with both runners and executed by either one. 
- -```python download_task_runner.py + + + +```python Python from tilebox.workflows import Client client = Client() @@ -161,7 +247,43 @@ high_network_speed_runner = client.runner( ) high_network_speed_runner.run_forever() ``` -```python gpu_task_runner.py +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/workflows/v1" +) + +func main() { + client := workflows.NewClient() + + highNetworkSpeedRunner, err := client.NewTaskRunner("dev-cluster") + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = highNetworkSpeedRunner.RegisterTasks( + &DownloadData{}, + &DistributedWorkflow{}, + ) + if err != nil { + slog.Error("failed to register tasks", slog.Any("error", err)) + return + } + + highNetworkSpeedRunner.RunForever(context.Background()) +} +``` + + + + + +```python Python from tilebox.workflows import Client client = Client() @@ -171,7 +293,40 @@ gpu_runner = client.runner( ) gpu_runner.run_forever() ``` - +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/workflows/v1" +) + +func main() { + client := workflows.NewClient() + + gpuRunner, err := client.NewTaskRunner("dev-cluster") + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = gpuRunner.RegisterTasks( + &ProcessData{}, + &DistributedWorkflow{}, + ) + if err != nil { + slog.Error("failed to register tasks", slog.Any("error", err)) + return + } + + gpuRunner.RunForever(context.Background()) +} +``` + + + Now, both `download_task_runner.py` and `gpu_task_runner.py` are started, in parallel, on different machines with the required hardware for each. When `DistributedWorkflow` is submitted, it executes on one of the two runners, and its submitted sub-tasks are handled by the appropriate runner. 
diff --git a/workflows/concepts/tasks.mdx b/workflows/concepts/tasks.mdx index 84a095c..a8d9867 100644 --- a/workflows/concepts/tasks.mdx +++ b/workflows/concepts/tasks.mdx @@ -18,7 +18,15 @@ from tilebox.workflows import Task, ExecutionContext class MyFirstTask(Task): def execute(self, context: ExecutionContext): - print(f"Hello World!") + print("Hello World!") +``` +```go Go +type MyFirstTask struct{} + +func (t *MyFirstTask) Execute(ctx context.Context) error { + slog.Info("Hello World!") + return nil +} ``` @@ -32,7 +40,7 @@ This example demonstrates a simple task that prints "Hello World!" to the consol The `execute` method is the entry point for executing the task. This is where the task's logic is defined. It's invoked by a [task runner](/workflows/concepts/task-runners) when the task runs and performs the task's operation. - The `context` argument is an `ExecutionContext` instance that provides access to an [API for submitting new tasks](/api-reference/tilebox.workflows/ExecutionContext.submit_subtask) as part of the same job and features like [shared caching](/api-reference/tilebox.workflows/ExecutionContext.job_cache). + The `context` argument is an `ExecutionContext` instance that provides access to an [API for submitting new tasks](/api-reference/python/tilebox.workflows/ExecutionContext.submit_subtask) as part of the same job and features like [shared caching](/api-reference/python/tilebox.workflows/ExecutionContext.job_cache). @@ -46,14 +54,18 @@ This example demonstrates a simple task that prints "Hello World!" to the consol Tasks often require input parameters to operate. These inputs can range from simple values to complex data structures. By inheriting from the `Task` class, the task is treated as a Python `dataclass`, allowing input parameters to be defined as class attributes. - Tasks must be **serializable to JSON** because they may be distributed across a cluster of [task runners](/workflows/concepts/task-runners). 
+ Tasks must be **serializable to JSON or to protobuf** because they may be distributed across a cluster of [task runners](/workflows/concepts/task-runners). + + + + In Go, task parameters must be exported fields of the task struct (starting with an uppercase letter), otherwise they will not be serialized to JSON. Supported types for input parameters include: - Basic types such as `str`, `int`, `float`, `bool` - Lists and dictionaries of basic types -- Nested data classes that are also JSON-serializable +- Nested data classes that are also JSON-serializable or protobuf-serializable ```python Python @@ -67,6 +79,24 @@ Supported types for input parameters include: task = ParametrizableTask("Hello", 3, {"key": "value"}) ``` + ```go Go + type ParametrizableTask struct { + Message string + Number int + Data map[string]string + } + + func (t *ParametrizableTask) Execute(context.Context) error { + slog.Info(strings.Repeat(t.Message, t.Number)) + return nil + } + + task := &ParametrizableTask{ + Message: "Hello", + Number: 3, + Data: map[string]string{"key": "value"}, + } + ``` ## Task Composition and subtasks @@ -92,6 +122,36 @@ class ChildTask(Task): # which will result in 5 ChildTasks being submitted and executed as well task = ParentTask(5) ``` +```go Go +type ParentTask struct { + NumSubtasks int +} + +func (t *ParentTask) Execute(ctx context.Context) error { + for i := range t.NumSubtasks { + _, err := workflows.SubmitSubtask(ctx, &ChildTask{Index: i}) + if err != nil { + return err + } + } + + return nil +} + +type ChildTask struct { + Index int +} + +func (t *ChildTask) Execute(context.Context) error { + slog.Info("Executing ChildTask", slog.Int("index", t.Index)) + + return nil +} + +// after submitting this task, a task runner may pick it up and execute it +// which will result in 5 ChildTasks being submitted and executed as well +task := &ParentTask{NumSubtasks: 5} +``` In this example, a `ParentTask` submits `ChildTask` tasks as subtasks. 
The number of subtasks to be submitted is based on the `num_subtasks` attribute of the `ParentTask`. The `submit_subtask` method takes an instance of a task as its argument, meaning the task to be submitted must be instantiated with concrete parameters first. @@ -127,6 +187,87 @@ class DownloadImage(Task): with file.open("wb") as file: file.write(response.content) ``` +```go Go +package dogs + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "strings" + + "github.com/tilebox/tilebox-go/workflows/v1" +) + +type DogImage struct { + ID string `json:"id"` + URL string `json:"url"` + Width *int `json:"width"` + Height *int `json:"height"` +} + +type DownloadRandomDogImages struct { + NumImages int +} + +func (t *DownloadRandomDogImages) Execute(ctx context.Context) error { + url := fmt.Sprintf("https://api.thedogapi.com/v1/images/search?limit=%d", t.NumImages) + response, err := http.Get(url) + if err != nil { + return fmt.Errorf("failed to download images: %w", err) + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + var dogImages []DogImage + err = json.Unmarshal(body, &dogImages) + if err != nil { + return err + } + + for _, dogImage := range dogImages { + _, err := workflows.SubmitSubtask(ctx, &DownloadImage{URL: dogImage.URL}) + if err != nil { + return err + } + } + return nil +} + +type DownloadImage struct { + URL string +} + +func (t *DownloadImage) Execute(context.Context) error { + response, err := http.Get(t.URL) + if err != nil { + return fmt.Errorf("failed to download image: %w", err) + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + err = os.MkdirAll("dogs", 0o755) + if err != nil { + return fmt.Errorf("failed to create dogs directory: %w", err) + } + + elements := strings.Split(t.URL, "/") + file := 
fmt.Sprintf("dogs/%s", elements[len(elements)-1]) + + return os.WriteFile(file, body, 0o600) +} +``` This example consists of the following tasks: @@ -160,6 +301,24 @@ job = jobs.submit( jobs.display(job) ``` +```go Go +ctx := context.Background() +client := workflows.NewClient() + +job, err := client.Jobs.Submit(ctx, "download-dog-images", "dev-cluster", + []workflows.Task{ + &helloworld.DownloadRandomDogImages{ + NumImages: 5, + }, + }, +) +if err != nil { + slog.Error("Failed to submit job", slog.Any("error", err)) + return +} + +// now our deployed task runners will pick up the task and execute it +``` @@ -203,6 +362,22 @@ For example, the `RecursiveTask` below is a valid task that submits smaller inst if self.num >= 2: context.submit_subtask(RecursiveTask(self.num // 2)) ``` + ```go Go + type RecursiveTask struct { + Num int + } + + func (t *RecursiveTask) Execute(ctx context.Context) error { + slog.Info("Executing RecursiveTask", slog.Int("num", t.Num)) + if t.Num >= 2 { + _, err := workflows.SubmitSubtask(ctx, &RecursiveTask{Num: t.Num / 2}) + if err != nil { + return err + } + } + return nil + } + ``` ### Recursive subtask example @@ -214,22 +389,68 @@ To improve this, recursive subtask submission decomposes a `DownloadRandomDogIma An implementation of this recursive submission may look like this: - ```python Python - class DownloadRandomDogImages(Task): - num_images: int +```python Python +class DownloadRandomDogImages(Task): + num_images: int - def execute(self, context: ExecutionContext) -> None: - if self.num_images > 4: - half = self.num_images // 2 - remaining = self.num_images - half # account for odd numbers - context.submit_subtask(DownloadRandomDogImages(half)) - context.submit_subtask(DownloadRandomDogImages(remaining)) - else: - url = f"https://api.thedogapi.com/v1/images/search?limit={self.num_images}" - response = httpx.get(url) - for dog_image in response.json()[:self.num_images]: - context.submit_subtask(DownloadImage(dog_image["url"])) - 
``` + def execute(self, context: ExecutionContext) -> None: + if self.num_images > 4: + half = self.num_images // 2 + remaining = self.num_images - half # account for odd numbers + context.submit_subtask(DownloadRandomDogImages(half)) + context.submit_subtask(DownloadRandomDogImages(remaining)) + else: + url = f"https://api.thedogapi.com/v1/images/search?limit={self.num_images}" + response = httpx.get(url) + for dog_image in response.json()[:self.num_images]: + context.submit_subtask(DownloadImage(dog_image["url"])) +``` +```go Go +type DownloadRandomDogImages struct { + NumImages int +} + +func (t *DownloadRandomDogImages) Execute(ctx context.Context) error { + if t.NumImages > 4 { + half := t.NumImages / 2 + remaining := t.NumImages - half // account for odd numbers + _, err := workflows.SubmitSubtask(ctx, &DownloadRandomDogImages{NumImages: half}) + if err != nil { + return err + } + _, err = workflows.SubmitSubtask(ctx, &DownloadRandomDogImages{NumImages: remaining}) + if err != nil { + return err + } + } else { + url := fmt.Sprintf("https://api.thedogapi.com/v1/images/search?limit=%d", t.NumImages) + response, err := http.Get(url) + if err != nil { + return fmt.Errorf("failed to download images: %w", err) + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + var dogImages []DogImage + err = json.Unmarshal(body, &dogImages) + if err != nil { + return err + } + + for _, dogImage := range dogImages { + _, err := workflows.SubmitSubtask(ctx, &DownloadImage{URL: dogImage.URL}) + if err != nil { + return err + } + } + } + return nil +} +``` With this implementation, downloading a large number of images (for example, 9) results in the following tasks being executed: @@ -274,11 +495,44 @@ class FlakyTask(Task): if random.random() < 0.1: raise Exception("FlakyTask failed randomly") ``` +```go Go +package flaky + +import ( + "context" + "errors" + "log/slog" + 
"math/rand/v2" + + "github.com/tilebox/tilebox-go/workflows/v1" + "github.com/tilebox/tilebox-go/workflows/v1/subtask" +) + +type RootTask struct{} + +func (t *RootTask) Execute(ctx context.Context) error { + _, err := workflows.SubmitSubtask(ctx, &FlakyTask{}, + subtask.WithMaxRetries(5), + ) + return err +} + +type FlakyTask struct{} + +func (t *FlakyTask) Execute(context.Context) error { + slog.Info("Executing FlakyTask") + + if rand.Float64() < 0.1 { + return errors.New("FlakyTask failed randomly") + } + return nil +} +``` ## Dependencies -Tasks often rely on other tasks. For example, a task that processes data might depend on a task that fetches that data. **Tasks can express their dependencies on other tasks** by using the `depends_on` argument of the [submit_subtask](/api-reference/tilebox.workflows/ExecutionContext.submit_subtask) method. This means that a dependent task will only execute after the task it relies on has successfully completed. +Tasks often rely on other tasks. For example, a task that processes data might depend on a task that fetches that data. **Tasks can express their dependencies on other tasks** by using the `depends_on` argument of the [`submit_subtask`](/api-reference/python/tilebox.workflows/ExecutionContext.submit_subtask) method. This means that a dependent task will only execute after the task it relies on has successfully completed. The `depends_on` argument accepts a list of tasks, enabling a task to depend on multiple other tasks. 
@@ -308,6 +562,48 @@ class PrintTask(Task): def execute(self, context: ExecutionContext) -> None: print(self.message) ``` +```go Go +type RootTask struct{} + +func (t *RootTask) Execute(ctx context.Context) error { + firstTask, err := workflows.SubmitSubtask( + ctx, + &PrintTask{Message: "Executing first"}, + ) + if err != nil { + return err + } + + secondTask, err := workflows.SubmitSubtask( + ctx, + &PrintTask{Message: "Executing second"}, + subtask.WithDependencies(firstTask), + ) + if err != nil { + return err + } + + _, err = workflows.SubmitSubtask( + ctx, + &PrintTask{Message: "Executing last"}, + subtask.WithDependencies(secondTask), + ) + if err != nil { + return err + } + + return nil +} + +type PrintTask struct { + Message string +} + +func (t *PrintTask) Execute(context.Context) error { + slog.Info("PrintTask", slog.String("message", t.Message)) + return nil +} +``` The `RootTask` submits three `PrintTask` tasks as subtasks. These tasks depend on each other, meaning the second task executes only after the first task has successfully completed, and the third only executes after the second completes. The tasks are executed sequentially. 
@@ -366,6 +662,155 @@ A practical example is a workflow that fetches news articles from an API and pro "dev-cluster" ) ``` +```go Go +package news + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "time" + + "github.com/tilebox/tilebox-go/workflows/v1" + "github.com/tilebox/tilebox-go/workflows/v1/subtask" +) + +const newsAPIKey = "YOUR_API_KEY" + +type NewsWorkflow struct { + Category string + MaxArticles int +} + +func (t *NewsWorkflow) Execute(ctx context.Context) error { + fetchTask, err := workflows.SubmitSubtask(ctx, &FetchNews{ + Category: t.Category, + MaxArticles: t.MaxArticles, + }) + if err != nil { + return err + } + + _, err = workflows.SubmitSubtask(ctx, &PrintHeadlines{}, subtask.WithDependencies(fetchTask)) + if err != nil { + return err + } + + _, err = workflows.SubmitSubtask(ctx, &MostFrequentAuthors{}, subtask.WithDependencies(fetchTask)) + if err != nil { + return err + } + + return nil +} + +type News struct { + Status string `json:"status"` + TotalResults int `json:"totalResults"` + Articles []struct { + Source struct { + ID *string `json:"id"` + Name string `json:"name"` + } `json:"source"` + Author *string `json:"author"` + Title string `json:"title"` + Description *string `json:"description"` + URL string `json:"url"` + URLToImage *string `json:"urlToImage"` + PublishedAt time.Time `json:"publishedAt"` + Content *string `json:"content"` + } `json:"articles"` +} + +type FetchNews struct { + Category string + MaxArticles int +} + +func (t *FetchNews) Execute(context.Context) error { + url := fmt.Sprintf("https://newsapi.org/v2/top-headlines?category=%s&pageSize=%d&country=us&apiKey=%s", t.Category, t.MaxArticles, newsAPIKey) + response, err := http.Get(url) + if err != nil { + return fmt.Errorf("failed to download news: %w", err) + } + + defer response.Body.Close() + body, err := io.ReadAll(response.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + // check out 
our documentation page on caches to learn + // about a better way of passing data between tasks + return os.WriteFile("news.json", body, 0o600) +} + +type PrintHeadlines struct{} + +func (t *PrintHeadlines) Execute(context.Context) error { + newsBytes, err := os.ReadFile("news.json") + if err != nil { + return fmt.Errorf("failed to read news: %w", err) + } + + var news News + err = json.Unmarshal(newsBytes, &news) + if err != nil { + return fmt.Errorf("failed to unmarshal news: %w", err) + } + + for _, article := range news.Articles { + slog.Info("Article", slog.Time("published_at", article.PublishedAt), slog.String("title", article.Title)) + } + + return nil +} + +type MostFrequentAuthors struct{} + +func (t *MostFrequentAuthors) Execute(context.Context) error { + newsBytes, err := os.ReadFile("news.json") + if err != nil { + return fmt.Errorf("failed to read news: %w", err) + } + + var news News + err = json.Unmarshal(newsBytes, &news) + if err != nil { + return fmt.Errorf("failed to unmarshal news: %w", err) + } + + authors := make(map[string]int) + for _, article := range news.Articles { + if article.Author == nil { + continue + } + authors[*article.Author]++ + } + + for author, count := range authors { + slog.Info("Author", slog.String("author", author), slog.Int("count", count)) + } + + return nil +} + +// in main now submit a job, and then visualize it +/* +job, err := client.Jobs.Submit(ctx, "process-news", "dev-cluster", + []workflows.Task{ + &NewsWorkflow{ + Category: "science", + MaxArticles: 5, + }, + }, +) +*/ +``` ```plaintext Output @@ -414,27 +859,48 @@ If unspecified, the identifier of a task defaults to the class name. For instanc To address this, Tilebox Workflows offers a way to explicitly specify the identifier of a task. This is done by overriding the `identifier` method of the `Task` class. This method should return a unique string identifying the task. 
This decouples the task's identifier from its class name, allowing you to change the identifier without renaming the class. It also allows tasks with the same class name to have different identifiers. The `identifier` method can also specify a version number for the task—see the section on [semantic versioning](#semantic-versioning) below for more details. - ```python Python - class MyTask(Task): - def execute(self, context: ExecutionContext) -> None: - pass +```python Python +class MyTask(Task): + def execute(self, context: ExecutionContext) -> None: + pass - # MyTask has the identifier "MyTask" and the default version of "v0.0" +# MyTask has the identifier "MyTask" and the default version of "v0.0" - class MyTask2(Task): - @staticmethod - def identifier() -> tuple[str, str]: - return "tilebox.com/example_workflow/MyTask", "v1.0" +class MyTask2(Task): + @staticmethod + def identifier() -> tuple[str, str]: + return "tilebox.com/example_workflow/MyTask", "v1.0" - def execute(self, context: ExecutionContext) -> None: - pass + def execute(self, context: ExecutionContext) -> None: + pass - # MyTask2 has the identifier "tilebox.com/example_workflow/MyTask" and the version "v1.0" - ``` +# MyTask2 has the identifier "tilebox.com/example_workflow/MyTask" and the version "v1.0" +``` +```go Go +type MyTask struct{} + +func (t *MyTask) Execute(context.Context) error { + return nil +} + +// MyTask has the identifier "MyTask" and the default version of "v0.0" + +type MyTask2 struct{} + +func (t *MyTask2) Identifier() workflows.TaskIdentifier { + return workflows.NewTaskIdentifier("tilebox.com/example_workflow/MyTask", "v1.0") +} + +func (t *MyTask2) Execute(context.Context) error { + return nil +} + +// MyTask2 has the identifier "tilebox.com/example_workflow/MyTask" and the version "v1.0" +``` - The `identifier` method must be defined as either a `classmethod` or a `staticmethod`, meaning it can be called without instantiating the class. 
In Python, the `identifier` method must be defined as either a `classmethod` or a `staticmethod`, meaning it can be called without instantiating the class.
In respon Each automation has a [task identifier](/workflows/concepts/tasks#task-identifiers), a [version](/workflows/concepts/tasks#semantic-versioning), and [input parameters](/workflows/concepts/tasks#input-parameters), just like regular tasks. Automations also automatically provide a special `trigger` attribute that contains information about the event that initiated the task's execution. + + Go doesn't support registering automations yet, please use python or the console instead. + + ## Automation Client The Tilebox Workflows client includes a sub-client for managing automations. You can create this sub-client by calling the `automations` method on the main client instance. diff --git a/workflows/near-real-time/cron.mdx b/workflows/near-real-time/cron.mdx index 493437c..8f07207 100644 --- a/workflows/near-real-time/cron.mdx +++ b/workflows/near-real-time/cron.mdx @@ -4,6 +4,10 @@ description: Trigger jobs based on a Cron schedule. icon: clock --- + + This feature is only available in the Python SDK. + + ## Creating Cron tasks Cron tasks run repeatedly on a specified [cron](https://en.wikipedia.org/wiki/Cron) schedule. diff --git a/workflows/near-real-time/storage-events.mdx b/workflows/near-real-time/storage-events.mdx index 7b2759b..b227bee 100644 --- a/workflows/near-real-time/storage-events.mdx +++ b/workflows/near-real-time/storage-events.mdx @@ -4,6 +4,10 @@ description: Trigger jobs after objects are created or modified in a storage loc icon: right-to-line --- + + This feature is only available in the Python SDK. + + ## Creating a Storage Event Task Storage Event Tasks are automations triggered when objects are created or modified in a [storage location](#storage-locations). 
diff --git a/workflows/observability/logging.mdx b/workflows/observability/logging.mdx index 847b442..8ee7984 100644 --- a/workflows/observability/logging.mdx +++ b/workflows/observability/logging.mdx @@ -21,6 +21,7 @@ The Tilebox workflow SDKs include support for exporting OpenTelemetry logs. To e To configure logging with Axiom, you first need to create a [Axiom Dataset](https://axiom.co/docs/reference/datasets) to export your workflow logs to. You will also need an [Axiom API key](https://axiom.co/docs/reference/tokens) with the necessary write permissions for your Axiom dataset. + ```python Python from tilebox.workflows import Client, Task, ExecutionContext from tilebox.workflows.observability.logging import configure_otel_logging_axiom @@ -45,6 +46,65 @@ The Tilebox workflow SDKs include support for exporting OpenTelemetry logs. To e if __name__ == "__main__": main() ``` +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/examples/workflows/axiom" + "github.com/tilebox/tilebox-go/observability" + "github.com/tilebox/tilebox-go/observability/logger" + "github.com/tilebox/tilebox-go/workflows/v1" +) + +// specify a service name and version to identify the instrumenting application in traces and logs +var service = &observability.Service{Name: "task-runner", Version: "dev"} + +func main() { + ctx := context.Background() + + // Setup OpenTelemetry logging and slog + // It uses AXIOM_API_KEY and AXIOM_LOGS_DATASET from the environment + axiomHandler, shutdownLogger, err := logger.NewAxiomHandler(ctx, service, + logger.WithLevel(slog.LevelInfo), // export logs at info level and above as OTEL logs + ) + defer shutdownLogger(ctx) + if err != nil { + slog.Error("failed to set up axiom log handler", slog.Any("error", err)) + return + } + tileboxLogger := logger.New( // initialize a slog.Logger + axiomHandler, // export logs to the Axiom handler + logger.NewConsoleHandler(logger.WithLevel(slog.LevelWarn)), // and additionally, 
export WARN and ERROR logs to stdout + ) + slog.SetDefault(tileboxLogger) // all future slog calls will be forwarded to the tilebox logger + + client := workflows.NewClient() + + cluster, err := client.Clusters.Get(ctx, "dev-cluster") + if err != nil { + slog.Error("failed to get cluster", slog.Any("error", err)) + return + } + + taskRunner, err := client.NewTaskRunner(cluster) + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = taskRunner.RegisterTasks(&MyTask{}) + if err != nil { + slog.Error("failed to register tasks", slog.Any("error", err)) + return + } + + taskRunner.RunForever(ctx) +} +``` + Setting the environment variables `AXIOM_API_KEY` and `AXIOM_LOGS_DATASET` allows you to omit these arguments in the `configure_otel_logging_axiom` function. @@ -54,6 +114,7 @@ The Tilebox workflow SDKs include support for exporting OpenTelemetry logs. To e If you are using another OpenTelemetry-compatible backend besides Axiom, such as OpenTelemetry Collector or Jaeger, you can configure logging by specifying the URL endpoint to export log messages to. + ```python Python from tilebox.workflows import Client from tilebox.workflows.observability.logging import configure_otel_logging @@ -79,6 +140,71 @@ The Tilebox workflow SDKs include support for exporting OpenTelemetry logs. 
To e if __name__ == "__main__": main() ``` +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/examples/workflows/opentelemetry" + "github.com/tilebox/tilebox-go/observability" + "github.com/tilebox/tilebox-go/observability/logger" + "github.com/tilebox/tilebox-go/workflows/v1" +) + +// specify a service name and version to identify the instrumenting application in traces and logs +var service = &observability.Service{Name: "task-runner", Version: "dev"} + +func main() { + ctx := context.Background() + + endpoint := "http://localhost:4318" + headers := map[string]string{ + "Authorization": "Bearer ", + } + + // Setup an OpenTelemetry log handler, exporting logs to an OTEL compatible log endpoint + otelHandler, shutdownLogger, err := logger.NewOtelHandler(ctx, service, + logger.WithEndpointURL(endpoint), + logger.WithHeaders(headers), + logger.WithLevel(slog.LevelInfo), // export logs at info level and above as OTEL logs + ) + defer shutdownLogger(ctx) + if err != nil { + slog.Error("failed to set up otel log handler", slog.Any("error", err)) + return + } + tileboxLogger := logger.New( // initialize a slog.Logger + otelHandler, // export logs to the OTEL handler + logger.NewConsoleHandler(logger.WithLevel(slog.LevelWarn)), // and additionally, export WARN and ERROR logs to stdout + ) + slog.SetDefault(tileboxLogger) // all future slog calls will be forwarded to the tilebox logger + + client := workflows.NewClient() + + cluster, err := client.Clusters.Get(ctx, "dev-cluster") + if err != nil { + slog.Error("failed to get cluster", slog.Any("error", err)) + return + } + + taskRunner, err := client.NewTaskRunner(cluster) + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = taskRunner.RegisterTasks(&MyTask{}) + if err != nil { + slog.Error("failed to register tasks", slog.Any("error", err)) + return + } + + taskRunner.RunForever(ctx) +} +``` + If you set the environment 
variable `OTEL_LOGS_ENDPOINT`, you can omit that argument in the `configure_otel_logging` function. @@ -87,6 +213,7 @@ The Tilebox workflow SDKs include support for exporting OpenTelemetry logs. To e To log messages to the standard console output, use the `configure_console_logging` function. + ```python Python from tilebox.workflows import Client from tilebox.workflows.observability.logging import configure_console_logging @@ -106,6 +233,48 @@ The Tilebox workflow SDKs include support for exporting OpenTelemetry logs. To e if __name__ == "__main__": main() ``` +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/examples/workflows/opentelemetry" + "github.com/tilebox/tilebox-go/observability/logger" + "github.com/tilebox/tilebox-go/workflows/v1" +) + +func main() { + ctx := context.Background() + + tileboxLogger := logger.New(logger.NewConsoleHandler(logger.WithLevel(slog.LevelWarn))) + slog.SetDefault(tileboxLogger) // all future slog calls will be forwarded to the tilebox logger + + client := workflows.NewClient() + + cluster, err := client.Clusters.Get(ctx, "dev-cluster") + if err != nil { + slog.Error("failed to get cluster", slog.Any("error", err)) + return + } + + taskRunner, err := client.NewTaskRunner(cluster) + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = taskRunner.RegisterTasks(&MyTask{}) + if err != nil { + slog.Error("failed to register tasks", slog.Any("error", err)) + return + } + + taskRunner.RunForever(ctx) +} +``` + The console logging backend is not recommended for production use. Log messages will be emitted to the standard output of each task runner rather than a centralized logging system. It is intended for local development and testing of workflows. @@ -119,6 +288,7 @@ The Tilebox workflow SDKs include support for exporting OpenTelemetry logs. To e Use the logger provided by the Tilebox SDK to emit log messages from your tasks. 
You can then use it to send log messages to the [configured logging backend](#configure-logging). Log messages emitted within a task's `execute` method are also automatically recorded as span events for the current [job trace](/workflows/observability/tracing). + ```python Python import logging from tilebox.workflows import Task, ExecutionContext @@ -131,10 +301,27 @@ class MyTask(Task): # emit a log message to the configured OpenTelemetry backend logger.info("Hello world from configured logger!") ``` +```go Go +package tasks + +import ( + "context" + "log/slog" +) + +type MyTask struct{} + +func (t *MyTask) Execute(context.Context) error { + // emit a log message to the configured OpenTelemetry backend + slog.Info("Hello world from configured logger!") + return nil +} +``` + ## Logging task runner internals -Tilebox task runners also internally use a logger. By default, it's set to the WARNING level, but you can change it by explicitly configuring a logger for the workflows client when constructing the task runner. +In python, Tilebox task runners also internally use a logger. By default, it's set to the WARNING level, but you can change it by explicitly configuring a logger for the workflows client when constructing the task runner. ```python Python from tilebox.workflows import Client diff --git a/workflows/observability/tracing.mdx b/workflows/observability/tracing.mdx index bd30a29..5d02bef 100644 --- a/workflows/observability/tracing.mdx +++ b/workflows/observability/tracing.mdx @@ -31,6 +31,7 @@ The Tilebox workflow SDKs have built-in support for exporting OpenTelemetry trac To configure tracing with Axiom, you first need to create a [Axiom Dataset](https://axiom.co/docs/reference/datasets) to export your workflow traces to. You will also need an [Axiom API key](https://axiom.co/docs/reference/tokens) with the necessary write permissions for your Axiom dataset. 
+ ```python Python from tilebox.workflows import Client from tilebox.workflows.observability.tracing import configure_otel_tracing_axiom @@ -55,6 +56,60 @@ The Tilebox workflow SDKs have built-in support for exporting OpenTelemetry trac if __name__ == "__main__": main() ``` +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/examples/workflows/axiom" + "github.com/tilebox/tilebox-go/observability" + "github.com/tilebox/tilebox-go/observability/tracer" + "github.com/tilebox/tilebox-go/workflows/v1" + "go.opentelemetry.io/otel" +) + +// specify a service name and version to identify the instrumenting application in traces and logs +var service = &observability.Service{Name: "task-runner", Version: "dev"} + +func main() { + ctx := context.Background() + + // Setup an OpenTelemetry trace span processor, exporting traces and spans to Axiom + // It uses AXIOM_API_KEY and AXIOM_TRACES_DATASET from the environment + tileboxTracerProvider, shutdown, err := tracer.NewAxiomProvider(ctx, service) + defer shutdown(ctx) + if err != nil { + slog.Error("failed to set up axiom tracer provider", slog.Any("error", err)) + return + } + otel.SetTracerProvider(tileboxTracerProvider) // set the tilebox tracer provider as the global OTEL tracer provider + + client := workflows.NewClient() + + cluster, err := client.Clusters.Get(ctx, "dev-cluster") + if err != nil { + slog.Error("failed to get cluster", slog.Any("error", err)) + return + } + + taskRunner, err := client.NewTaskRunner(cluster) + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = taskRunner.RegisterTasks(&MyTask{}) + if err != nil { + slog.Error("failed to register tasks", slog.Any("error", err)) + return + } + + taskRunner.RunForever(ctx) +} +``` + Set the environment variables `AXIOM_API_KEY` and `AXIOM_TRACES_DATASET` to omit those arguments @@ -65,6 +120,7 @@ The Tilebox workflow SDKs have built-in support for exporting 
OpenTelemetry trac If you are using another OpenTelemetry-compatible backend besides Axiom, like OpenTelemetry Collector or Jaeger, you can configure tracing by specifying the URL endpoint to export traces to. + ```python Python from tilebox.workflows import Client from tilebox.workflows.observability.tracing import configure_otel_tracing @@ -90,6 +146,67 @@ The Tilebox workflow SDKs have built-in support for exporting OpenTelemetry trac if __name__ == "__main__": main() ``` +```go Go +package main + +import ( + "context" + "log/slog" + + "github.com/tilebox/tilebox-go/examples/workflows/opentelemetry" + "github.com/tilebox/tilebox-go/observability" + "github.com/tilebox/tilebox-go/observability/tracer" + "github.com/tilebox/tilebox-go/workflows/v1" + "go.opentelemetry.io/otel" +) + +// specify a service name and version to identify the instrumenting application in traces and logs +var service = &observability.Service{Name: "task-runner", Version: "dev"} + +func main() { + ctx := context.Background() + + endpoint := "http://localhost:4318" + headers := map[string]string{ + "Authorization": "Bearer ", + } + + // Setup an OpenTelemetry trace span processor, exporting traces and spans to an OTEL compatible trace endpoint + tileboxTracerProvider, shutdown, err := tracer.NewOtelProvider(ctx, service, + tracer.WithEndpointURL(endpoint), + tracer.WithHeaders(headers), + ) + defer shutdown(ctx) + if err != nil { + slog.Error("failed to set up otel span processor", slog.Any("error", err)) + return + } + otel.SetTracerProvider(tileboxTracerProvider) // set the tilebox tracer provider as the global OTEL tracer provider + + client := workflows.NewClient() + + cluster, err := client.Clusters.Get(ctx, "dev-cluster") + if err != nil { + slog.Error("failed to get cluster", slog.Any("error", err)) + return + } + + taskRunner, err := client.NewTaskRunner(cluster) + if err != nil { + slog.Error("failed to create task runner", slog.Any("error", err)) + return + } + + err = 
taskRunner.RegisterTasks(&MyTask{}) + if err != nil { + slog.Error("failed to register tasks", slog.Any("error", err)) + return + } + + taskRunner.RunForever(ctx) +} +``` + Set the environment variable `OTEL_TRACES_ENDPOINT` to omit that argument