diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 23bc8c0..0edd879 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -4,7 +4,17 @@ sidebarTitle: "Quickstart" description: "Get started with LanceDB in minutes." icon: rocket --- -import { PyConnect, PyConnectCloud, RsConnect, RsConnectCloud, TsConnect, TsConnectCloud } from '/snippets/connection.mdx'; +import { + PyConnect, + PyConnectCloud, + PyConnectObjectStorage, + RsConnect, + RsConnectCloud, + RsConnectObjectStorage, + TsConnect, + TsConnectCloud, + TsConnectObjectStorage, +} from '/snippets/connection.mdx'; import { PyQuickstartCreateTable, PyQuickstartVectorSearch1, @@ -39,8 +49,16 @@ cargo add lancedb ## 2. Connect to a LanceDB database -Using LanceDB's open source version is as simple as importing LanceDB as a library -and pointing to a local path -- no servers needed! +LanceDB supports several URI patterns to connect to a database. + +- A local filesystem path (when using it as an embedded library) +- A `db://...` URI (when using LanceDB Cloud or Enterprise) +- An object storage URI: `s3://...`, `gs://...`, or `az://...` (OSS mode) + +### Connect via local path with LanceDB + +The simplest way to begin is to use LanceDB OSS. Simply import LanceDB as an embedded library in your +client SDK of choice and point to a local path. @@ -52,17 +70,38 @@ and pointing to a local path -- no servers needed! - { "// Rust imports go here\n" } + { "use lancedb::connect;\n" } + { "\n" } {RsConnect} -### Optional: LanceDB Cloud or Enterprise versions +### Connect via object storage URIs + +You can also connect LanceDB OSS directly to object storage: + + + + {PyConnectObjectStorage} + + + + {TsConnectObjectStorage} + + + + {RsConnectObjectStorage} + + + +For credentials, endpoints, and provider-specific options, see +[Configuring storage](/storage/configuration). 
+ +### Connect to LanceDB Enterprise -If you're looking for a fully-managed solution, -you can use LanceDB Cloud, which provides managed infrastructure, -security, and automatic backups. Simply replace the local path with a remote `uri` +If you're using a managed LanceDB service on either LanceDB Cloud or Enterprise, you can connect using a `db://` URI, +along with any necessary credentials. Simply replace the local path with a remote `uri` that points to where your data is stored, and you're ready to go. @@ -79,8 +118,7 @@ that points to where your data is stored, and you're ready to go. -For enormous scale and more advanced use cases beyond just search -- like -feature engineering, model training and more, check out [LanceDB Enterprise](/enterprise). +To learn more about LanceDB Enterprise, see the [Enterprise documentation](/enterprise). ## 3. Obtain data and ingest into LanceDB diff --git a/docs/snippets/connection.mdx b/docs/snippets/connection.mdx index 739d240..5977a33 100644 --- a/docs/snippets/connection.mdx +++ b/docs/snippets/connection.mdx @@ -4,11 +4,17 @@ export const PyConnect = "import lancedb\n\nuri = \"ex_lancedb\"\ndb = lancedb.c export const PyConnectCloud = "uri = \"db://your-database-uri\"\napi_key = \"your-api-key\"\nregion = \"us-east-1\"\n"; +export const PyConnectObjectStorage = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\ndb = lancedb.connect(uri)\n"; + export const TsConnect = "import * as lancedb from \"@lancedb/lancedb\";\n\nasync function connectExample(uri: string) {\n const db = await lancedb.connect(uri);\n return db;\n}\n"; export const TsConnectCloud = "const uri = \"db://your-database-uri\";\nconst apiKey = \"your-api-key\";\nconst region = \"us-east-1\";\n"; +export const TsConnectObjectStorage = "async function connectObjectStorageExample() {\n const uri = \"s3://your-bucket/path\";\n // You can also use \"gs://your-bucket/path\" or
\"az://your-container/path\".\n const db = await lancedb.connect(uri);\n return db;\n}\n"; + export const RsConnect = "async fn connect_example(uri: &str) {\n let db = connect(uri).execute().await.unwrap();\n let _ = db;\n}\n"; export const RsConnectCloud = "let uri = \"db://your-database-uri\";\nlet api_key = \"your-api-key\";\nlet region = \"us-east-1\";\n"; +export const RsConnectObjectStorage = "let uri = \"s3://your-bucket/path\";\n// You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\n"; + diff --git a/docs/tables/index.mdx b/docs/tables/index.mdx index 22d51c6..b982c1a 100644 --- a/docs/tables/index.mdx +++ b/docs/tables/index.mdx @@ -6,7 +6,7 @@ icon: "table" keywords: ["create table", "polars", "pandas", "pyarrow", "dataframe", "nested data"] --- -import { PyConnect, PyConnectCloud, TsConnect, TsConnectCloud, RsConnect, RsConnectCloud } from '/snippets/connection.mdx'; +import { PyConnect, TsConnect, RsConnect } from '/snippets/connection.mdx'; import { PyBasicImports, PyDataLoad, @@ -162,31 +162,20 @@ We start by connecting to a LanceDB database path. -If you're using LanceDB Cloud or Enterprise, replace the local connection string +If you're using LanceDB Enterprise, replace the local connection string with the appropriate remote URI and authentication details. - + +**Working with remote tables** + When you connect to a remote URI (Cloud/Enterprise), `open_table(...)` returns a *remote* table. Remote tables support core operations (ingest, search, update, delete), but some convenience methods for bulk data export are not available. In the Python SDK, `table.to_arrow()` and `table.to_pandas()` are not implemented for remote tables. To retrieve data, use search queries instead: `table.search(query).limit(n).to_arrow()`. 
- - - - - {PyConnectCloud} - - - - {TsConnectCloud} - + - - {RsConnectCloud} - - ## Create a table and ingest data diff --git a/tests/py/test_connection.py b/tests/py/test_connection.py index 2a9b504..32195df 100644 --- a/tests/py/test_connection.py +++ b/tests/py/test_connection.py @@ -20,4 +20,16 @@ def test_connection(): uri = "db://your-database-uri" api_key = "your-api-key" region = "us-east-1" -# --8<-- [end:connect_cloud] \ No newline at end of file +# --8<-- [end:connect_cloud] + + +def connect_object_storage_config(): + # --8<-- [start:connect_object_storage] + import lancedb + + uri = "s3://your-bucket/path" + # You can also use "gs://your-bucket/path" or "az://your-container/path". + db = lancedb.connect(uri) + # --8<-- [end:connect_object_storage] + + return db diff --git a/tests/rs/connection.rs b/tests/rs/connection.rs index 32272e6..a358bcd 100644 --- a/tests/rs/connection.rs +++ b/tests/rs/connection.rs @@ -20,6 +20,7 @@ async fn main() { // Keep the cloud snippet in this file, but don't run it in CI. let _ = connect_cloud_config(); + let _ = connect_object_storage_config(); } fn connect_cloud_config() -> (String, String, String) { @@ -32,6 +33,15 @@ fn connect_cloud_config() -> (String, String, String) { (uri.to_string(), api_key.to_string(), region.to_string()) } +fn connect_object_storage_config() -> &'static str { + // --8<-- [start:connect_object_storage] + let uri = "s3://your-bucket/path"; + // You can also use "gs://your-bucket/path" or "az://your-container/path". 
+ // --8<-- [end:connect_object_storage] + + uri +} + #[allow(dead_code)] fn repo_root() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("..") diff --git a/tests/ts/connection.test.ts b/tests/ts/connection.test.ts index 795402b..8055a94 100644 --- a/tests/ts/connection.test.ts +++ b/tests/ts/connection.test.ts @@ -26,4 +26,13 @@ const apiKey = "your-api-key"; const region = "us-east-1"; // --8<-- [end:connect_cloud] -void [uri, apiKey, region]; +// --8<-- [start:connect_object_storage] +async function connectObjectStorageExample() { + const uri = "s3://your-bucket/path"; + // You can also use "gs://your-bucket/path" or "az://your-container/path". + const db = await lancedb.connect(uri); + return db; +} +// --8<-- [end:connect_object_storage] + +void [uri, apiKey, region, connectObjectStorageExample];