Documentation Index
Fetch the complete documentation index at: https://databridge-add-core-funcs.mintlify.app/llms.txt
Use this file to discover all available pages before exploring further.
Usage
from morphik import Morphik

db = Morphik()

# Batch-ingest several files, applying the same metadata dict to all of them.
result = db.ingest_files(
    files=["document1.pdf", "document2.docx", "image.png"],
    metadata={"category": "reports"},
    use_colpali=True,
    parallel=True,
)

# Successfully ingested files come back as Document objects.
for doc in result["documents"]:
    print(f"Successfully ingested: {doc.filename} (ID: {doc.external_id})")

# Per-file failures are collected instead of raising, so check them explicitly.
for error in result["errors"]:
    print(f"Error ingesting {error.get('filename')}: {error.get('error')}")
from morphik import AsyncMorphik
async with AsyncMorphik() as db:
# Batch ingest files with shared metadata
result = await db.ingest_files(
files=["document1.pdf", "document2.docx", "image.png"],
metadata={"category": "reports"},
use_colpali=True,
parallel=True
)
# Process the results
for doc in result["documents"]:
print(f"Successfully ingested: {doc.filename} (ID: {doc.external_id})")
# Check for errors
for error in result["errors"]:
print(f"Error ingesting {error.get('filename')}: {error.get('error')}")
Parameters
files (List[Union[str, bytes, BinaryIO, Path]]): List of files to ingest (path strings, bytes, file objects, or Path objects)
metadata (Dict[str, Any] | List[Dict[str, Any]], optional): Metadata to apply to the files. Can be either:
- A single dict to apply to all files
- A list of dicts, one per file (the list length must match the number of files)
use_colpali (bool, optional): Whether to use ColPali-style embedding model. Defaults to True.
parallel (bool, optional): Whether to process files in parallel. Defaults to True.
When specifying metadata, you can include Python datetime, date, Decimal, or numeric types. The SDK normalizes them, sends the accompanying metadata_types, and unlocks the advanced queries outlined in Metadata Filtering.
Returns
An object containing:
documents: List of successfully ingested Document objects
errors: List of errors encountered during ingestion (each error is a dict with 'filename' and 'error' keys)
Advanced Examples
# Attach a distinct metadata dict to each ingested file.
file_paths = ["report.pdf", "data.csv", "presentation.pptx"]

# One dict per file — the list length must equal len(file_paths).
per_file_metadata = [
    {"category": "reports", "author": "Alice"},
    {"category": "data", "source": "database"},
    {"category": "presentations", "department": "marketing"},
]

result = db.ingest_files(files=file_paths, metadata=per_file_metadata)
# Ingest files with different metadata for each file
files = ["report.pdf", "data.csv", "presentation.pptx"]
# Metadata must match the length of files list
metadata_list = [
{"category": "reports", "author": "Alice"},
{"category": "data", "source": "database"},
{"category": "presentations", "department": "marketing"}
]
result = await db.ingest_files(
files=files,
metadata=metadata_list
)
import io
from pathlib import Path

# The `files` argument accepts a mix of input types in a single call.
path_str = "document.pdf"                         # plain path string
path_obj = Path("image.png")                      # pathlib.Path object
file_obj = open("data.csv", "rb")                 # binary file object
raw_bytes = b"Hello, world!"                      # bytes (needs a filename in metadata)
memory_buf = io.BytesIO(b"Some in-memory data")   # BytesIO (needs a filename in metadata)

result = db.ingest_files(
    files=[path_str, path_obj, file_obj, raw_bytes, memory_buf],
    metadata=[
        {"type": "document"},
        {"type": "image"},
        {"type": "data"},
        {"type": "text", "filename": "hello.txt"},
        {"type": "text", "filename": "memory-data.txt"},
    ],
)

# The SDK does not close file objects for you — close what you opened.
file_obj.close()
import io
from pathlib import Path
# Mixing different file input types
file1 = "document.pdf" # Path string
file2 = Path("image.png") # Path object
file3 = open("data.csv", "rb") # File object
file4 = b"Hello, world!" # Bytes (requires filename)
file5 = io.BytesIO(b"Some in-memory data") # BytesIO (requires filename)
result = await db.ingest_files(
files=[file1, file2, file3, file4, file5],
metadata=[
{"type": "document"},
{"type": "image"},
{"type": "data"},
{"type": "text", "filename": "hello.txt"},
{"type": "text", "filename": "memory-data.txt"}
]
)
# Don't forget to close file objects
file3.close()