ApiServerConfig

FieldTypeDefaultDescription
authAuthConfig
{
  "allowAnonymous": true,
  "didEncryption": {
    "enabled": false
  },
  "jwtSecret": "",
  "passwordPolicy": {
    "minNewPasswordLength": 8
  },
  "providers": []
}
Authentication & authorization
databaseDatabaseConfig
{
  "provider": "InMemory"
}
Database
datasetEnvVarsDatasetEnvVarsConfig
{
  "enabled": false
}
Dataset environment variable feature
emailEmailConfig
{
  "gateway": {
    "kind": "Dummy"
  },
  "senderAddress": ""
}
Email gateway configuration
engineEngineConfig
{
  "datafusionEmbedded": {
    "base": {
      "datafusion.catalog.default_catalog": "kamu",
      "datafusion.catalog.default_schema": "kamu",
      "datafusion.catalog.information_schema": "true",
      "datafusion.sql_parser.enable_ident_normalization": "false"
    },
    "batchQuery": {},
    "compaction": {
      "datafusion.execution.target_partitions": "1"
    },
    "ingest": {
      "datafusion.execution.target_partitions": "1"
    },
    "useLegacyArrowBufferEncoding": false
  },
  "images": {
    "datafusion": "ghcr.io/kamu-data/engine-datafusion:0.9.0",
    "flink": "ghcr.io/kamu-data/engine-flink:0.18.2-flink_1.16.0-scala_2.12-java8",
    "risingwave": "ghcr.io/kamu-data/engine-risingwave:0.2.0-risingwave_1.7.0-alpha",
    "spark": "ghcr.io/kamu-data/engine-spark:0.23.1-spark_3.5.0"
  },
  "networkNs": "Private",
  "runtime": "Podman",
  "shutdownTimeout": "5s",
  "startTimeout": "30s"
}
Ingest and transform engines
extraExtraConfig
{
  "graphql": {}
}
Experimental and temporary module configuration
flowSystemFlowSystemConfig
{
  "flowAgent": {
    "awaitingStepSecs": 1,
    "defaultRetryPolicies": {},
    "mandatoryThrottlingPeriodSecs": 60
  },
  "flowSystemEventAgent": {
    "batchSize": 100,
    "maxListeningTimeoutMs": 60000,
    "minDebounceIntervalMs": 100
  },
  "taskAgent": {
    "taskCheckingIntervalSecs": 1
  }
}
Configuration for the flow system
identityIdentityConfignullUNSTABLE: Identity configuration
outboxOutboxConfig
{
  "awaitingStepSecs": 1,
  "batchSize": 20
}
Outbox configuration
protocolProtocolConfig
{
  "flightSql": {
    "allowAnonymous": true,
    "anonSessionExpirationTimeout": "5m",
    "anonSessionInactivityTimeout": "5s",
    "authedSessionExpirationTimeout": "30m",
    "authedSessionInactivityTimeout": "5s"
  },
  "ipfs": {
    "httpGateway": "http://localhost:8080/",
    "preResolveDnslink": true
  }
}
Protocols
quotaQuotaConfig
{
  "account": {}
}
Default quotas configured by type
repoRepoConfig
{
  "caching": {
    "registryCacheEnabled": false
  }
}
Dataset repository
runtimeRuntimeConfig{}Tokio runtime
searchSearchConfig
{
  "embeddingsChunker": {
    "kind": "Simple",
    "splitParagraphs": false,
    "splitSections": false
  },
  "embeddingsEncoder": {
    "dimensions": 1536,
    "kind": "OpenAi",
    "modelName": "text-embedding-ada-002"
  },
  "indexer": {
    "clearOnStart": false,
    "incrementalIndexing": false
  },
  "repo": {
    "kind": "Dummy"
  },
  "semanticSearchThresholdScore": 0.0
}
Search configuration
sourceSourceConfig
{
  "ethereum": {
    "commitAfterBlocksScanned": 1000000,
    "getLogsBlockStride": 100000,
    "rpcEndpoints": [],
    "useBlockTimestampFallback": false
  },
  "mqtt": {
    "brokerIdleTimeoutMs": 1000
  },
  "targetRecordsPerSlice": 10000
}
Ingestion's sources
uploadRepoUploadRepoConfig
{
  "maxFileSizeMb": 50,
  "storage": {
    "kind": "Local"
  }
}
File upload repository
urlUrlConfig
{
  "baseUrlFlightsql": "grpc://localhost:50050",
  "baseUrlPlatform": "http://localhost:4200/",
  "baseUrlRest": "http://localhost:8080/"
}
External URLs
webhooksWebhooksConfig
{
  "deliveryTimeoutSecs": 10,
  "maxConsecutiveFailures": 5,
  "secretEncryptionEnabled": false
}
Configuration for webhooks

AccountConfig

FieldTypeDefaultDescription
accountNameAccountName
accountTypeAccountType"User"
avatarUrlstringnull
displayNamestringnull

Auto-derived from account_name if omitted

emailEmail
idAccountIDnull

Auto-derived from account_name if omitted

passwordPassword
propertiesarray[]
providerstring"password"
registeredAtstringnull
treatDatasetsAsPublicbooleanfalse

AccountID

Base type: string

AccountName

Base type: string

AccountPropertyName

Variants
CanProvisionAccounts
Admin

AccountType

Variants
User
Organization

AuthConfig

FieldTypeDefaultDescription
allowAnonymousbooleantrue
didEncryptionDidSecretEncryptionConfig
{
  "enabled": false
}
jwtSecretstring""
passwordPolicyPasswordPolicyConfig
{
  "minNewPasswordLength": 8
}
providersarray[]

AuthProviderConfig

Variants
Github
Password

AuthProviderConfig::Github

FieldTypeDefaultDescription
clientIdstring
clientSecretstring
kindstring

AuthProviderConfig::Password

FieldTypeDefaultDescription
accountsarray[]
kindstring

ContainerRuntimeType

Variants
Docker
Podman

DatabaseConfig

Variants
InMemory
Sqlite
Postgres

DatabaseConfig::InMemory

FieldTypeDefaultDescription
providerstring

DatabaseConfig::Sqlite

FieldTypeDefaultDescription
databasePathstring
providerstring

DatabaseConfig::Postgres

FieldTypeDefaultDescription
acquireTimeoutSecsintegernull
credentialsPolicyDatabaseCredentialsPolicyConfig
databaseNamestring
hoststring
maxConnectionsintegernull
maxLifetimeSecsintegernull
portintegernull
providerstring

DatabaseCredentialSourceConfig

Variants
RawPassword
AwsSecret
AwsIamToken

DatabaseCredentialSourceConfig::RawPassword

FieldTypeDefaultDescription
kindstring
rawPasswordstring
userNamestring

DatabaseCredentialSourceConfig::AwsSecret

FieldTypeDefaultDescription
kindstring
secretNamestring

DatabaseCredentialSourceConfig::AwsIamToken

FieldTypeDefaultDescription
kindstring
userNamestring

DatabaseCredentialsPolicyConfig

FieldTypeDefaultDescription
rotationFrequencyInMinutesintegernull
sourceDatabaseCredentialSourceConfig

DatasetEnvVarsConfig

FieldTypeDefaultDescription
enabledbooleanfalse
encryptionKeystringnull

Represents the encryption key for the dataset env vars. This field is required if enabled is true or None.

The encryption key must be a 32-character alphanumeric string, which includes both uppercase and lowercase Latin letters (A-Z, a-z) and digits (0-9).

To generate use:

tr -dc 'A-Za-z0-9' < /dev/urandom | head -c 32; echo

DidSecretEncryptionConfig

FieldTypeDefaultDescription
enabledbooleanfalse
encryptionKeystringnull

The encryption key must be a 32-character alphanumeric string, which includes both uppercase and lowercase Latin letters (A-Z, a-z) and digits (0-9).

To generate use:

tr -dc 'A-Za-z0-9' < /dev/urandom | head -c 32; echo

DurationString

Base type: string

Email

Base type: string

EmailConfig

FieldTypeDefaultDescription
gatewayEmailConfigGateway
senderAddressstring
senderNamestringnull

EmailConfigGateway

Variants
Dummy
Postmark

EmailConfigGateway::Dummy

FieldTypeDefaultDescription
kindstring

EmailConfigGateway::Postmark

FieldTypeDefaultDescription
apiKeystring
kindstring

EmbeddingsChunkerConfig

Variants
Simple

EmbeddingsChunkerConfig::Simple

FieldTypeDefaultDescription
kindstring
splitParagraphsbooleanfalse
splitSectionsbooleanfalse

EmbeddingsEncoderConfig

Variants
OpenAi
Dummy

EmbeddingsEncoderConfig::OpenAi

FieldTypeDefaultDescription
apiKeystringnull
dimensionsinteger1536
kindstring
modelNamestring"text-embedding-ada-002"
urlstringnull

EmbeddingsEncoderConfig::Dummy

FieldTypeDefaultDescription
kindstring

EngineConfig

FieldTypeDefaultDescription
datafusionEmbeddedEngineConfigDatafusion
{
  "base": {
    "datafusion.catalog.default_catalog": "kamu",
    "datafusion.catalog.default_schema": "kamu",
    "datafusion.catalog.information_schema": "true",
    "datafusion.sql_parser.enable_ident_normalization": "false"
  },
  "batchQuery": {},
  "compaction": {
    "datafusion.execution.target_partitions": "1"
  },
  "ingest": {
    "datafusion.execution.target_partitions": "1"
  },
  "useLegacyArrowBufferEncoding": false
}
Embedded Datafusion engine configuration
imagesEngineImagesConfig
{
  "datafusion": "ghcr.io/kamu-data/engine-datafusion:0.9.0",
  "flink": "ghcr.io/kamu-data/engine-flink:0.18.2-flink_1.16.0-scala_2.12-java8",
  "risingwave": "ghcr.io/kamu-data/engine-risingwave:0.2.0-risingwave_1.7.0-alpha",
  "spark": "ghcr.io/kamu-data/engine-spark:0.23.1-spark_3.5.0"
}
UNSTABLE: Default engine images
maxConcurrencyintegernullMaximum number of engine operations that can be performed concurrently
networkNsNetworkNamespaceType"Private"

Type of the networking namespace (relevant when running in container environments)

runtimeContainerRuntimeType"Podman"Type of the runtime to use when running the data processing engines
shutdownTimeoutDurationString"5s"Timeout for waiting for the engine container to stop gracefully
startTimeoutDurationString"30s"Timeout for starting an engine container

EngineConfigDatafusion

FieldTypeDefaultDescription
baseobject
{
  "datafusion.catalog.default_catalog": "kamu",
  "datafusion.catalog.default_schema": "kamu",
  "datafusion.catalog.information_schema": "true",
  "datafusion.sql_parser.enable_ident_normalization": "false"
}

Base configuration options. See: <https://datafusion.apache.org/user-guide/configs.html>

batchQueryobject{}Batch query-specific overrides to the base config
compactionobject
{
  "datafusion.execution.target_partitions": "1"
}
Compaction-specific overrides to the base config
ingestobject
{
  "datafusion.execution.target_partitions": "1"
}
Ingest-specific overrides to the base config
useLegacyArrowBufferEncodingbooleanfalse

Makes arrow batches use contiguous Binary and Utf8 encodings instead of more modern BinaryView and Utf8View. This is only needed for compatibility with some older libraries that don’t yet support them.

See: kamu-node#277

EngineImagesConfig

FieldTypeDefaultDescription
datafusionstring"ghcr.io/kamu-data/engine-datafusion:0.9.0"UNSTABLE: Datafusion engine image
flinkstring"ghcr.io/kamu-data/engine-flink:0.18.2-flink_1.16.0-scala_2.12-java8"UNSTABLE: Flink engine image
risingwavestring"ghcr.io/kamu-data/engine-risingwave:0.2.0-risingwave_1.7.0-alpha"UNSTABLE: RisingWave engine image
sparkstring"ghcr.io/kamu-data/engine-spark:0.23.1-spark_3.5.0"UNSTABLE: Spark engine image

EthRpcEndpoint

FieldTypeDefaultDescription
chainIdinteger
chainNamestring
nodeUrlstring

EthereumSourceConfig

FieldTypeDefaultDescription
commitAfterBlocksScannedinteger1000000

Forces iteration to stop after the specified number of blocks were scanned even if we didn’t reach the target record number. This is useful to not lose a lot of scanning progress in case of an RPC error.

getLogsBlockStrideinteger100000

Default number of blocks to scan within one query to eth_getLogs RPC endpoint.

rpcEndpointsarray[]Default RPC endpoints to use if source does not specify one explicitly.
useBlockTimestampFallbackbooleanfalse

Many providers don’t yet return blockTimestamp from eth_getLogs RPC endpoint and in such cases block_timestamp column will be null. If you enable this fallback the library will perform an additional call to eth_getBlock to populate the timestamp, but this may result in a significant performance penalty when fetching many log records.

See: ethereum/execution-apis#295

ExtraConfig

FieldTypeDefaultDescription
graphqlGqlConfig{}

FlightSqlConfig

FieldTypeDefaultDescription
allowAnonymousbooleantrueWhether clients can authenticate as 'anonymous' user
anonSessionExpirationTimeoutDurationString"5m"

Time after which FlightSQL client session will be forgotten and client will have to re-authorize (for anonymous clients)

anonSessionInactivityTimeoutDurationString"5s"

Time after which FlightSQL session context will be released to free the resources (for anonymous clients)

authedSessionExpirationTimeoutDurationString"30m"

Time after which FlightSQL client session will be forgotten and client will have to re-authorize (for authenticated clients)

authedSessionInactivityTimeoutDurationString"5s"

Time after which FlightSQL session context will be released to free the resources (for authenticated clients)

FlowAgentConfig

FieldTypeDefaultDescription
awaitingStepSecsinteger1
defaultRetryPoliciesobject{}
mandatoryThrottlingPeriodSecsinteger60

FlowSystemConfig

FieldTypeDefaultDescription
flowAgentFlowAgentConfig
{
  "awaitingStepSecs": 1,
  "defaultRetryPolicies": {},
  "mandatoryThrottlingPeriodSecs": 60
}
flowSystemEventAgentFlowSystemEventAgentConfig
{
  "batchSize": 100,
  "maxListeningTimeoutMs": 60000,
  "minDebounceIntervalMs": 100
}
taskAgentTaskAgentConfig
{
  "taskCheckingIntervalSecs": 1
}

FlowSystemEventAgentConfig

FieldTypeDefaultDescription
batchSizeinteger100
maxListeningTimeoutMsinteger60000
minDebounceIntervalMsinteger100

GqlConfig

FieldTypeDefaultDescription

IdentityConfig

FieldTypeDefaultDescription
privateKeyPrivateKeynull

Private key used to sign API responses. Currently only ed25519 keys are supported.

To generate use:

dd if=/dev/urandom bs=1 count=32 status=none |
    base64 -w0 |
    tr '+/' '-_' |
    tr -d '=' |
    (echo -n u && cat)

The command above:

  • reads 32 random bytes
  • base64-encodes them
  • converts default base64 encoding to base64url and removes padding
  • prepends a multibase prefix

IpfsConfig

FieldTypeDefaultDescription
httpGatewaystring"http://localhost:8080/"

HTTP Gateway URL to use for downloads. For safety, it defaults to http://localhost:8080 - a local IPFS daemon. If you don’t have IPFS installed, you can set this URL to one of the public gateways like https://ipfs.io. List of public gateways can be found here: https://ipfs.github.io/public-gateway-checker/

preResolveDnslinkbooleantrue

Whether kamu should pre-resolve IPNS DNSLink names using DNS or leave it to the Gateway.

MqttSourceConfig

FieldTypeDefaultDescription
brokerIdleTimeoutMsinteger1000

Time in milliseconds to wait for MQTT broker to send us some data after which we will consider that we have “caught up” and end the polling loop.

NetworkNamespaceType

Corresponds to podman’s containers.conf::netns. When podman is used inside containers (e.g. podman-in-docker or podman-in-k8s), it usually uses the host network namespace.

Variants
Private
Host

OutboxConfig

FieldTypeDefaultDescription
awaitingStepSecsinteger1
batchSizeinteger20

Password

Base type: string

PasswordPolicyConfig

FieldTypeDefaultDescription
minNewPasswordLengthinteger8

PrivateKey

Base type: string

ProtocolConfig

FieldTypeDefaultDescription
flightSqlFlightSqlConfig
{
  "allowAnonymous": true,
  "anonSessionExpirationTimeout": "5m",
  "anonSessionInactivityTimeout": "5s",
  "authedSessionExpirationTimeout": "30m",
  "authedSessionInactivityTimeout": "5s"
}
FlightSQL configuration
ipfsIpfsConfig
{
  "httpGateway": "http://localhost:8080/",
  "preResolveDnslink": true
}
IPFS configuration

QuotaAccountConfig

FieldTypeDefaultDescription
defaultStorageLimitInBytesintegernull

QuotaConfig

FieldTypeDefaultDescription
accountQuotaAccountConfig{}

RepoCachingConfig

FieldTypeDefaultDescription
metadataLocalFsCachePathstringnull
registryCacheEnabledbooleanfalse

RepoConfig

FieldTypeDefaultDescription
cachingRepoCachingConfig
{
  "registryCacheEnabled": false
}
dataBlocksPageSizeintegernull
repoUrlUrlOrPathnull

RetryPolicyConfig

FieldTypeDefaultDescription
backoffTypeRetryPolicyConfigBackoffTypenull
maxAttemptsintegernull
minDelaySecsintegernull

RetryPolicyConfigBackoffType

Variants
Fixed
Linear
Exponential
ExponentialWithJitter

RuntimeConfig

FieldTypeDefaultDescription
maxBlockingThreadsintegernull
threadStackSizeintegernull
workerThreadsintegernull

SearchConfig

FieldTypeDefaultDescription
embeddingsChunkerEmbeddingsChunkerConfig
{
  "kind": "Simple",
  "splitParagraphs": false,
  "splitSections": false
}
Embeddings chunker configuration
embeddingsEncoderEmbeddingsEncoderConfig
{
  "dimensions": 1536,
  "kind": "OpenAi",
  "modelName": "text-embedding-ada-002"
}
Embeddings encoder configuration
indexerSearchIndexerConfig
{
  "clearOnStart": false,
  "incrementalIndexing": false
}
Indexer configuration
repoSearchRepositoryConfig
{
  "kind": "Dummy"
}
Search repository configuration
semanticSearchThresholdScorenumber0.0

SearchIndexerConfig

FieldTypeDefaultDescription
clearOnStartbooleanfalse
incrementalIndexingbooleanfalseWhether incremental indexing is enabled

SearchRepositoryConfig

Variants
Dummy
Elasticsearch

SearchRepositoryConfig::Dummy

FieldTypeDefaultDescription
kindstring

SearchRepositoryConfig::Elasticsearch

FieldTypeDefaultDescription
caCertPemPathstringnull
embeddingDimensionsinteger1536
enableCompressionbooleanfalse
indexPrefixstring"kamu-node"
kindstring
passwordstringnull
timeoutSecsinteger30
urlstring"http://localhost:9200"

SourceConfig

FieldTypeDefaultDescription
ethereumEthereumSourceConfig
{
  "commitAfterBlocksScanned": 1000000,
  "getLogsBlockStride": 100000,
  "rpcEndpoints": [],
  "useBlockTimestampFallback": false
}
Ethereum-specific configuration
mqttMqttSourceConfig
{
  "brokerIdleTimeoutMs": 1000
}
MQTT-specific configuration
targetRecordsPerSliceinteger10000

Target number of records after which we will stop consuming from the resumable source and commit data, leaving the rest for the next iteration. This ensures that one data slice doesn’t become too big.

TaskAgentConfig

FieldTypeDefaultDescription
taskCheckingIntervalSecsinteger1

UploadRepoConfig

FieldTypeDefaultDescription
maxFileSizeMbinteger50
storageUploadRepoStorageConfig
{
  "kind": "Local"
}

UploadRepoStorageConfig

Variants
S3
Local

UploadRepoStorageConfig::S3

FieldTypeDefaultDescription
bucketS3Urlstring
kindstring

UploadRepoStorageConfig::Local

FieldTypeDefaultDescription
kindstring

UrlConfig

FieldTypeDefaultDescription
baseUrlFlightsqlUrlOrPath"grpc://localhost:50050"
baseUrlPlatformUrlOrPath"http://localhost:4200/"
baseUrlRestUrlOrPath"http://localhost:8080/"

UrlOrPath

Base type: string

WebhooksConfig

FieldTypeDefaultDescription
deliveryTimeoutSecsinteger10
maxConsecutiveFailuresinteger5
secretEncryptionEnabledbooleanfalse
secretEncryptionKeystringnull

Represents the encryption key for the webhooks secret. This field is required if secret_encryption_enabled is true or None.

The encryption key must be a 32-character alphanumeric string, which includes both uppercase and lowercase Latin letters (A-Z, a-z) and digits (0-9).

Example

let config = WebhooksConfig { … secret_encryption_enabled: Some(true), encryption_key: Some(String::from("aBcDeFgHiJkLmNoPqRsTuVwXyZ012345")) };