{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "title": "Data Catalog Metadata Schema",
  "description": "Schema for data catalog metadata",
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier for the dataset"
    },
    "name": {
      "type": "string",
      "description": "Human-readable name of the dataset"
    },
    "description": {
      "type": "string",
      "description": "Description of the dataset"
    },
    "source": {
      "type": "string",
      "description": "Source system or project"
    },
    "storage": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "enum": ["s3", "minio", "azure-blob", "gcs"],
          "description": "Storage type"
        },
        "bucket": {
          "type": "string",
          "description": "Bucket or container name"
        },
        "path": {
          "type": "string",
          "description": "Path within bucket"
        }
      },
      "required": ["type", "bucket"]
    },
    "schema": {
      "type": "object",
      "description": "Data schema definition"
    },
    "format": {
      "type": "string",
      "enum": ["parquet", "json", "csv", "avro"],
      "description": "Data format"
    },
    "tags": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Tags for categorization"
    },
    "owner": {
      "type": "string",
      "description": "Owner or team responsible"
    },
    "created": {
      "type": "string",
      "format": "date-time",
      "description": "Creation timestamp"
    },
    "updated": {
      "type": "string",
      "format": "date-time",
      "description": "Last update timestamp"
    },
    "access": {
      "type": "object",
      "properties": {
        "level": {
          "type": "string",
          "enum": ["public", "internal", "restricted"],
          "description": "Access level"
        },
        "permissions": {
          "type": "array",
          "items": {
            "type": "string",
            "enum": ["read", "write", "delete"]
          }
        }
      }
    }
  },
  "required": ["id", "name", "source", "storage"]
}