Files
infrastructure/data-storage/data-catalog/metadata-schema.json
2026-02-09 21:51:46 -08:00

93 lines
2.2 KiB
JSON

{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "Data Catalog Metadata Schema",
"description": "Schema for data catalog metadata",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier for the dataset"
},
"name": {
"type": "string",
"description": "Human-readable name of the dataset"
},
"description": {
"type": "string",
"description": "Description of the dataset"
},
"source": {
"type": "string",
"description": "Source system or project"
},
"storage": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["s3", "minio", "azure-blob", "gcs"],
"description": "Storage type"
},
"bucket": {
"type": "string",
"description": "Bucket or container name"
},
"path": {
"type": "string",
"description": "Path within bucket"
}
},
"required": ["type", "bucket"]
},
"schema": {
"type": "object",
"description": "Data schema definition"
},
"format": {
"type": "string",
"enum": ["parquet", "json", "csv", "avro"],
"description": "Data format"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags for categorization"
},
"owner": {
"type": "string",
"description": "Owner or team responsible"
},
"created": {
"type": "string",
"format": "date-time",
"description": "Creation timestamp"
},
"updated": {
"type": "string",
"format": "date-time",
"description": "Last update timestamp"
},
"access": {
"type": "object",
"properties": {
"level": {
"type": "string",
"enum": ["public", "internal", "restricted"],
"description": "Access level"
},
"permissions": {
"type": "array",
"items": {
"type": "string",
"enum": ["read", "write", "delete"]
}
}
}
}
},
"required": ["id", "name", "source", "storage"]
}