Skip to content

Commit 1ed6cfd

Browse files
feat: new Dataset permissions (#869)
<!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --------- Co-authored-by: Boris Arzentar <borisarzentar@gmail.com> Co-authored-by: Boris <boris@topoteretes.com>
1 parent ebebbb8 commit 1ed6cfd

File tree

76 files changed

+5381
-4213
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+5381
-4213
lines changed

.env.template

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,11 @@ LITELLM_LOG="ERROR"
6969
# Set this environment variable to disable sending telemetry data
7070
# TELEMETRY_DISABLED=1
7171

72+
# Set this variable to True to enforce usage of backend access control for Cognee
73+
# Note: This is currently supported only by the following databases:
74+
# Relational: SQLite, Postgres
75+
# Vector: LanceDB
76+
# Graph: KuzuDB
77+
#
78+
# It enforces the use of LanceDB and KuzuDB and uses them to create a separate database per Cognee user + dataset
79+
ENABLE_BACKEND_ACCESS_CONTROL=False

.github/workflows/e2e_tests.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,3 +215,34 @@ jobs:
215215
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
216216
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
217217
run: poetry run python ./cognee/tests/test_s3.py
218+
219+
test-parallel-databases:
220+
name: Test using different async databases in parallel in Cognee
221+
runs-on: ubuntu-22.04
222+
steps:
223+
- name: Check out repository
224+
uses: actions/checkout@v4
225+
226+
- name: Cognee Setup
227+
uses: ./.github/actions/cognee_setup
228+
with:
229+
python-version: '3.11.x'
230+
231+
- name: Install specific graph db dependency
232+
run: |
233+
poetry install -E kuzu
234+
235+
- name: Run parallel databases test
236+
env:
237+
ENV: 'dev'
238+
LLM_MODEL: ${{ secrets.LLM_MODEL }}
239+
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
240+
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
241+
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
242+
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
243+
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
244+
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
245+
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
246+
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
247+
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
248+
run: poetry run python ./cognee/tests/test_parallel_databases.py

.github/workflows/test_suites.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747

4848
relational-db-migration-tests:
4949
name: Relational DB Migration Tests
50-
needs: [ basic-tests, e2e-tests ]
50+
needs: [ basic-tests, e2e-tests, graph-db-tests]
5151
uses: ./.github/workflows/relational_db_migration_tests.yml
5252
secrets: inherit
5353

@@ -79,7 +79,7 @@ jobs:
7979

8080
db-examples-tests:
8181
name: DB Examples Tests
82-
needs: [vector-db-tests]
82+
needs: [vector-db-tests, graph-db-tests, relational-db-migration-tests]
8383
uses: ./.github/workflows/db_examples_tests.yml
8484
secrets: inherit
8585

.github/workflows/vector_db_tests.yml

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,16 @@ jobs:
135135
run:
136136
shell: bash
137137

138+
services:
139+
qdrant:
140+
image: qdrant/qdrant:v1.14.1
141+
env:
142+
QDRANT__LOG_LEVEL: ERROR
143+
QDRANT__SERVICE__API_KEY: qdrant_api_key
144+
QDRANT__SERVICE__ENABLE_TLS: 0
145+
ports:
146+
- 6333:6333
147+
138148
steps:
139149
- name: Check out
140150
uses: actions/checkout@master
@@ -148,6 +158,19 @@ jobs:
148158
run: |
149159
poetry install -E qdrant
150160
161+
- name: Wait for Qdrant to be healthy
162+
run: |
163+
for i in {1..10}; do
164+
if curl -f http://127.0.0.1:6333/healthz; then
165+
echo "Qdrant is healthy!"
166+
exit 0
167+
fi
168+
echo "Waiting for Qdrant to be healthy..."
169+
sleep 3
170+
done
171+
echo "Qdrant failed to become healthy in time"
172+
exit 1
173+
151174
- name: Run default Qdrant
152175
env:
153176
ENV: 'dev'
@@ -159,8 +182,8 @@ jobs:
159182
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
160183
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
161184
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
162-
VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
163-
VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
185+
VECTOR_DB_URL: 127.0.0.1
186+
VECTOR_DB_KEY: qdrant_api_key
164187
run: poetry run python ./cognee/tests/test_qdrant.py
165188

166189
run-postgres-tests:

cognee/api/client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""FastAPI server for the Cognee API."""
22

33
import os
4+
45
import uvicorn
56
from cognee.shared.logging_utils import get_logger
67
import sentry_sdk
@@ -63,6 +64,7 @@ async def lifespan(app: FastAPI):
6364

6465
app = FastAPI(debug=app_environment != "prod", lifespan=lifespan)
6566

67+
6668
app.add_middleware(
6769
CORSMiddleware,
6870
allow_origins=["*"],

cognee/api/v1/add/add.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from uuid import UUID
12
from typing import Union, BinaryIO, List, Optional
23

34
from cognee.modules.pipelines import Task
@@ -11,9 +12,21 @@ async def add(
1112
dataset_name: str = "main_dataset",
1213
user: User = None,
1314
node_set: Optional[List[str]] = None,
15+
vector_db_config: dict = None,
16+
graph_db_config: dict = None,
17+
dataset_id: UUID = None,
1418
):
15-
tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user, node_set)]
19+
tasks = [
20+
Task(resolve_data_directories),
21+
Task(ingest_data, dataset_name, user, node_set, dataset_id),
22+
]
1623

1724
await cognee_pipeline(
18-
tasks=tasks, datasets=dataset_name, data=data, user=user, pipeline_name="add_pipeline"
25+
tasks=tasks,
26+
datasets=dataset_id if dataset_id else dataset_name,
27+
data=data,
28+
user=user,
29+
pipeline_name="add_pipeline",
30+
vector_db_config=vector_db_config,
31+
graph_db_config=graph_db_config,
1932
)

cognee/api/v1/add/routers/get_add_router.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from uuid import UUID
2+
23
from fastapi import Form, UploadFile, Depends
34
from fastapi.responses import JSONResponse
45
from fastapi import APIRouter
@@ -20,8 +21,8 @@ def get_add_router() -> APIRouter:
2021
@router.post("/", response_model=None)
2122
async def add(
2223
data: List[UploadFile],
24+
datasetName: str,
2325
datasetId: Optional[UUID] = Form(default=None),
24-
datasetName: Optional[str] = Form(default=None),
2526
user: User = Depends(get_authenticated_user),
2627
):
2728
"""This endpoint is responsible for adding data to the graph."""
@@ -30,19 +31,13 @@ async def add(
3031
if not datasetId and not datasetName:
3132
raise ValueError("Either datasetId or datasetName must be provided.")
3233

33-
if datasetId and not datasetName:
34-
dataset = await get_dataset(user_id=user.id, dataset_id=datasetId)
35-
try:
36-
datasetName = dataset.name
37-
except IndexError:
38-
raise ValueError("No dataset found with the provided datasetName.")
39-
4034
try:
4135
if isinstance(data, str) and data.startswith("http"):
4236
if "github" in data:
4337
# Perform git clone if the URL is from GitHub
4438
repo_name = data.split("/")[-1].replace(".git", "")
4539
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
40+
# TODO: Update add call with dataset info
4641
await cognee_add(
4742
"data://.data/",
4843
f"{repo_name}",
@@ -53,10 +48,10 @@ async def add(
5348
response.raise_for_status()
5449

5550
file_data = await response.content()
56-
51+
# TODO: Update add call with dataset info
5752
return await cognee_add(file_data)
5853
else:
59-
await cognee_add(data, datasetName, user=user)
54+
await cognee_add(data, dataset_name=datasetName, user=user, dataset_id=datasetId)
6055
except Exception as error:
6156
return JSONResponse(status_code=409, content={"error": str(error)})
6257

cognee/api/v1/cognify/cognify.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from cognee.modules.users.models import User
1010
from cognee.shared.data_models import KnowledgeGraph
1111
from cognee.tasks.documents import (
12-
check_permissions_on_documents,
12+
check_permissions_on_dataset,
1313
classify_documents,
1414
extract_chunks_from_documents,
1515
)
@@ -31,11 +31,18 @@ async def cognify(
3131
chunker=TextChunker,
3232
chunk_size: int = None,
3333
ontology_file_path: Optional[str] = None,
34+
vector_db_config: dict = None,
35+
graph_db_config: dict = None,
3436
):
3537
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
3638

3739
return await cognee_pipeline(
38-
tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
40+
tasks=tasks,
41+
datasets=datasets,
42+
user=user,
43+
pipeline_name="cognify_pipeline",
44+
vector_db_config=vector_db_config,
45+
graph_db_config=graph_db_config,
3946
)
4047

4148

@@ -48,7 +55,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
4855
) -> list[Task]:
4956
default_tasks = [
5057
Task(classify_documents),
51-
Task(check_permissions_on_documents, user=user, permissions=["write"]),
58+
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
5259
Task(
5360
extract_chunks_from_documents,
5461
max_chunk_size=chunk_size or get_max_chunk_tokens(),

cognee/api/v1/cognify/routers/get_cognify_router.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from uuid import UUID
12
from typing import List, Optional
23
from pydantic import BaseModel
34
from fastapi import Depends
@@ -10,6 +11,7 @@
1011

1112
class CognifyPayloadDTO(BaseModel):
1213
datasets: List[str]
14+
dataset_ids: Optional[List[UUID]]
1315
graph_model: Optional[BaseModel] = KnowledgeGraph
1416

1517

@@ -22,7 +24,9 @@ async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authentic
2224
from cognee.api.v1.cognify import cognify as cognee_cognify
2325

2426
try:
25-
await cognee_cognify(payload.datasets, user, payload.graph_model)
27+
# Send dataset UUIDs if they are given; otherwise send dataset names
28+
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
29+
await cognee_cognify(datasets, user, payload.graph_model)
2630
except Exception as error:
2731
return JSONResponse(status_code=409, content={"error": str(error)})
2832

cognee/api/v1/permissions/routers/get_permissions_router.py

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,69 @@
11
from uuid import UUID
2+
from typing import List
23

3-
from fastapi import APIRouter
4+
from fastapi import APIRouter, Depends
45
from fastapi.responses import JSONResponse
56

7+
from cognee.modules.users.models import User
8+
from cognee.modules.users.methods import get_authenticated_user
9+
610

711
def get_permissions_router() -> APIRouter:
812
permissions_router = APIRouter()
913

10-
@permissions_router.post("/roles/{role_id}/permissions")
11-
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
12-
from cognee.modules.users.permissions.methods import (
13-
give_default_permission_to_role as set_default_permission_to_role,
14-
)
15-
16-
await set_default_permission_to_role(role_id, permission_name)
17-
18-
return JSONResponse(status_code=200, content={"message": "Permission assigned to role"})
14+
@permissions_router.post("/datasets/{principal_id}/")
15+
async def give_datasets_permission_to_principal(
16+
permission_name: str,
17+
dataset_ids: List[UUID],
18+
principal_id: UUID,
19+
user: User = Depends(get_authenticated_user),
20+
):
21+
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
1922

20-
@permissions_router.post("/tenants/{tenant_id}/permissions")
21-
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
22-
from cognee.modules.users.permissions.methods import (
23-
give_default_permission_to_tenant as set_tenant_default_permissions,
23+
await authorized_give_permission_on_datasets(
24+
principal_id,
25+
[dataset_id for dataset_id in dataset_ids],
26+
permission_name,
27+
user.id,
2428
)
2529

26-
await set_tenant_default_permissions(tenant_id, permission_name)
27-
28-
return JSONResponse(status_code=200, content={"message": "Permission assigned to tenant"})
29-
30-
@permissions_router.post("/users/{user_id}/permissions")
31-
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
32-
from cognee.modules.users.permissions.methods import (
33-
give_default_permission_to_user as set_default_permission_to_user,
30+
return JSONResponse(
31+
status_code=200, content={"message": "Permission assigned to principal"}
3432
)
3533

36-
await set_default_permission_to_user(user_id, permission_name)
37-
38-
return JSONResponse(status_code=200, content={"message": "Permission assigned to user"})
39-
4034
@permissions_router.post("/roles")
41-
async def create_role(
42-
role_name: str,
43-
tenant_id: UUID,
44-
):
35+
async def create_role(role_name: str, user: User = Depends(get_authenticated_user)):
4536
from cognee.modules.users.roles.methods import create_role as create_role_method
4637

47-
await create_role_method(role_name=role_name, tenant_id=tenant_id)
38+
await create_role_method(role_name=role_name, owner_id=user.id)
4839

4940
return JSONResponse(status_code=200, content={"message": "Role created for tenant"})
5041

5142
@permissions_router.post("/users/{user_id}/roles")
52-
async def add_user_to_role(user_id: UUID, role_id: UUID):
43+
async def add_user_to_role(
44+
user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user)
45+
):
5346
from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method
5447

55-
await add_user_to_role_method(user_id=user_id, role_id=role_id)
48+
await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id)
5649

5750
return JSONResponse(status_code=200, content={"message": "User added to role"})
5851

52+
@permissions_router.post("/users/{user_id}/tenants")
53+
async def add_user_to_tenant(
54+
user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user)
55+
):
56+
from cognee.modules.users.tenants.methods import add_user_to_tenant
57+
58+
await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id)
59+
60+
return JSONResponse(status_code=200, content={"message": "User added to tenant"})
61+
5962
@permissions_router.post("/tenants")
60-
async def create_tenant(tenant_name: str):
63+
async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)):
6164
from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method
6265

63-
await create_tenant_method(tenant_name=tenant_name)
66+
await create_tenant_method(tenant_name=tenant_name, user_id=user.id)
6467

6568
return JSONResponse(status_code=200, content={"message": "Tenant created."})
6669

0 commit comments

Comments
 (0)