Skip to content
Snippets Groups Projects
Commit 45444882 authored by Andréas Livet
Browse files

Merge branch 'upload-tus' into 'develop'

Add MAX_UPLOAD_PDF_PAGES limit

See merge request ademe-group/sofia/sofia_collections!62
parents 0393ed7b 6f12642c
Branches
Tags
No related merge requests found
Pipeline #558345 failed
......@@ -72,3 +72,4 @@ QDRANT_PORT=6333
# Upload
MAX_UPLOAD_SIZE_IN_MB=1024
MAX_UPLOAD_PDF_PAGES=1500
......@@ -12,6 +12,7 @@ from starlette.datastructures import Headers
from app import crud
from app.api.deps import CurrentUser, SessionDep
from app.core.config import settings
from app.core.qdrant import client as qdrant_client
from app.core.qdrant import delete_vectors
from app.core.telemetry import telemetry_client
......@@ -178,6 +179,11 @@ def handle_pdf_upload(session: Any, collection_id: uuid.UUID, file: UploadFile)
# TODO: get n_pages
try:
doc = pikepdf.open(filename_or_stream=BytesIO(file.file.read()))
if len(doc.pages) > settings.MAX_UPLOAD_PDF_PAGES:
raise HTTPException(
status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
detail=f"Document must contains less than {settings.MAX_UPLOAD_PDF_PAGES} pages.",
)
should_linearize = not doc.is_linearized
if doc.is_linearized and not doc.check_linearization():
should_linearize = True
......@@ -200,10 +206,13 @@ def handle_pdf_upload(session: Any, collection_id: uuid.UUID, file: UploadFile)
# Important because minio needs it after
file.file.seek(0)
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Erreur à l'ouverture du document pdf",
) from exc
if not isinstance(exc, HTTPException):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Erreur à l'ouverture du document pdf",
) from exc
else:
raise exc
try:
new_doc = DocumentCreate(
collection_id=collection_id,
......
......@@ -185,6 +185,7 @@ class Settings(BaseSettings):
QDRANT: QdrantSettings = QdrantSettings()
MAX_UPLOAD_SIZE_IN_MB: int = 1024
MAX_UPLOAD_PDF_PAGES: int = 1500
def _check_default_secret(self, var_name: str, value: str | None) -> None:
if value == "changethis":
......
......@@ -85,7 +85,10 @@ def create_api_router(
return None
if _get_file_length(uuid) + len(chunk) > max_size:
raise HTTPException(status_code=413)
raise HTTPException(
status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
detail=f"Document size must be less than {max_size} bytes.",
)
f.write(chunk)
meta.offset += len(chunk)
......@@ -100,7 +103,6 @@ def create_api_router(
@router.head("/{uuid}", status_code=status.HTTP_200_OK)
def get_upload_metadata(_: CurrentUser, response: Response, uuid: str) -> Response:
meta = _read_metadata(uuid)
print("META", meta)
if meta is None or not _file_exists(uuid):
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND)
......@@ -111,7 +113,7 @@ def create_api_router(
response.headers["Cache-Control"] = "no-store"
encoded_filename = base64.b64encode(meta.metadata["filename"].encode("utf-8")).decode("utf-8")
encoded_filetype = base64.b64encode(meta.metadata["filetype"].encode("utf-8")).decode("utf-8")
response.headers["Upload-Metadata"] = f"filename {encoded_filename}, " f"filetype {encoded_filetype}"
response.headers["Upload-Metadata"] = f"filename {encoded_filename}, filetype {encoded_filetype}"
response.status_code = status.HTTP_200_OK
return response
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment