# Add file size validation to document upload endpoint #584

base: main

Changes from all commits: 828fa51, 652adb9, d2bfc46, 1adc93c

**New file**: Alembic migration 043 (`@@ -0,0 +1,92 @@`)

```python
"""Add project_id and organization_id to job table

Revision ID: 043
Revises: 042
Create Date: 2026-02-04 14:39:00.000000

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "043"
down_revision = "042"
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # Add organization_id column
    op.add_column(
        "job",
        sa.Column(
            "organization_id",
            sa.Integer(),
            nullable=False,
            comment="Reference to the organization",
        ),
    )

    # Add project_id column
    op.add_column(
        "job",
        sa.Column(
            "project_id",
            sa.Integer(),
            nullable=False,
            comment="Reference to the project",
        ),
    )

    # Create foreign key constraints
    op.create_foreign_key(
        "fk_job_organization_id",
        "job",
        "organization",
        ["organization_id"],
        ["id"],
        ondelete="CASCADE",
    )

    op.create_foreign_key(
        "fk_job_project_id",
        "job",
        "project",
        ["project_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Create indexes
    op.create_index(
        "ix_job_organization_id",
        "job",
        ["organization_id"],
        unique=False,
    )

    op.create_index(
        "ix_job_project_id",
        "job",
        ["project_id"],
        unique=False,
    )
    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # Drop indexes
    op.drop_index("ix_job_project_id", table_name="job")
    op.drop_index("ix_job_organization_id", table_name="job")

    # Drop foreign key constraints
    op.drop_constraint("fk_job_project_id", "job", type_="foreignkey")
    op.drop_constraint("fk_job_organization_id", "job", type_="foreignkey")

    # Drop columns
    op.drop_column("job", "project_id")
    op.drop_column("job", "organization_id")
    # ### end Alembic commands ###
```
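
For orientation, a sketch of how the corresponding model fields might be declared, assuming the project defines its tables with SQLModel; the real `Job` model is not part of this diff and may differ:

```python
from sqlmodel import Field, SQLModel


class Job(SQLModel, table=True):
    """Illustrative stand-in for the real Job model; only the columns touched by revision 043 are shown."""

    id: int | None = Field(default=None, primary_key=True)
    organization_id: int = Field(foreign_key="organization.id", index=True)
    project_id: int = Field(foreign_key="project.id", index=True)
    # ON DELETE CASCADE is enforced at the database level by the foreign keys
    # created in the migration, not by these field declarations.
```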
**New file**: `backend/app/alembic/versions/044_optimize_conversation_query.py` (`@@ -0,0 +1,34 @@`)

```python
"""add composite index for conversation query optimization

Revision ID: 044
Revises: 043
Create Date: 2026-02-04 15:24:00.000000

"""
from alembic import op


# revision identifiers, used by Alembic.
revision = "044"
down_revision = "043"
branch_labels = None
depends_on = None
```
**Comment on lines +11 to +15**

🧩 Analysis chain

🏁 Script executed:

```bash
#!/bin/bash
# List all Alembic revision IDs to confirm the latest revision number.
python - <<'PY'
import re, pathlib
path = pathlib.Path("backend/app/alembic/versions")
revs = []
for f in path.glob("*.py"):
    txt = f.read_text(encoding="utf-8")
    m = re.search(r'revision\s*=\s*"(\d+)"', txt)
    if m:
        revs.append((int(m.group(1)), f.name))
for rev, name in sorted(revs):
    print(f"{rev:03d} -> {name}")
PY
```

Repository: ProjectTech4DevAI/kaapi-backend. Length of output: 2069

🏁 Script executed:

```bash
cat backend/app/alembic/versions/044_optimize_conversation_query.py
```

Repository: ProjectTech4DevAI/kaapi-backend. Length of output: 934

**Add type hints to `upgrade()` and `downgrade()` functions.**

The revision numbering (044) is correct. However, both functions are missing return type hints:

```diff
-def upgrade():
+def upgrade() -> None:

-def downgrade():
+def downgrade() -> None:
```
(continuation of `044_optimize_conversation_query.py`)

```python
def upgrade():
    # Create composite index to optimize the get_conversation_by_ancestor_id query
    # This query filters by: ancestor_response_id, project_id, is_deleted
    # and orders by: inserted_at DESC
    op.create_index(
        "ix_openai_conversation_ancestor_project_active_time",
        "openai_conversation",
        ["ancestor_response_id", "project_id", "is_deleted", "inserted_at"],
        unique=False,
    )


def downgrade():
    op.drop_index(
        "ix_openai_conversation_ancestor_project_active_time",
        table_name="openai_conversation",
    )
```
**Comment on lines +18 to +34**

🛠️ Refactor suggestion | 🟠 Major

**Add required return type hints to migration functions.**

Both `upgrade()` and `downgrade()` are missing return type hints.

✅ Suggested fix:

```diff
-def upgrade():
+def upgrade() -> None:
@@
-def downgrade():
+def downgrade() -> None:
```
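
For context, a sketch of the query shape this composite index is meant to serve. The `OpenAIConversation` model below is a minimal stand-in built from the column names in the migration, not the project's actual model:

```python
from datetime import datetime

from sqlmodel import Field, Session, SQLModel, col, select


class OpenAIConversation(SQLModel, table=True):
    """Minimal stand-in; only the indexed columns are declared."""

    __tablename__ = "openai_conversation"

    id: int | None = Field(default=None, primary_key=True)
    ancestor_response_id: str
    project_id: int
    is_deleted: bool = False
    inserted_at: datetime = Field(default_factory=datetime.utcnow)


def get_conversation_by_ancestor_id(
    session: Session, ancestor_response_id: str, project_id: int
) -> OpenAIConversation | None:
    # The three equality filters match the leading index columns, and the
    # ORDER BY inserted_at DESC can be served from the same index.
    statement = (
        select(OpenAIConversation)
        .where(OpenAIConversation.ancestor_response_id == ancestor_response_id)
        .where(OpenAIConversation.project_id == project_id)
        .where(col(OpenAIConversation.is_deleted) == False)  # noqa: E712
        .order_by(col(OpenAIConversation.inserted_at).desc())
    )
    return session.exec(statement).first()
```

With the equality columns leading and `inserted_at` last, PostgreSQL can resolve both the filter and the sort from a single index scan.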
**Changed file**: document upload endpoint documentation (`@@ -4,6 +4,12 @@ Upload a document to Kaapi.`)
- If a target format is specified, a transformation job will also be created to transform the document into the target format in the background. The response will include both the uploaded document details and information about the transformation job.
- If a callback URL is provided, you will receive a notification at that URL once the document transformation job is completed.

### File Size Restrictions

- **Maximum file size**: 50MB (configurable via `MAX_DOCUMENT_UPLOAD_SIZE_MB` environment variable)
- Files exceeding the size limit will be rejected with a 413 (Payload Too Large) error
- Empty files will be rejected with a 422 (Unprocessable Entity) error
**Comment on lines +7 to +11**

**Documentation states 50MB but the code default is 512MB.**

The documentation says "Maximum file size: 50MB", but the code default for `MAX_DOCUMENT_UPLOAD_SIZE_MB` is 512.

📝 Option: Update documentation to match code

```diff
 ### File Size Restrictions

-- **Maximum file size**: 50MB (configurable via `MAX_DOCUMENT_UPLOAD_SIZE_MB` environment variable)
+- **Maximum file size**: 512MB (configurable via `MAX_DOCUMENT_UPLOAD_SIZE_MB` environment variable)
 - Files exceeding the size limit will be rejected with a 413 (Payload Too Large) error
 - Empty files will be rejected with a 422 (Unprocessable Entity) error
```
### Supported Transformations

The following (source_format → target_format) transformations are supported:
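
To illustrate the documented size handling from the caller's side, a minimal client sketch; the base URL, route path, and form field name are assumptions for illustration and are not taken from this PR:

```python
import io

import requests

BASE_URL = "http://localhost:8000"          # assumed local dev server
UPLOAD_PATH = "/api/v1/documents/upload"    # hypothetical path, not from this PR


def upload(content: bytes, filename: str) -> None:
    response = requests.post(
        f"{BASE_URL}{UPLOAD_PATH}",
        files={"file": (filename, io.BytesIO(content), "application/octet-stream")},
    )
    if response.status_code == 413:
        print("Rejected: file exceeds the configured size limit")
    elif response.status_code == 422:
        print("Rejected: empty file")
    else:
        response.raise_for_status()
        print("Uploaded:", response.json())


# An empty payload should exercise the 422 branch described above.
upload(b"", "empty.pdf")
```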
**New file**: document upload validation utilities (`@@ -0,0 +1,49 @@`)

```python
"""Validation utilities for document uploads."""

import logging
from pathlib import Path

from fastapi import HTTPException, UploadFile

from app.core.config import settings
from app.utils import mask_string

logger = logging.getLogger(__name__)

# Maximum file size for document uploads (in bytes)
# Default: 512 MB, configurable via settings
MAX_DOCUMENT_SIZE = settings.MAX_DOCUMENT_UPLOAD_SIZE_MB * 1024 * 1024


async def validate_document_file(file: UploadFile) -> int:
    """
    Validate document file size.

    Args:
        file: The uploaded file

    Returns:
        File size in bytes if valid
    """

    # Get file size by seeking to end
    file.file.seek(0, 2)
    file_size = file.file.tell()
    file.file.seek(0)

    if file_size > MAX_DOCUMENT_SIZE:
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum size: {MAX_DOCUMENT_SIZE / (1024 * 1024):.0f}MB",
        )

    if file_size == 0:
        raise HTTPException(
            status_code=422,
            detail="Empty file uploaded",
        )

    logger.info(
        f"[validate_document_file] Document file validated: {mask_string(file.filename)} ({file_size} bytes)"
    )
    return file_size
```
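
A minimal sketch of how this validator could be wired into an upload route. The module path, route, and response shape are placeholders, not the endpoint defined in this PR:

```python
from fastapi import APIRouter, UploadFile

# Assumed import path for the new helper; the real module location may differ.
from app.api.routes.documents.validators import validate_document_file

router = APIRouter()


@router.post("/documents/upload")  # hypothetical route for illustration
async def upload_document(file: UploadFile) -> dict:
    # Raises HTTPException(413) for oversized files and HTTPException(422) for empty ones.
    file_size = await validate_document_file(file)

    # ... persist the file and/or enqueue the transformation job here ...

    return {"filename": file.filename, "size_bytes": file_size}
```

Because the helper seeks on the underlying file object and rewinds it, the route can still read the upload normally afterwards.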
**Comment** (on the `op.add_column` calls in migration 043)

**Non-nullable columns without defaults will fail if the `job` table has existing rows.**

Adding `organization_id` and `project_id` as `nullable=False` without a `server_default` will cause the migration to fail on databases with existing job records. Consider either making the columns nullable first and tightening them after a backfill (Option 1 below), or adding a `server_default`, backfilling with valid IDs, and then removing the default.

🛠️ Option 1: Two-phase migration (nullable first, backfill, then NOT NULL)

```diff
 op.add_column(
     "job",
     sa.Column(
         "organization_id",
         sa.Integer(),
-        nullable=False,
+        nullable=True,
         comment="Reference to the organization",
     ),
 )
 op.add_column(
     "job",
     sa.Column(
         "project_id",
         sa.Integer(),
-        nullable=False,
+        nullable=True,
         comment="Reference to the project",
     ),
 )
+
+# TODO: Backfill existing rows with valid organization_id and project_id
+# op.execute("UPDATE job SET organization_id = ..., project_id = ... WHERE organization_id IS NULL")
+
+# Then alter columns to NOT NULL after backfill
+op.alter_column("job", "organization_id", nullable=False)
+op.alter_column("job", "project_id", nullable=False)
```
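
For completeness, a hedged sketch of the `server_default` variant mentioned above. The temporary default of `"0"` is purely a placeholder; a real backfill must map each existing job to its actual organization and project before the foreign keys are created:

```python
from alembic import op
import sqlalchemy as sa


def upgrade() -> None:
    # Phase 1: add the columns as NOT NULL with a temporary server default so the
    # ALTER TABLE succeeds even when the job table already contains rows.
    op.add_column(
        "job",
        sa.Column("organization_id", sa.Integer(), nullable=False, server_default="0"),
    )
    op.add_column(
        "job",
        sa.Column("project_id", sa.Integer(), nullable=False, server_default="0"),
    )

    # Phase 2: backfill real values (application-specific; shown only as a placeholder).
    # op.execute("UPDATE job SET organization_id = ..., project_id = ... WHERE organization_id = 0")

    # Phase 3: drop the temporary defaults so new rows must supply real IDs.
    op.alter_column("job", "organization_id", existing_type=sa.Integer(), server_default=None)
    op.alter_column("job", "project_id", existing_type=sa.Integer(), server_default=None)
```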