2022-11-22 16:15:50 -05:00
|
|
|
"""add file expirations
|
|
|
|
|
|
|
|
Revision ID: 939a08e1d6e5
|
|
|
|
Revises: 7e246705da6a
|
|
|
|
Create Date: 2022-11-22 12:16:32.517184
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
# revision identifiers, used by Alembic.
|
|
|
|
revision = '939a08e1d6e5'
|
|
|
|
down_revision = '7e246705da6a'
|
|
|
|
|
|
|
|
from alembic import op
|
|
|
|
from flask import current_app
|
|
|
|
from flask_sqlalchemy import SQLAlchemy
|
|
|
|
from pathlib import Path
|
|
|
|
import sqlalchemy as sa
|
|
|
|
from sqlalchemy.ext.automap import automap_base
|
|
|
|
from sqlalchemy.orm import Session
|
|
|
|
|
|
|
|
import os
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
def get_max_lifespan(filesize: int) -> int:
    """Return the longest lifespan allowed for a file of the given size.

    The limit is derived from the current app's configuration:
    specifically MAX_CONTENT_LENGTH, as well as FHOST_{MIN,MAX}_EXPIRATION.

    This lifespan may be shortened by a user's request, but no files
    should be allowed to expire at a point after this number.

    Args:
        filesize: Size of the file, in the same unit as MAX_CONTENT_LENGTH
            (the caller passes ``os.stat().st_size``).

    Returns:
        The maximum lifespan as a duration in milliseconds.
    """
    cfg = current_app.config
    min_exp = cfg.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000)
    max_exp = cfg.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000)
    # Flask pre-populates MAX_CONTENT_LENGTH with None, so a plain
    # .get(key, default) would hand back None instead of the default
    # whenever the application never configured an upload limit.
    max_size = cfg.get("MAX_CONTENT_LENGTH") or 256 * 1024 * 1024
    # Cubic falloff: an empty file is granted max_exp, a file of exactly
    # max_size is granted min_exp.
    return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3)
|
|
|
|
|
2024-09-27 11:39:18 -04:00
|
|
|
|
2022-11-22 16:15:50 -05:00
|
|
|
Base = automap_base()
|
|
|
|
|
2024-09-27 11:39:18 -04:00
|
|
|
|
2022-11-22 16:15:50 -05:00
|
|
|
def upgrade():
    """Add the ``expiration`` column to ``file`` and backfill it.

    Every row that is not flagged as removed and whose sha256 names an
    entry in the FHOST_STORAGE_PATH directory receives an expiration
    timestamp: the stored file's mtime plus the maximum lifespan allowed
    for its size, both in milliseconds. If the storage directory does not
    exist, only the column is added and nothing is backfilled.
    """
    op.add_column('file', sa.Column('expiration', sa.BigInteger()))

    bind = op.get_bind()
    Base.prepare(autoload_with=bind)
    File = Base.classes.file
    session = Session(bind=bind)

    storage = Path(current_app.config["FHOST_STORAGE_PATH"])

    # Names of the files which have not expired yet.
    # This could get really big for some servers, so it is kept in a set:
    # every database row below is tested for membership against it.
    try:
        unexpired_files = set(os.listdir(storage))
    except FileNotFoundError:
        return  # There are no currently unexpired files

    # Calculate an expiration date for all existing files
    q = session.scalars(
        sa.select(File)
        .where(
            sa.not_(File.removed)
        )
    )

    updates = []  # We coalesce updates to the database here

    # SQLite has a hard limit on the number of variables so we
    # need to do this the slow way
    files = [f for f in q if f.sha256 in unexpired_files]

    for file in files:
        file_path = storage / file.sha256
        stat = os.stat(file_path)
        # How long the file is allowed to live, in ms
        max_age = get_max_lifespan(stat.st_size)
        # When the file was created, in ms (approximated by its mtime)
        file_birth = stat.st_mtime * 1000
        updates.append({
            'id': file.id,
            'expiration': int(file_birth + max_age)})

    # Apply coalesced updates
    session.bulk_update_mappings(File, updates)
    session.commit()
|
|
|
|
|
2024-09-27 11:39:18 -04:00
|
|
|
|
2022-11-22 16:15:50 -05:00
|
|
|
def downgrade():
    """Revert this revision by dropping the ``expiration`` column from ``file``."""
    table, column = 'file', 'expiration'
    op.drop_column(table, column)
|