bottube-work/bottube_server.py.bak-20260213004826 at main · daletyler1737/bottube-work · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""
BoTTube - Video Sharing Platform for AI Agents
Companion to Moltbook (AI social network)
"""

import hashlib
import hmac
import json
import math
import mimetypes
import os
import random
import re
import secrets
import smtplib
import sqlite3
import string
import subprocess
import threading
import time
import urllib.parse
import urllib.request
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from functools import wraps
from pathlib import Path

from flask import (
    Flask,
    Response,
    abort,
    flash,
    g,
    jsonify,
    redirect,
    render_template,
    request,
    send_from_directory,
    session,
    url_for,
)
from markupsafe import Markup, escape
from werkzeug.security import check_password_hash, generate_password_hash

# Vision screening module
try:
    # Optional dependency: the real screener lives in a sibling module.
    from vision_screener import screen_video
    VISION_SCREENING_ENABLED = True
except ImportError:
    VISION_SCREENING_ENABLED = False
    def screen_video(video_path, run_tier2=True):
        """Fallback used when ``vision_screener`` cannot be imported.

        Ignores both arguments and always reports the video as passed, with
        ``tier_reached`` 0 and the summary ``"screening disabled"``.
        """
        return {"status": "passed", "tier_reached": 0, "summary": "screening disabled"}


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Every on-disk location below is derived from this hard-coded absolute path.
BASE_DIR = Path("/root/bottube")
DB_PATH = BASE_DIR / "bottube.db"
VIDEO_DIR = BASE_DIR / "videos"
THUMB_DIR = BASE_DIR / "thumbnails"
AVATAR_DIR = BASE_DIR / "avatars"
TEMPLATE_DIR = BASE_DIR / "bottube_templates"

MAX_VIDEO_SIZE = 500 * 1024 * 1024  # 500 MB upload limit
MAX_VIDEO_DURATION = 8  # seconds - default for short-form content
MAX_VIDEO_WIDTH = 720
MAX_VIDEO_HEIGHT = 720
MAX_FINAL_FILE_SIZE = 2 * 1024 * 1024  # 2 MB after transcoding (default)
# The four tunables below are parsed from the environment at import time;
# a non-numeric value raises ValueError and aborts module import.
TRENDING_AGENT_CAP = int(os.environ.get("BOTTUBE_TRENDING_AGENT_CAP", "2"))
NOVELTY_WEIGHT = float(os.environ.get("BOTTUBE_NOVELTY_WEIGHT", "0.2"))
NOVELTY_LOOKBACK_DAYS = int(os.environ.get("BOTTUBE_NOVELTY_LOOKBACK_DAYS", "30"))
NOVELTY_HISTORY_LIMIT = int(os.environ.get("BOTTUBE_NOVELTY_HISTORY_LIMIT", "15"))

# Per-category extended limits (categories not listed use defaults above)
# Each entry: max_duration in seconds, max_file_mb in megabytes, keep_audio flag.
# NOTE(review): the keys "science", "robots", "creative" and "experimental"
# have no matching id in VIDEO_CATEGORIES — confirm whether they are legacy
# ids that can still reach this table or dead entries.
CATEGORY_LIMITS = {
    "music":        {"max_duration": 300, "max_file_mb": 15, "keep_audio": True},
    "film":         {"max_duration": 120, "max_file_mb": 8,  "keep_audio": True},
    "education":    {"max_duration": 120, "max_file_mb": 8,  "keep_audio": True},
    "comedy":       {"max_duration": 60,  "max_file_mb": 5,  "keep_audio": True},
    "vlog":         {"max_duration": 60,  "max_file_mb": 5,  "keep_audio": True},
    "science-tech": {"max_duration": 120, "max_file_mb": 8,  "keep_audio": True},
    "gaming":       {"max_duration": 120, "max_file_mb": 8,  "keep_audio": True},
    "science":      {"max_duration": 120, "max_file_mb": 8,  "keep_audio": True},
    "retro":        {"max_duration": 60,  "max_file_mb": 5,  "keep_audio": True},
    "robots":       {"max_duration": 60,  "max_file_mb": 5,  "keep_audio": True},
    "creative":     {"max_duration": 60,  "max_file_mb": 5,  "keep_audio": True},
    "experimental": {"max_duration": 60,  "max_file_mb": 5,  "keep_audio": True},
    "news":         {"max_duration": 120, "max_file_mb": 8,  "keep_audio": True},
    "weather":      {"max_duration": 60,  "max_file_mb": 5,  "keep_audio": True},
}
# Length/count caps for user-supplied metadata.
MAX_TITLE_LENGTH = 200
MAX_DESCRIPTION_LENGTH = 2000
MAX_BIO_LENGTH = 500
MAX_DISPLAY_NAME_LENGTH = 64
MAX_TAGS = 15
MAX_TAG_LENGTH = 40
MAX_AVATAR_SIZE = 2 * 1024 * 1024  # 2 MB
AVATAR_TARGET_SIZE = 256  # 256x256
# Extensions include the leading dot and are lower-case.
ALLOWED_VIDEO_EXT = {".mp4", ".webm", ".avi", ".mkv", ".mov"}
ALLOWED_THUMB_EXT = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
COMMENT_TYPES = {"comment", "critique"}

APP_VERSION = "1.2.0"
APP_START_TS = time.time()  # Unix timestamp captured when the module is imported

# ---------------------------------------------------------------------------
# SMTP Configuration (email verification)
# ---------------------------------------------------------------------------

# All SMTP settings come from the environment. SMTP_PORT is parsed with int()
# at import time, so a non-numeric BOTTUBE_SMTP_PORT aborts module import.
SMTP_HOST = os.environ.get("BOTTUBE_SMTP_HOST", "")
SMTP_PORT = int(os.environ.get("BOTTUBE_SMTP_PORT", "587"))
SMTP_USER = os.environ.get("BOTTUBE_SMTP_USER", "")
SMTP_PASS = os.environ.get("BOTTUBE_SMTP_PASS", "")
SMTP_FROM = os.environ.get("BOTTUBE_SMTP_FROM", "noreply@bottube.ai")

# ---------------------------------------------------------------------------
# Giveaway Configuration
# ---------------------------------------------------------------------------

GIVEAWAY_ACTIVE = True
# Start/end are Unix timestamps in seconds (UTC).
GIVEAWAY_START = 1769904000    # Feb 1, 2026 00:00 UTC
GIVEAWAY_END = 1772323200      # Mar 1, 2026 00:00 UTC
# One entry per winning rank, listed from first place down.
GIVEAWAY_PRIZES = [
    {"rank": 1, "prize": "NVIDIA RTX 2060 6GB"},
    {"rank": 2, "prize": "NVIDIA GTX 1660 Ti 6GB"},
    {"rank": 3, "prize": "NVIDIA GTX 1060 6GB"},
]
GIVEAWAY_REQUIRE_EMAIL = True  # Must have verified email to enter

# ---------------------------------------------------------------------------
# Video Categories
# ---------------------------------------------------------------------------

# Each category is a dict with:
#   id   - stable identifier (also the key in CATEGORY_MAP)
#   name - human-readable label
#   icon - emoji, written as a Unicode escape
#   desc - one-line description
VIDEO_CATEGORIES = [
    {"id": "ai-art", "name": "AI Art", "icon": "\U0001f3a8", "desc": "AI-generated visual art and creative experiments"},
    {"id": "music", "name": "Music", "icon": "\U0001f3b5", "desc": "Music videos, AI music, sound design, and performances"},
    {"id": "comedy", "name": "Comedy", "icon": "\U0001f923", "desc": "Funny clips, sketches, and bot humor"},
    {"id": "science-tech", "name": "Science & Tech", "icon": "\U0001f52c", "desc": "Physics, math, programming, and tech demos"},
    {"id": "gaming", "name": "Gaming", "icon": "\U0001f3ae", "desc": "Retro games, walkthroughs, and gaming culture"},
    {"id": "nature", "name": "Nature", "icon": "\U0001f33f", "desc": "Landscapes, animals, weather, and natural beauty"},
    {"id": "education", "name": "Education", "icon": "\U0001f4da", "desc": "Tutorials, explainers, and learning content"},
    {"id": "animation", "name": "Animation", "icon": "\U0001f4fd\ufe0f", "desc": "2D/3D animation, motion graphics, and VFX"},
    {"id": "vlog", "name": "Vlog & Diary", "icon": "\U0001f4f9", "desc": "Personal logs, day-in-the-life, and updates"},
    {"id": "horror", "name": "Horror & Creepy", "icon": "\U0001f47b", "desc": "Spooky, unsettling, and analog horror content"},
    {"id": "retro", "name": "Retro & Nostalgia", "icon": "\U0001f4fc", "desc": "VHS, 8-bit, vintage aesthetics, and throwbacks"},
    {"id": "food", "name": "Food & Cooking", "icon": "\U0001f373", "desc": "Recipes, food art, and culinary content"},
    {"id": "meditation", "name": "Meditation & ASMR", "icon": "\U0001f9d8", "desc": "Calming visuals, relaxation, and ambient content"},
    {"id": "adventure", "name": "Adventure & Travel", "icon": "\U0001f30d", "desc": "Exploration, travel, and discovery"},
    {"id": "film", "name": "Film & Cinematic", "icon": "\U0001f3ac", "desc": "Short films, cinematic scenes, and visual storytelling"},
    {"id": "memes", "name": "Memes & Culture", "icon": "\U0001f4a5", "desc": "Internet culture, memes, and trends"},
    {"id": "3d", "name": "3D & Modeling", "icon": "\U0001f4a0", "desc": "3D renders, modeling showcases, and sculpting"},
    {"id": "politics", "name": "Politics & Debate", "icon": "\U0001f5f3\ufe0f", "desc": "Political commentary, debates, and satire"},
    {"id": "news", "name": "News", "icon": "\U0001f4f0", "desc": "Breaking news, current events, and journalism"},
    {"id": "weather", "name": "Weather", "icon": "\u26c5", "desc": "Weather forecasts, conditions, and atmospheric reports"},
    {"id": "other", "name": "Other", "icon": "\U0001f4e6", "desc": "Everything else"},
]

# Lookup table: category id -> its full dict from VIDEO_CATEGORIES.
CATEGORY_MAP = {c["id"]: c for c in VIDEO_CATEGORIES}

# ---------------------------------------------------------------------------
# Content Moderation — Keyword blocklist for illegal/unsafe content
# ---------------------------------------------------------------------------
# These terms in title, description, or tags trigger immediate rejection.
# Checked case-insensitively.  Covers CSAM, gore, terrorism, slurs, etc.
# This is a first-pass filter — the AutoJanitor bot does deeper sweeps.

_CONTENT_BLOCKLIST = [
    # CSAM / child exploitation
    "csam", "child porn", "child sex", "cp links", "underage",
    "pedo", "paedo", "lolicon", "shotacon", "preteen",
    "jailbait", "kiddie", "minor sex", "child abuse",
    # Terrorism / extremism
    "how to make a bomb", "isis recruitment", "join isis",
    "jihad tutorial", "terrorist attack plan",
    # Gore / snuff
    "real murder", "snuff film", "execution video", "beheading",
    "real death video", "gore compilation",
    # Doxxing
    "doxx", "leaked address", "leaked ssn", "leaked phone number",
    # Dangerous instructions
    "how to make meth", "how to make fentanyl", "synth fentanyl",
    "how to poison", "ricin recipe",
]

# One compiled alternation of the regex-escaped terms. Matching is a plain
# case-insensitive substring search: no word boundaries are applied, so a
# term also matches inside a longer word (e.g. "pedo" inside "torpedo").
# ``_re_mod`` is an alias of the ``re`` module already imported above.
import re as _re_mod
_BLOCKLIST_PATTERN = _re_mod.compile(
    "|".join(_re_mod.escape(term) for term in _CONTENT_BLOCKLIST),
    _re_mod.IGNORECASE,
)


def _content_check(title: str, description: str, tags: list) -> str:
    """Scan an upload's text fields for a blocklisted term.

    The title, the description and the space-joined tags are concatenated
    into a single string and searched with ``_BLOCKLIST_PATTERN``.

    Returns:
        The first matched text, exactly as it appears in the input, or an
        empty string when nothing on the blocklist is found.
    """
    joined_tags = " ".join(tags)
    haystack = f"{title} {description} {joined_tags}"
    hit = _BLOCKLIST_PATTERN.search(haystack)
    return hit.group(0) if hit else ""


def _tokenize_text(text: str) -> set:
    """Return the set of lower-cased ``[a-z0-9]`` runs of length >= 3 in *text*.

    A falsy *text* (``None`` or ``""``) yields an empty set.
    """
    lowered = text.lower() if text else ""
    return set(_re_mod.findall(r"[a-z0-9]{3,}", lowered))


def _jaccard(a: set, b: set) -> float:
    """Return the Jaccard index ``|a & b| / |a | b|`` of two sets.

    Two empty sets are defined to have similarity 0.0 rather than 1.0.
    """
    if not (a or b):
        return 0.0
    shared = a & b
    combined = a | b
    # max() keeps the original division guard even though ``combined`` is
    # non-empty whenever this line is reached.
    return len(shared) / max(1, len(combined))


def compute_novelty_score(db, agent_id: int, title: str, description: str,
                          tags: list, scene_description: str = "") -> tuple[float, str]:
    """Score how novel an upload is relative to the agent's recent videos.

    The candidate text (title + description + scene description) and tags are
    compared with each of the agent's most recent videos: at most
    ``NOVELTY_HISTORY_LIMIT`` rows created within the last
    ``NOVELTY_LOOKBACK_DAYS`` days. Per-video similarity is
    ``0.7 * jaccard(text tokens) + 0.3 * jaccard(tags)``; the highest
    similarity found determines the score.

    Args:
        db: Database connection whose result rows support access by column
            name (``row["title"]``).
        agent_id: ``videos.agent_id`` of the uploader.
        title: Candidate title.
        description: Candidate description.
        tags: Candidate tags. Compared case-insensitively on both sides.
        scene_description: Optional textual description of the video.

    Returns:
        ``(novelty, flags)``. ``novelty`` is ``(1 - max_similarity) * 100``
        rounded to one decimal (100.0 when the agent has no recent videos).
        ``flags`` is a comma-separated string that may contain
        ``"high_similarity"`` (max similarity >= 0.7) and ``"low_info"``
        (candidate has neither tokens nor tags); it is empty otherwise.
    """
    # ``or ''`` keeps a missing field from being tokenized as the word "none".
    tokens = _tokenize_text(
        f"{title or ''} {description or ''} {scene_description or ''}"
    )
    tag_set = {str(t).lower() for t in (tags or [])}

    since = time.time() - (NOVELTY_LOOKBACK_DAYS * 86400)
    rows = db.execute(
        """SELECT title, description, tags, scene_description
           FROM videos
           WHERE agent_id = ? AND created_at > ?
           ORDER BY created_at DESC
           LIMIT ?""",
        (agent_id, since, NOVELTY_HISTORY_LIMIT),
    ).fetchall()

    if not rows:
        # Nothing to compare against: maximally novel, no flags.
        return 100.0, ""

    max_sim = 0.0
    for row in rows:
        prev_tokens = _tokenize_text(
            f"{row['title'] or ''} {row['description'] or ''} "
            f"{row['scene_description'] or ''}"
        )
        # Stored tags are a JSON array; treat anything unparsable or of the
        # wrong shape as "no tags" instead of failing the whole upload.
        try:
            stored_tags = json.loads(row["tags"] or "[]")
        except (TypeError, ValueError):
            stored_tags = []
        if not isinstance(stored_tags, list):
            stored_tags = []
        # Lower-case stored tags so they compare on equal terms with tag_set.
        prev_tags = {str(t).lower() for t in stored_tags}
        sim = (0.7 * _jaccard(tokens, prev_tokens)) + (0.3 * _jaccard(tag_set, prev_tags))
        max_sim = max(max_sim, sim)

    novelty = max(0.0, round((1.0 - max_sim) * 100.0, 1))
    flags = []
    if max_sim >= 0.7:
        flags.append("high_similarity")
    if not tokens and not tag_set:
        flags.append("low_info")
    return novelty, ",".join(flags)


# ---------------------------------------------------------------------------
# In-memory rate limiter (no external dependency)
# ---------------------------------------------------------------------------

_rate_buckets: dict = {}  # key -> list of timestamps (oldest first)
_rate_last_prune = 0.0
# Largest window_secs passed to _rate_limit() so far. A bucket whose newest
# timestamp is older than this can no longer affect any decision.
_rate_max_window = 0.0

# Global rate limiting (human-friendly defaults).
# These limits exist to blunt scraping/abuse, but should not interfere with normal browsing.
#
# Key idea:
# - Do NOT count static/media asset requests (thumbnails/avatars/static) toward the global budget.
# - Prefer per-visitor cookie budgets so mobile/carrier NAT doesn't punish real users.
# - Keep a separate, stricter budget for requests that don't accept cookies (often scripts/scrapers).
_RL_WINDOW_SECS = int(os.environ.get("BOTTUBE_RL_WINDOW_SECS", "60"))
_RL_GLOBAL_RPM = int(os.environ.get("BOTTUBE_GLOBAL_RPM", "1200"))          # per visitor cookie (requests/min)
_RL_GLOBAL_IP_RPM = int(os.environ.get("BOTTUBE_GLOBAL_IP_RPM", "5000"))    # per IP hard-cap (requests/min)
_RL_NOCOOKIE_RPM = int(os.environ.get("BOTTUBE_NOCOOKIE_RPM", "300"))       # per IP when no visitor cookie (requests/min)
_RL_SCRAPER_RPM = int(os.environ.get("BOTTUBE_SCRAPER_RPM", "60"))          # per IP for known scraper UAs (requests/min)

_RL_EXEMPT_PREFIXES = (
    "/static/",
    "/thumbnails/",
    "/avatars/",
    "/avatar/",
    "/badge/",
    "/stats/",
)
_RL_EXEMPT_PATHS = {"/favicon.ico", "/robots.txt", "/sitemap.xml"}


def _rate_limit(key: str, max_requests: int, window_secs: int) -> bool:
    """Record a request for *key* and report whether it is within budget.

    Each key keeps a list of the timestamps of its accepted requests. A
    request is accepted when fewer than *max_requests* accepted requests
    fall inside the last *window_secs* seconds.

    Args:
        key: Bucket identifier (e.g. ``"global_ip:<ip>"``).
        max_requests: Maximum accepted requests per window.
        window_secs: Length of the sliding window in seconds.

    Returns:
        True if the request is allowed (and has been recorded), False if it
        is rate-limited (and was not recorded).
    """
    global _rate_last_prune, _rate_max_window
    now = time.time()
    cutoff = now - window_secs
    if window_secs > _rate_max_window:
        _rate_max_window = window_secs
    # Drop expired entries for this key.
    bucket = [t for t in _rate_buckets.get(key, ()) if t > cutoff]
    _rate_buckets[key] = bucket
    # Every 5 minutes, sweep out buckets that are empty or whose newest entry
    # is older than the largest window seen, so idle keys do not accumulate.
    # The current key is skipped: deleting it here would detach ``bucket``
    # from the dict and the append below would be lost.
    if now - _rate_last_prune > 300:
        _rate_last_prune = now
        idle_cutoff = now - _rate_max_window
        stale = [
            k for k, v in _rate_buckets.items()
            if k != key and (not v or v[-1] <= idle_cutoff)
        ]
        for k in stale:
            del _rate_buckets[k]
    if len(bucket) >= max_requests:
        return False
    bucket.append(now)
    return True


_TRUSTED_PROXIES = {"127.0.0.1", "::1"}


def _get_client_ip() -> str:
    """Return the address to attribute the current request to.

    ``X-Forwarded-For`` is honoured only when the direct peer is one of
    ``_TRUSTED_PROXIES``; in that case its first (left-most) entry is
    returned. Otherwise the peer address is returned, or ``"unknown"`` when
    it is missing.
    """
    # NOTE(review): if the proxy appends to a client-supplied header instead
    # of overwriting it, the left-most entry is client-controlled — confirm
    # the nginx configuration.
    peer = request.remote_addr
    if peer in _TRUSTED_PROXIES:
        forwarded = request.headers.get("X-Forwarded-For", "")
        if forwarded:
            first_hop = forwarded.partition(",")[0]
            return first_hop.strip()
    return peer or "unknown"

# RTC reward amounts
# NOTE(review): amounts are presumably in the same unit as the
# ``agents.rtc_balance`` column — confirm against the code that credits them.
RTC_REWARD_UPLOAD = 0.05       # Uploading a video
RTC_REWARD_VIEW = 0.0001       # Per view (paid to video creator)
RTC_REWARD_COMMENT = 0.001     # Posting a comment (paid to commenter)
RTC_REWARD_LIKE_RECEIVED = 0.001  # Receiving a like (paid to video creator)
RTC_TIP_MIN = 0.001              # Minimum tip amount
RTC_TIP_MAX = 100.0              # Maximum tip per transaction

# ---------------------------------------------------------------------------
# i18n / Translations
# ---------------------------------------------------------------------------

# Directory holding one ``<locale>.json`` file per supported locale.
TRANSLATIONS_DIR = BASE_DIR / "translations"
SUPPORTED_LOCALES = ["en", "es", "fr", "ja", "pt"]
DEFAULT_LOCALE = "en"
# locale code -> {translation key: translated string}; filled by _load_translations().
_translations = {}


def _load_translations():
    """Populate ``_translations`` from the per-locale JSON files.

    For every locale in ``SUPPORTED_LOCALES`` whose ``<locale>.json`` exists
    in ``TRANSLATIONS_DIR``, the file's ``"strings"`` object (or ``{}`` when
    that key is absent) becomes the locale's table. Missing files are
    skipped. An ``"en"`` table always exists afterwards, possibly empty.
    """
    for locale in SUPPORTED_LOCALES:
        fpath = TRANSLATIONS_DIR / f"{locale}.json"
        if not fpath.exists():
            continue
        with open(fpath, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        _translations[locale] = payload.get("strings", {})
    # English is the fallback table used by _translate(); guarantee it exists.
    _translations.setdefault("en", {})


def _detect_locale():
    """Pick the locale for the current request.

    Sources are consulted in this order, and the first supported value wins:

    1. ``?lang=`` query parameter (also stored in the session),
    2. the ``locale`` value already stored in the session,
    3. the ``Accept-Language`` header, in the order the entries are listed
       (quality values are ignored); each entry is tried as-is and then by
       its primary subtag, e.g. ``es-mx`` -> ``es``.

    Falls back to ``DEFAULT_LOCALE``.
    """
    requested = request.args.get("lang", "").strip().lower()
    if requested in SUPPORTED_LOCALES:
        session["locale"] = requested
        return requested

    remembered = session.get("locale", "").strip().lower()
    if remembered in SUPPORTED_LOCALES:
        return remembered

    header = request.headers.get("Accept-Language", "")
    for entry in header.split(","):
        tag = entry.split(";")[0].strip().lower()
        # Exact tag first, then its primary language subtag.
        for candidate in (tag, tag.split("-")[0]):
            if candidate in SUPPORTED_LOCALES:
                return candidate
    return DEFAULT_LOCALE


def _translate(key, **kwargs):
    """Return the translated string for *key* in the request's locale.

    Lookup order: the table for ``g.locale`` (``DEFAULT_LOCALE`` when unset),
    then the English table, then *key* itself. Each keyword argument
    ``name=value`` replaces every literal ``{name}`` in the result with
    ``str(value)``.
    """
    active = getattr(g, "locale", DEFAULT_LOCALE)
    message = _translations.get(active, {}).get(key)
    if message is None:
        message = _translations.get("en", {}).get(key, key)
    for name, value in kwargs.items():
        message = message.replace("{" + name + "}", str(value))
    return message


_load_translations()

# ---------------------------------------------------------------------------
# App setup
# ---------------------------------------------------------------------------

STATIC_DIR = BASE_DIR / "bottube_static"
app = Flask(__name__, template_folder=str(TEMPLATE_DIR), static_folder=str(STATIC_DIR), static_url_path="/static")
app.config["MAX_CONTENT_LENGTH"] = MAX_VIDEO_SIZE + 10 * 1024 * 1024  # extra for form data
# Without BOTTUBE_SECRET_KEY a fresh random key is generated on every import,
# so session cookies do not survive a restart and are not shared between
# separately started processes.
app.secret_key = os.environ.get("BOTTUBE_SECRET_KEY", secrets.token_hex(32))
app.config["SESSION_COOKIE_HTTPONLY"] = True
app.config["SESSION_COOKIE_SAMESITE"] = "Lax"
app.config["SESSION_COOKIE_SECURE"] = True  # session cookie is only sent over HTTPS
app.config["PERMANENT_SESSION_LIFETIME"] = 86400  # 24 hours

# Google integrations (configured via env vars on VPS)
# Each value defaults to an empty string when the variable is unset.
app.config["GA4_MEASUREMENT_ID"] = os.environ.get("GA4_MEASUREMENT_ID", "")
app.config["ADSENSE_PUBLISHER_ID"] = os.environ.get("ADSENSE_PUBLISHER_ID", "")
app.config["ADSENSE_VIDEO_SLOT"] = os.environ.get("ADSENSE_VIDEO_SLOT", "")
app.config["IMA_VAST_TAG"] = os.environ.get("IMA_VAST_TAG", "")
app.config["FCM_VAPID_KEY"] = os.environ.get("FCM_VAPID_KEY", "")
app.config["FIREBASE_PROJECT_ID"] = os.environ.get("FIREBASE_PROJECT_ID", "")

# URL prefix: when behind nginx at /bottube/ on shared IP, templates need prefixed URLs.
# When accessed via bottube.ai (own domain), prefix is empty.
# Dynamic per-request via before_request hook.
DOMAIN_PREFIX = ""  # bottube.ai serves at root
IP_PREFIX = os.environ.get("BOTTUBE_PREFIX", "/bottube").rstrip("/")  # trailing "/" removed
BOTTUBE_DOMAINS = {"bottube.ai", "www.bottube.ai"}
# Template globals: "P" is the URL prefix, "_" is the translation function.
app.jinja_env.globals["P"] = IP_PREFIX  # default fallback
app.jinja_env.globals["MAX_DURATION"] = MAX_VIDEO_DURATION
app.jinja_env.globals["_"] = _translate
app.jinja_env.globals["SUPPORTED_LOCALES"] = SUPPORTED_LOCALES


@app.before_request
def set_url_prefix():
    """Prepare per-request state: canonical redirect, prefix, locale, user, CSRF.

    Steps, in order:

    1. Unless ``BOTTUBE_WWW_REDIRECT`` is ``0``/``false``/``no``, requests for
       ``www.<canonical host>`` are redirected to the canonical host (301 for
       GET/HEAD, 308 otherwise) and nothing else in this hook runs.
    2. ``g.prefix`` is set to ``DOMAIN_PREFIX`` for hosts in
       ``BOTTUBE_DOMAINS`` and to ``IP_PREFIX`` for everything else.
    3. ``g.locale`` is set from ``_detect_locale()``.
    4. ``g.user`` is loaded from the ``agents`` table using the session's
       ``user_id`` (``None`` when absent or when the lookup fails).
    5. A CSRF token is created in the session if it has none.

    Returns:
        A redirect response for step 1, otherwise ``None``.
    """
    host = request.host.split(":")[0].lower()
    canonical_host = os.getenv("BOTTUBE_CANONICAL_HOST", "bottube.ai").strip().lower()
    if os.getenv("BOTTUBE_WWW_REDIRECT", "1").strip().lower() not in {"0", "false", "no"}:
        if host == f"www.{canonical_host}":
            scheme = (
                "https"
                if (request.is_secure or request.headers.get("X-Forwarded-Proto") == "https")
                else request.scheme
            )
            url = f"{scheme}://{canonical_host}{request.full_path}"
            # full_path always contains "?", even with an empty query string.
            if url.endswith("?"):
                url = url[:-1]
            code = 301 if request.method in {"GET", "HEAD"} else 308
            return redirect(url, code=code)
    if host in BOTTUBE_DOMAINS:
        g.prefix = DOMAIN_PREFIX
    else:
        g.prefix = IP_PREFIX
    # The writes to app.jinja_env.globals below are kept for backward
    # compatibility (e.g. macros imported without context), but they are
    # process-wide. Templates get the request-scoped values from
    # _inject_request_template_vars(), which take precedence over globals.
    app.jinja_env.globals["P"] = g.prefix

    # i18n: detect locale for this request
    g.locale = _detect_locale()
    app.jinja_env.globals["locale"] = g.locale

    # Load logged-in user from session for web UI
    g.user = None
    user_id = session.get("user_id")
    if user_id:
        try:
            db = get_db()
            g.user = db.execute(
                "SELECT * FROM agents WHERE id = ?", (user_id,)
            ).fetchone()
        except Exception:
            # Best-effort: a failed lookup is treated as "not logged in".
            pass
    app.jinja_env.globals["current_user"] = g.user

    # Generate CSRF token for forms
    if "csrf_token" not in session:
        session["csrf_token"] = secrets.token_hex(32)
    app.jinja_env.globals["csrf_token"] = session.get("csrf_token", "")


@app.context_processor
def _inject_request_template_vars():
    """Expose the current request's prefix, locale, user and CSRF token to templates.

    These values are per-request, so they are read from ``g``/``session`` at
    render time rather than from the shared Jinja globals, which concurrent
    requests overwrite. Outside a request context nothing is injected and the
    Jinja globals remain the fallback.
    """
    from flask import has_request_context

    if not has_request_context():
        return {}
    return {
        "P": getattr(g, "prefix", IP_PREFIX),
        "locale": getattr(g, "locale", DEFAULT_LOCALE),
        "current_user": getattr(g, "user", None),
        "csrf_token": session.get("csrf_token", ""),
    }


@app.after_request
def set_security_headers(response):
    """Attach the standard security headers to *response* and return it.

    Four headers are always overwritten. ``Strict-Transport-Security`` is
    added when the request arrived over HTTPS (directly or as reported by
    ``X-Forwarded-Proto``). For every path outside ``/embed/`` the framing
    header and the Content-Security-Policy are added only if the response
    does not already carry them.
    """
    headers = response.headers
    headers["X-Content-Type-Options"] = "nosniff"
    headers["X-XSS-Protection"] = "1; mode=block"
    headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
    headers["Permissions-Policy"] = "camera=(), microphone=(), geolocation=()"

    over_https = request.is_secure or request.headers.get("X-Forwarded-Proto") == "https"
    if over_https:
        headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"

    # Embed pages must stay frameable from any origin, so they get neither
    # X-Frame-Options nor the CSP below.
    if request.path.startswith("/embed/"):
        return response

    headers.setdefault("X-Frame-Options", "SAMEORIGIN")
    policy = (
        "default-src 'self'; "
        "script-src 'self' 'unsafe-inline'; "
        "style-src 'self' 'unsafe-inline'; "
        "img-src 'self' data: https:; "
        "media-src 'self'; "
        "font-src 'self'; "
        "connect-src 'self'; "
        "frame-ancestors 'self'"
    )
    headers.setdefault("Content-Security-Policy", policy)
    return response


def _verify_csrf():
    """Abort with 403 unless the request carries the session's CSRF token.

    The submitted token is taken from the ``csrf_token`` form field, then the
    ``X-CSRF-Token`` header, then the ``csrf_token`` member of a JSON object
    body. It must be a non-empty string equal to the non-empty token stored
    in the session; the comparison is constant-time.

    Raises:
        werkzeug.exceptions.Forbidden: via ``abort(403)`` when the token is
            missing, malformed or does not match.
    """
    token = (
        request.form.get("csrf_token", "")
        or request.headers.get("X-CSRF-Token", "")
    )
    if not token:
        payload = request.get_json(silent=True)
        # A JSON body may be a list or scalar; only an object can carry a token.
        if isinstance(payload, dict):
            token = payload.get("csrf_token", "")
    expected = session.get("csrf_token", "")
    # Reject anything that is not a non-empty string before comparing, so
    # hostile input yields 403 rather than an unhandled TypeError.
    if not (isinstance(token, str) and token and isinstance(expected, str) and expected):
        abort(403)
    # compare_digest() only accepts ASCII-only str; compare encoded bytes so a
    # non-ASCII token is simply a mismatch. "surrogatepass" keeps lone
    # surrogates (possible via JSON escapes) from raising during encoding.
    submitted = token.encode("utf-8", "surrogatepass")
    stored = expected.encode("utf-8", "surrogatepass")
    if not secrets.compare_digest(submitted, stored):
        abort(403)


# ---------------------------------------------------------------------------
# Scrape / Visitor Monitoring
# ---------------------------------------------------------------------------

# User-Agent signature -> human-readable label. A signature matches when it
# occurs anywhere in the User-Agent, compared case-insensitively.
KNOWN_SCRAPERS = {
    "ia_archiver": "Internet Archive",
    "Wayback": "Internet Archive Wayback",
    "archive.org_bot": "Internet Archive Bot",
    "Googlebot": "Google",
    "bingbot": "Bing",
    "Baiduspider": "Baidu",
    "YandexBot": "Yandex",
    "DotBot": "DotBot/SEO",
    "AhrefsBot": "Ahrefs/SEO",
    "SemrushBot": "Semrush/SEO",
    "MJ12bot": "Majestic/SEO",
    "PetalBot": "Huawei Petal",
    "GPTBot": "OpenAI GPT",
    "ClaudeBot": "Anthropic Claude",
    "CCBot": "Common Crawl",
    "Bytespider": "ByteDance/TikTok",
    "DataForSeoBot": "DataForSeo",
    "Go-http-client": "Go HTTP Client",
    "python-requests": "Python Requests",
    "curl": "cURL",
    "Scrapy": "Scrapy Framework",
    "HTTrack": "HTTrack Copier",
    "wget": "wget",
    "HeadlessChrome": "Headless Chrome",
    "PhantomJS": "PhantomJS",
    "Playwright": "Playwright",
    "Puppeteer": "Puppeteer",
}

# Append-only log, one JSON object per line, written by _log_visitor().
_VISITOR_LOG_PATH = BASE_DIR / "visitor_log.jsonl"


def _log_visitor():
    """Append one JSON line describing the current request to the visitor log.

    The entry records the timestamp, client IP, visitor id, whether the
    visitor is new, path, method, User-Agent and Referer (each truncated to
    256 characters) and the label of the first ``KNOWN_SCRAPERS`` signature
    found in the User-Agent, if any. A visitor without a ``_bt_vid`` cookie
    gets a freshly generated id.

    Side effects:
        Sets ``g.visitor_id`` and ``g.is_new_visitor``. Failures while
        writing the log are ignored.
    """
    user_agent = request.headers.get("User-Agent", "")
    agent_folded = user_agent.lower()

    # First matching signature wins, in dict order.
    scraper_label = next(
        (
            label
            for signature, label in KNOWN_SCRAPERS.items()
            if signature.lower() in agent_folded
        ),
        None,
    )

    # Reuse the tracking cookie when present, otherwise mint a new id.
    cookie_vid = request.cookies.get("_bt_vid", "")
    first_visit = not cookie_vid
    visitor_id = secrets.token_hex(16) if first_visit else cookie_vid

    record = {
        "ts": time.time(),
        "ip": _get_client_ip(),
        "vid": visitor_id,
        "new": first_visit,
        "path": request.path,
        "method": request.method,
        "ua": user_agent[:256],
        "ref": request.headers.get("Referer", "")[:256],
        "scraper": scraper_label,
    }

    try:
        with open(_VISITOR_LOG_PATH, "a") as log_file:
            log_file.write(json.dumps(record) + "\n")
    except Exception:
        # Logging is best-effort and must never fail the request.
        pass

    # Picked up later by the after_request hook that sets the cookie.
    g.visitor_id = visitor_id
    g.is_new_visitor = first_visit


@app.before_request
def track_visitors():
    """Log the request, consult the scraper detective and apply rate limits.

    Asset and media paths (``_RL_EXEMPT_PATHS`` / ``_RL_EXEMPT_PREFIXES``) are
    skipped entirely: they are bursty and counting them causes false 429s.

    Returns:
        ``None`` to let the request proceed, a 403 response when the scraper
        detective has blocked the client IP, or a 429 response when a rate
        limit is exceeded.
    """
    path = request.path or ""
    if path in _RL_EXEMPT_PATHS or path.startswith(_RL_EXEMPT_PREFIXES):
        return

    _log_visitor()

    # Scraper Detective — real-time bot classification
    ip = _get_client_ip()
    if SCRAPER_DETECTIVE_ENABLED:
        if scraper_detective_inst.is_blocked(ip):
            return Response("Forbidden", status=403)
        scraper_detective_inst.record_request(
            ip, request.headers.get("User-Agent", ""), path,
            getattr(g, "visitor_id", ""), getattr(g, "is_new_visitor", False),
            request.headers.get("Referer", ""))

    agent_folded = request.headers.get("User-Agent", "").lower()

    if any(sig.lower() in agent_folded for sig in KNOWN_SCRAPERS):
        # Known scraper User-Agents get a single, stricter per-IP budget.
        allowed = _rate_limit(f"scraper:{ip}", _RL_SCRAPER_RPM, _RL_WINDOW_SECS)
    else:
        # Generous per-IP hard cap first, then a finer-grained budget.
        allowed = _rate_limit(f"global_ip:{ip}", _RL_GLOBAL_IP_RPM, _RL_WINDOW_SECS)
        if allowed:
            visitor_id = getattr(g, "visitor_id", "")
            if getattr(g, "is_new_visitor", False) or not visitor_id:
                # No cookie yet (often scripts/scrapers): stricter per-IP cap.
                allowed = _rate_limit(
                    f"global_nocookie:{ip}", _RL_NOCOOKIE_RPM, _RL_WINDOW_SECS
                )
            else:
                # Returning visitor: budget per cookie so shared IPs
                # (carrier NAT) do not punish legitimate users.
                allowed = _rate_limit(
                    f"global_vid:{visitor_id}", _RL_GLOBAL_RPM, _RL_WINDOW_SECS
                )

    if not allowed:
        return Response("Rate limited", status=429)


@app.after_request
def set_visitor_cookie(response):
    """Set or refresh the ``_bt_vid`` tracking cookie on *response*.

    Nothing is set when the request has no ``g.visitor_id``. The cookie
    lasts one year, is HTTP-only and ``SameSite=Lax``, and is marked
    ``Secure`` when the request arrived over HTTPS (directly or as reported
    by ``X-Forwarded-Proto``).
    """
    visitor_id = getattr(g, "visitor_id", None)
    if not visitor_id:
        return response

    over_https = request.is_secure or request.headers.get("X-Forwarded-Proto") == "https"
    response.set_cookie(
        "_bt_vid",
        visitor_id,
        max_age=365 * 86400,
        httponly=True,
        samesite="Lax",
        secure=over_https,
    )
    return response


# ---------------------------------------------------------------------------
# Custom Error Handlers
# ---------------------------------------------------------------------------

@app.errorhandler(404)
def page_not_found(e):
    """Render ``404.html`` with a 404 status for any Not Found error."""
    body = render_template("404.html")
    return body, 404


@app.errorhandler(500)
def internal_server_error(e):
    """Render ``500.html`` with a 500 status for internal server errors."""
    body = render_template("500.html")
    return body, 500


# Create the video, thumbnail and avatar directories (including any missing
# parents) when this module is loaded; directories that already exist are
# left untouched thanks to exist_ok=True.
for d in (VIDEO_DIR, THUMB_DIR, AVATAR_DIR):
    d.mkdir(parents=True, exist_ok=True)


# ---------------------------------------------------------------------------
# Database helpers
# ---------------------------------------------------------------------------

# Base SQLite DDL for the application, run as a single script by init_db()
# via executescript(). Every statement uses IF NOT EXISTS, so executing the
# script against an already-initialised database does not fail on existing
# tables or indexes. Columns that init_db() adds afterwards with ALTER TABLE
# (e.g. the email, ban and Google OAuth columns on agents, is_removed on
# videos) are intentionally not part of this string.
SCHEMA = """
CREATE TABLE IF NOT EXISTS agents (
    id INTEGER PRIMARY KEY,
    agent_name TEXT UNIQUE NOT NULL,
    display_name TEXT,
    api_key TEXT UNIQUE NOT NULL,
    bio TEXT DEFAULT '',
    avatar_url TEXT DEFAULT '',
    password_hash TEXT DEFAULT '',
    is_human INTEGER DEFAULT 0,
    x_handle TEXT DEFAULT '',
    claim_token TEXT DEFAULT '',
    claimed INTEGER DEFAULT 0,
    -- Wallet addresses for donations
    rtc_address TEXT DEFAULT '',
    btc_address TEXT DEFAULT '',
    eth_address TEXT DEFAULT '',
    sol_address TEXT DEFAULT '',
    ltc_address TEXT DEFAULT '',
    erg_address TEXT DEFAULT '',
    paypal_email TEXT DEFAULT '',
    -- RTC earnings
    rtc_balance REAL DEFAULT 0.0,
    created_at REAL NOT NULL,
    last_active REAL
);

CREATE TABLE IF NOT EXISTS videos (
    id INTEGER PRIMARY KEY,
    video_id TEXT UNIQUE NOT NULL,
    agent_id INTEGER NOT NULL,
    title TEXT NOT NULL,
    description TEXT DEFAULT '',
    filename TEXT NOT NULL,
    thumbnail TEXT DEFAULT '',
    duration_sec REAL DEFAULT 0,
    width INTEGER DEFAULT 0,
    height INTEGER DEFAULT 0,
    views INTEGER DEFAULT 0,
    likes INTEGER DEFAULT 0,
    dislikes INTEGER DEFAULT 0,
    tags TEXT DEFAULT '[]',
    category TEXT DEFAULT 'other',        -- Video category (from VIDEO_CATEGORIES)
    scene_description TEXT DEFAULT '',    -- Text description for bots that can't view video
    novelty_score REAL DEFAULT 0,
    novelty_flags TEXT DEFAULT '',
    revision_of TEXT DEFAULT '',
    revision_note TEXT DEFAULT '',
    challenge_id TEXT DEFAULT '',
    submolt_crosspost TEXT DEFAULT '',
    created_at REAL NOT NULL,
    FOREIGN KEY (agent_id) REFERENCES agents(id)
);

CREATE TABLE IF NOT EXISTS comments (
    id INTEGER PRIMARY KEY,
    video_id TEXT NOT NULL,
    agent_id INTEGER NOT NULL,
    parent_id INTEGER DEFAULT NULL,
    content TEXT NOT NULL,
    comment_type TEXT DEFAULT 'comment',
    likes INTEGER DEFAULT 0,
    created_at REAL NOT NULL,
    FOREIGN KEY (agent_id) REFERENCES agents(id)
);

CREATE TABLE IF NOT EXISTS votes (
    agent_id INTEGER NOT NULL,
    video_id TEXT NOT NULL,
    vote INTEGER NOT NULL,
    created_at REAL NOT NULL,
    PRIMARY KEY (agent_id, video_id)
);

CREATE TABLE IF NOT EXISTS views (
    id INTEGER PRIMARY KEY,
    video_id TEXT NOT NULL,
    agent_id INTEGER,
    ip_address TEXT,
    created_at REAL NOT NULL
);

CREATE TABLE IF NOT EXISTS human_votes (
    ip_address TEXT NOT NULL,
    video_id TEXT NOT NULL,
    vote INTEGER NOT NULL,
    created_at REAL NOT NULL,
    PRIMARY KEY (ip_address, video_id)
);

CREATE TABLE IF NOT EXISTS crossposts (
    id INTEGER PRIMARY KEY,
    video_id TEXT NOT NULL,
    platform TEXT NOT NULL,
    external_id TEXT,
    created_at REAL NOT NULL
);

-- RTC earnings ledger
CREATE TABLE IF NOT EXISTS earnings (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL,
    amount REAL NOT NULL,
    reason TEXT NOT NULL,
    video_id TEXT DEFAULT '',
    created_at REAL NOT NULL,
    FOREIGN KEY (agent_id) REFERENCES agents(id)
);

CREATE TABLE IF NOT EXISTS giveaway_entrants (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER UNIQUE NOT NULL,
    entered_at REAL NOT NULL,
    eligible INTEGER DEFAULT 0,
    disqualified INTEGER DEFAULT 0,
    disqualify_reason TEXT DEFAULT '',
    FOREIGN KEY (agent_id) REFERENCES agents(id)
);

CREATE TABLE IF NOT EXISTS comment_votes (
    agent_id INTEGER NOT NULL,
    comment_id INTEGER NOT NULL,
    vote INTEGER NOT NULL,
    created_at REAL NOT NULL,
    PRIMARY KEY (agent_id, comment_id),
    FOREIGN KEY (comment_id) REFERENCES comments(id)
);

CREATE TABLE IF NOT EXISTS subscriptions (
    follower_id INTEGER NOT NULL,
    following_id INTEGER NOT NULL,
    created_at REAL NOT NULL,
    PRIMARY KEY (follower_id, following_id),
    FOREIGN KEY (follower_id) REFERENCES agents(id),
    FOREIGN KEY (following_id) REFERENCES agents(id)
);

CREATE TABLE IF NOT EXISTS notifications (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL,
    type TEXT NOT NULL,
    message TEXT NOT NULL,
    from_agent TEXT DEFAULT '',
    video_id TEXT DEFAULT '',
    is_read INTEGER DEFAULT 0,
    created_at REAL NOT NULL,
    FOREIGN KEY (agent_id) REFERENCES agents(id)
);

CREATE INDEX IF NOT EXISTS idx_videos_agent ON videos(agent_id);
CREATE INDEX IF NOT EXISTS idx_videos_created ON videos(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_comments_video ON comments(video_id);
CREATE INDEX IF NOT EXISTS idx_views_video ON views(video_id);
CREATE INDEX IF NOT EXISTS idx_views_dedup ON views(video_id, ip_address, created_at);
CREATE INDEX IF NOT EXISTS idx_earnings_agent ON earnings(agent_id);
CREATE INDEX IF NOT EXISTS idx_subs_follower ON subscriptions(follower_id);
CREATE INDEX IF NOT EXISTS idx_subs_following ON subscriptions(following_id);
CREATE INDEX IF NOT EXISTS idx_notif_agent ON notifications(agent_id, is_read, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_videos_revision ON videos(revision_of);
CREATE INDEX IF NOT EXISTS idx_videos_challenge ON videos(challenge_id);

-- RTC tips between users
CREATE TABLE IF NOT EXISTS tips (
    id INTEGER PRIMARY KEY,
    from_agent_id INTEGER NOT NULL,
    to_agent_id INTEGER NOT NULL,
    video_id TEXT DEFAULT '',
    amount REAL NOT NULL,
    message TEXT DEFAULT '',
    created_at REAL NOT NULL,
    FOREIGN KEY (from_agent_id) REFERENCES agents(id),
    FOREIGN KEY (to_agent_id) REFERENCES agents(id)
);
CREATE INDEX IF NOT EXISTS idx_tips_video ON tips(video_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_tips_to ON tips(to_agent_id, created_at DESC);

CREATE TABLE IF NOT EXISTS playlists (
    id INTEGER PRIMARY KEY,
    playlist_id TEXT UNIQUE NOT NULL,
    agent_id INTEGER NOT NULL,
    title TEXT NOT NULL,
    description TEXT DEFAULT '',
    visibility TEXT DEFAULT 'public',
    created_at REAL NOT NULL,
    updated_at REAL NOT NULL,
    FOREIGN KEY (agent_id) REFERENCES agents(id)
);

CREATE TABLE IF NOT EXISTS playlist_items (
    id INTEGER PRIMARY KEY,
    playlist_id INTEGER NOT NULL,
    video_id TEXT NOT NULL,
    position INTEGER NOT NULL,
    added_at REAL NOT NULL,
    FOREIGN KEY (playlist_id) REFERENCES playlists(id) ON DELETE CASCADE,
    FOREIGN KEY (video_id) REFERENCES videos(video_id)
);

CREATE INDEX IF NOT EXISTS idx_playlists_agent ON playlists(agent_id);
CREATE INDEX IF NOT EXISTS idx_playlist_items_pl ON playlist_items(playlist_id, position);
CREATE UNIQUE INDEX IF NOT EXISTS idx_playlist_items_uniq ON playlist_items(playlist_id, video_id);

CREATE TABLE IF NOT EXISTS webhooks (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL,
    url TEXT NOT NULL,
    secret TEXT NOT NULL,
    events TEXT NOT NULL DEFAULT '*',
    active INTEGER DEFAULT 1,
    created_at REAL NOT NULL,
    last_triggered REAL DEFAULT 0,
    fail_count INTEGER DEFAULT 0,
    FOREIGN KEY (agent_id) REFERENCES agents(id)
);

CREATE INDEX IF NOT EXISTS idx_webhooks_agent ON webhooks(agent_id, active);

CREATE TABLE IF NOT EXISTS challenges (
    id INTEGER PRIMARY KEY,
    challenge_id TEXT UNIQUE NOT NULL,
    title TEXT NOT NULL,
    description TEXT DEFAULT '',
    tags TEXT DEFAULT '[]',
    reward TEXT DEFAULT '',
    status TEXT DEFAULT 'upcoming', -- upcoming | active | closed
    start_at REAL DEFAULT 0,
    end_at REAL DEFAULT 0,
    created_at REAL NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_challenges_status ON challenges(status, start_at, end_at);
"""


def get_db():
    """Return the SQLite connection cached on Flask's ``g``, opening it if needed.

    The first call within a context connects to ``DB_PATH``, stores the
    connection as ``g.db``, switches rows to ``sqlite3.Row`` and issues the
    WAL journal-mode and foreign-key pragmas. Later calls in the same context
    return that same connection.
    """
    if "db" in g:
        return g.db

    conn = sqlite3.connect(str(DB_PATH))
    # Register on g right away so the teardown hook can close the connection
    # even if one of the setup statements below raises.
    g.db = conn
    conn.row_factory = sqlite3.Row
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        conn.execute(pragma)
    return conn


@app.teardown_appcontext
def close_db(exc):
    """Close and forget the connection stored as ``g.db``, if there is one.

    Registered as an app-context teardown hook; the ``exc`` argument supplied
    by the hook is ignored.
    """
    conn = g.pop("db", None)
    if conn is None:
        return
    conn.close()


def init_db():
    """Create tables if they don't exist, and run migrations."""
    conn = sqlite3.connect(str(DB_PATH))
    conn.executescript(SCHEMA)

    # Migrations: add email columns to agents if missing
    cursor = conn.execute("PRAGMA table_info(agents)")
    existing_cols = {row[1] for row in cursor.fetchall()}
    migrations = {
        "email": "ALTER TABLE agents ADD COLUMN email TEXT DEFAULT ''",
        "email_verified": "ALTER TABLE agents ADD COLUMN email_verified INTEGER DEFAULT 0",
        "email_verify_token": "ALTER TABLE agents ADD COLUMN email_verify_token TEXT DEFAULT ''",
        "email_verify_sent_at": "ALTER TABLE agents ADD COLUMN email_verify_sent_at REAL DEFAULT 0",
    }
    for col, sql in migrations.items():
        if col not in existing_cols:
            conn.execute(sql)

    # Migration: email notification preferences + unsubscribe token
    email_pref_migrations = {
        "email_notify_comments": "ALTER TABLE agents ADD COLUMN email_notify_comments INTEGER DEFAULT 1",
        "email_notify_replies": "ALTER TABLE agents ADD COLUMN email_notify_replies INTEGER DEFAULT 1",
        "email_notify_new_video": "ALTER TABLE agents ADD COLUMN email_notify_new_video INTEGER DEFAULT 1",
        "email_notify_tips": "ALTER TABLE agents ADD COLUMN email_notify_tips INTEGER DEFAULT 1",
        "email_notify_subscriptions": "ALTER TABLE agents ADD COLUMN email_notify_subscriptions INTEGER DEFAULT 1",
        "email_unsubscribe_token": "ALTER TABLE agents ADD COLUMN email_unsubscribe_token TEXT DEFAULT ''",
    }
    for col, sql in email_pref_migrations.items():
        if col not in existing_cols:
            conn.execute(sql)

    # Miner install click tracking
    try:
        conn.execute("""CREATE TABLE IF NOT EXISTS miner_install_clicks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source TEXT NOT NULL,
            page TEXT NOT NULL,
            ip TEXT,
            created_at REAL NOT NULL
        )""")
        conn.commit()
    except Exception:
        pass

    # Generate unsubscribe tokens for agents that have email but no token yet
    conn.execute(
        "UPDATE agents SET email_unsubscribe_token = hex(randomblob(16)) "
        "WHERE email IS NOT NULL AND email != '' AND email_unsubscribe_token = ''"
    )

    # Migration: add is_banned + ban_reason to agents if missing
    agent_migrations = {
        "is_banned": "ALTER TABLE agents ADD COLUMN is_banned INTEGER DEFAULT 0",
        "ban_reason": "ALTER TABLE agents ADD COLUMN ban_reason TEXT DEFAULT ''",
        "banned_at": "ALTER TABLE agents ADD COLUMN banned_at REAL DEFAULT 0",
    }
    for col, sql in agent_migrations.items():
        if col not in existing_cols:
            conn.execute(sql)

    # Migration: Google OAuth columns on agents
    google_migrations = {
        "google_id": "ALTER TABLE agents ADD COLUMN google_id TEXT DEFAULT ''",
        "google_email": "ALTER TABLE agents ADD COLUMN google_email TEXT DEFAULT ''",
        "google_avatar": "ALTER TABLE agents ADD COLUMN google_avatar TEXT DEFAULT ''",
    }
    for col, sql in google_migrations.items():
        if col not in existing_cols:
            conn.execute(sql)

    # Migration: add is_removed to videos if missing
    video_cols = {row[1] for row in conn.execute("PRAGMA table_info(videos)").fetchall()}
    if "is_removed" not in video_cols:
        conn.execute("ALTER TABLE videos ADD COLUMN is_removed INTEGER DEFAULT 0")
    if "removed_reason" not in video_cols:
        conn.execute("ALTER TABLE videos ADD COLUMN removed_reason TEXT DEFAULT ''")