From 6c3a55d70d537abaf87d5e31080ad39b37dfb5ae Mon Sep 17 00:00:00 2001
From: Jacob Babor
Date: Sat, 18 Jan 2025 23:06:34 -0600
Subject: [PATCH] Add initial work on odysee scraper doohickey

---
 odyseescraper/models.py | 13 +++++++++-
 odyseescraper/odysee.py | 55 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 odyseescraper/odysee.py

diff --git a/odyseescraper/models.py b/odyseescraper/models.py
index 71a8362..92c2eef 100644
--- a/odyseescraper/models.py
+++ b/odyseescraper/models.py
@@ -1,3 +1,14 @@
+import uuid
 from django.db import models
+from . import odysee
 
-# Create your models here.
+class OdyseeChannel(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
+    name = models.CharField(max_length=1024)
+    handle = models.CharField(max_length=1024)
+
+class OdyseeRelease(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
+    channel = models.ForeignKey(OdyseeChannel, on_delete=models.CASCADE)
+    name = models.CharField(max_length=1024)
+    url = models.URLField(max_length=512)
diff --git a/odyseescraper/odysee.py b/odyseescraper/odysee.py
new file mode 100644
index 0000000..2021ba1
--- /dev/null
+++ b/odyseescraper/odysee.py
@@ -0,0 +1,55 @@
+#! /usr/bin/env python3
+import json
+import requests
+
+odysee_url = r'https://odysee.com'
+odysee_api_url = r'https://api.na-backend.odysee.com/api/v1/proxy'
+
+def odysee_get_channel_url(handle):
+    return f'{odysee_url}/{handle}'
+
+def odysee_get_releases(handle):
+    releases = {}
+    try:
+        for i in range(1,20):
+            print(f'Examining page {i} for handle {handle}')
+            payload = {
+                "method": "claim_search",
+                "params": {
+                    "channel": handle,
+                    "page_size": 20,
+                    "page": i
+                }
+            }
+            response = requests.post(odysee_api_url, json=payload)
+            response.raise_for_status()
+            data = response.json()
+            lastpage = data.get("result", {}).get("total_pages", 1)
+            items = data.get("result", {}).get("items", [])
+            for raw_item in items:
+                item = raw_item
+                if item["value_type"] == "repost":
+                    item = raw_item["reposted_claim"]
+                elif item["value_type"] == "stream":
+                    # This is known to be a zip file
+                    pass
+                else:
+                    print(f'Unknown value type, continuing: {item["value_type"]}')
+                print(f'Importing item {item["claim_id"]}')
+                releases[item["claim_id"]] = {
+                    "name": item.get("name", "Unnamed Release"),
+                    "title": item["value"].get("title", "Untitled Release"),
+                    "description": item["value"].get("description", "No description provided for this release"),
+                    "thumbnail": item["value"].get("thumbnail", {}).get("url", ""),
+                    "url": f"{odysee_url}/{handle}/{item['name']}"
+                }
+            if i == lastpage:
+                break
+    except requests.RequestException as e:
+        print(f'RequestException occurred while getting releases for {handle}: {e}')
+        return None
+    except KeyError as e:
+        print(f'KeyError occurred while getting releases for {handle}: {e}')
+        print('Nonzero chance Odysee updated their API out from under you')
+        return None
+    return releases
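
Note (not part of the patch): a minimal sketch of how odysee_get_releases() could be wired into the new models, to show how the pieces are meant to fit together. The sync_channel() helper, its arguments, and the choice to deduplicate on title/url are illustrative assumptions, not anything this patch defines.

# Hypothetical glue code: persist scraped releases into the models above.
from odyseescraper import odysee
from odyseescraper.models import OdyseeChannel, OdyseeRelease

def sync_channel(handle, display_name):
    # Look up or create the channel row for this Odysee handle (e.g. '@SomeChannel').
    channel, _ = OdyseeChannel.objects.get_or_create(
        handle=handle, defaults={'name': display_name}
    )
    releases = odysee.odysee_get_releases(handle)
    if releases is None:
        # Scrape failed; odysee_get_releases() already printed why.
        return channel, 0
    created = 0
    for release in releases.values():
        # Deduplicate on title + url purely for illustration; a fuller schema
        # would likely store claim_id on OdyseeRelease and key on that instead.
        _, was_created = OdyseeRelease.objects.get_or_create(
            channel=channel,
            name=release['title'],
            url=release['url'],
        )
        created += int(was_created)
    return channel, created

Storing claim_id on OdyseeRelease would make re-runs idempotent even if a release is renamed; with only name/url available, a renamed release shows up as a new row.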