Add initial work on the Odysee scraper

This commit is contained in:
Salt 2025-01-18 23:06:34 -06:00
parent 3c2905153e
commit 6c3a55d70d
2 changed files with 67 additions and 1 deletions

View File

@ -1,3 +1,14 @@
import uuid
from django.db import models
from . import odysee
# Create your models here.
class OdyseeChannel(models.Model):
    """An Odysee channel tracked by the scraper."""
    # Surrogate UUID primary key, generated client-side and immutable in forms.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Human-readable channel name.
    name = models.CharField(max_length=1024)
    # Channel handle — presumably the "@name" segment used to build Odysee
    # URLs (see odysee.odysee_get_channel_url); confirm expected format.
    handle = models.CharField(max_length=1024)
class OdyseeRelease(models.Model):
    """A single release (claim) published by an OdyseeChannel."""
    # Surrogate UUID primary key, generated client-side and immutable in forms.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Owning channel; releases are removed when their channel is deleted.
    channel = models.ForeignKey(OdyseeChannel, on_delete=models.CASCADE)
    # Release name/title as reported by Odysee.
    name = models.CharField(max_length=1024)
    # Public Odysee URL for this release.
    url = models.URLField(max_length=512)

55
odyseescraper/odysee.py Normal file
View File

@ -0,0 +1,55 @@
#! /usr/bin/env python3
import json
import requests
odysee_url = r'https://odysee.com'
odysee_api_url = r'https://api.na-backend.odysee.com/api/v1/proxy'
def odysee_get_channel_url(handle):
    """Return the public Odysee page URL for the given channel handle."""
    return odysee_url + '/' + handle
def odysee_get_releases(handle):
    """Fetch every claim published by an Odysee channel via the claim_search API.

    Pages through the proxy API until the reported ``total_pages`` is reached
    (the previous hard cap of 19 pages silently truncated larger channels).

    Args:
        handle: Channel handle, e.g. "@somechannel".

    Returns:
        dict mapping claim_id -> {"name", "title", "description", "thumbnail",
        "url"}, or None if the request fails or the response shape is
        unexpected.
    """
    releases = {}
    try:
        page = 1
        while True:
            print(f'Examining page {page} for handle {handle}')
            payload = {
                "method": "claim_search",
                "params": {
                    "channel": handle,
                    "page_size": 20,
                    "page": page
                }
            }
            response = requests.post(odysee_api_url, json=payload)
            response.raise_for_status()
            data = response.json()
            result = data.get("result", {})
            lastpage = result.get("total_pages", 1)
            for raw_item in result.get("items", []):
                item = raw_item
                if item["value_type"] == "repost":
                    # A repost wraps the original claim; import the original.
                    item = raw_item["reposted_claim"]
                elif item["value_type"] == "stream":
                    # This is known to be a zip file
                    pass
                else:
                    # Bug fix: the value was previously outside the f-string
                    # braces and never interpolated. NOTE: unknown types still
                    # fall through and get imported (message says "continuing").
                    print(f'Unknown value type, continuing: {item["value_type"]}')
                print(f'Importing item {item["claim_id"]}')
                releases[item["claim_id"]] = {
                    "name": item.get("name", "Unnamed Release"),
                    "title": item["value"].get("title", "Untitled Release"),
                    "description": item["value"].get("description", "No description provided for this release"),
                    "thumbnail": item["value"].get("thumbnail", {}).get("url", ""),
                    "url": f"{odysee_url}/{handle}/{item['name']}"
                }
            if page >= lastpage:
                break
            page += 1
    except requests.RequestException as e:
        print(f'RequestException occurred while getting releases for {handle}: {e}')
        return None
    except KeyError as e:
        # Missing keys most likely mean the API response shape changed.
        print(f'KeyError occurred while getting releases for {handle}: {e}')
        print('Nonzero chance Odysee updated their API out from under you')
        return None
    return releases