Initial working implementation
This commit is contained in:
@@ -1 +1,2 @@
|
|||||||
|
ollama
|
||||||
yt_dlp
|
yt_dlp
|
||||||
|
99
summarize.py
99
summarize.py
@@ -1,2 +1,101 @@
|
|||||||
#! /usr/bin/env python3
|
#! /usr/bin/env python3
|
||||||
|
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import yt_dlp
|
||||||
|
from ollama import chat, ChatResponse, Client
|
||||||
|
|
||||||
|
# Video to summarize.
# Alternative sample video: 'https://youtu.be/jl4HOY8ZaEA'
video_url = 'https://www.youtube.com/watch?v=kTctVqjhDEw'

# Options handed to yt_dlp.YoutubeDL further down. The script only wants
# metadata and subtitle tracks, never the media itself.
ydl_opts = dict(
    # Enable downloading subtitles.
    writesubtitles=True,
    # Subtitle language(s) to request.
    subtitleslangs=['en'],
    # Skip downloading the video itself.
    skip_download=True,
    # '-' is used as the output template so nothing lands in a named file.
    outtmpl='-',
    # Suppress console output.
    quiet=True,
    # Minimal format setting for metadata extraction.
    format='bestaudio/best',
    # Store metadata, including subtitle URLs.
    writeinfojson=True,
)
|
||||||
|
|
||||||
|
# Shared ollama client; talks to the server at localhost:11434.
ol_client = Client(host='http://localhost:11434')
|
||||||
|
|
||||||
|
def get_summary(subtitles, model='frowning/llama3-nymeria:15b-q6_k'):
    """
    Gets a summary from a local ollama installation given a string with subtitles in it.

    The request goes through the module-level ``ol_client`` and consists of
    a fixed system prompt plus one user message that carries the transcript.

    Args:
        subtitles (str): A string with subs. It is passed through ``str()``
            before being appended to the user prompt.
        model (str): Name of the ollama model to query. Defaults to the
            model this script was originally hard-wired to, so existing
            single-argument calls behave as before.

    Returns:
        str: A string with the AI's response in it (the ``content`` of the
        response's ``message``).
    """
    system_prompt = (
        'Your job is to summarize YouTube videos given a (potentially '
        'auto-generated) transcript. Summarize the video, cutting out sponsor '
        'segments and advertisements. Include all core points in the video. '
        'Be as detailed as possible.'
    )
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': 'Please summarize this video: ' + str(subtitles)},
    ]

    response: ChatResponse = ol_client.chat(model=model, messages=messages)
    return response['message']['content']
|
||||||
|
|
||||||
|
def concatenate_subtitles(subtitle_json):
    """
    Concatenates all subtitle text from the given JSON object.

    The expected layout is ``{'events': [{'segs': [{'utf8': '...'}, ...]}, ...]}``.
    Events without ``segs`` and segments without ``utf8`` are skipped.
    Missing or null ``events``/``segs`` values, and input that is not a
    dictionary at all (for example ``None``), yield no text instead of
    raising.

    Args:
        subtitle_json (dict): A dictionary containing subtitle data.

    Returns:
        str: A single string with all subtitle text joined by single spaces,
        in document order; ``''`` when there is no text.
    """
    if not isinstance(subtitle_json, dict):
        return ''

    texts = []
    # `or []` covers both a missing key and an explicit null value.
    for event in subtitle_json.get('events') or []:
        texts.extend(
            seg['utf8'] for seg in event.get('segs') or [] if 'utf8' in seg
        )

    return ' '.join(texts)
|
||||||
|
|
||||||
|
def _select_subtitle_url(tracks, lang):
    """Return the download URL for *lang* from a subtitle mapping, or None.

    Args:
        tracks (dict | None): Mapping of language code to a list of format
            entries, as found under ``subtitles`` / ``automatic_captions``
            in the extracted info. Each entry is read for ``ext`` and ``url``.
        lang (str): Language code to look up.

    Returns:
        str | None: URL of the chosen format, or None when the language is
        absent or has no formats listed.
    """
    formats = (tracks or {}).get(lang) or []
    if not formats:
        return None
    # The downloaded payload is fed to json.loads() and then walked as
    # events/segs by concatenate_subtitles(), so ask for the 'json3' entry
    # explicitly rather than trusting it to be listed first. Fall back to
    # the first entry, which is what was always taken before.
    chosen = next((fmt for fmt in formats if fmt.get('ext') == 'json3'), formats[0])
    return chosen.get('url')


subtitle_lang = 'en'  # Change this to your desired language

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    # Extract metadata without downloading the video
    info = ydl.extract_info(video_url, download=False)

    # Prefer manually authored subtitles; fall back to auto-generated ones.
    subtitle_url = _select_subtitle_url(info.get('subtitles'), subtitle_lang)
    if subtitle_url:
        print(f"Downloading manual subtitles for language: {subtitle_lang}")
    else:
        subtitle_url = _select_subtitle_url(info.get('automatic_captions'), subtitle_lang)
        if subtitle_url:
            print(f"No manual subtitles available. Falling back to auto-generated subtitles for language: {subtitle_lang}")
        else:
            print(f"No subtitles (manual or auto-generated) available for language: {subtitle_lang}")

    if not subtitle_url:
        print("Failed to download subtitles.")
        # SystemExit directly: the exit() builtin is installed by the site
        # module for interactive use and is not guaranteed to exist.
        raise SystemExit(50)

    # Fetch through the YoutubeDL instance while it is still open.
    subtitle_data = ydl.urlopen(subtitle_url).read().decode('utf-8')

subs = concatenate_subtitles(json.loads(subtitle_data))
print("Getting summary...")
summary = get_summary(subs)
print(summary)
|
||||||
|
Reference in New Issue
Block a user