summarize.py

#!/usr/bin/env python3
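"""Summarize a YouTube video from its subtitles using a local Ollama instance.

Typical usage (URL illustrative):

    ./summarize.py 'https://www.youtube.com/watch?v=<video-id>'
    ./summarize.py -v 'https://www.youtube.com/watch?v=<video-id>'

Exit codes: 51 if no usable subtitles exist, 50 if the subtitle download fails.
"""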

import argparse
import json
import sys
from textwrap import wrap

import yt_dlp
from ollama import ChatResponse, Client

ydl_opts = {
    'writesubtitles': True,        # Enable downloading subtitles
    'subtitleslangs': ['en'],      # Specify subtitle language(s)
    'skip_download': True,         # Skip downloading the video itself
    'outtmpl': '-',                # Use '-' to avoid writing to a file
    'quiet': True,                 # Suppress console output
    'format': 'bestaudio/best',    # Minimal format setting for metadata extraction
    'writeinfojson': True,         # Store metadata, including subtitle URLs
}

ol_client = Client(
    host='http://localhost:11434'
)

# All three chat calls use the same model; factored out so they stay in sync.
MODEL = 'frowning/llama3-nymeria:15b-q6_k'
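
# NOTE: this assumes a local Ollama server on its default port (11434) with the
# model above already pulled (e.g. `ollama pull frowning/llama3-nymeria:15b-q6_k`).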


def refine(subtitles):
    """Correct grammar and spelling in one chunk of an auto-generated transcript."""
    response: ChatResponse = ol_client.chat(model=MODEL, messages=[
        {
            'role': 'system',
            'content': 'Your job is to refine auto-generated subtitles from YouTube. '
                       'You will be given a snippet of a transcript of a YouTube video '
                       'that may or may not split at a sentence boundary. You are to '
                       'ONLY correct grammar and spelling mistakes in that transcript. '
                       'If you encounter a "[ __ ]" segment, a swear has been redacted. '
                       'Your text will be concatenated with other snippets, so it is '
                       'important that you only spit back the corrected transcript and '
                       'not any notes, headers, etc.'
        },
        {
            'role': 'user',
            'content': str(subtitles)
        }
    ])
    return response['message']['content']
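

# get_pre_summary() and get_summary() below form a simple two-pass summarization:
# summarize each transcript chunk, then summarize the concatenated chunk summaries.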
def get_pre_summary(subtitles):
    """Summarize one chunk of the transcript."""
    response: ChatResponse = ol_client.chat(model=MODEL, messages=[
        {
            'role': 'system',
            'content': 'Your job is to summarize a snippet of a YouTube video given a '
                       'chunk of its transcript. Summarize the snippet to the best of '
                       'your ability.'
        },
        {
            'role': 'user',
            'content': str(subtitles)
        }
    ])
    return response['message']['content']


def get_summary(subtitles):
    """Produce the final summary from the concatenated chunk summaries."""
    response: ChatResponse = ol_client.chat(model=MODEL, messages=[
        {
            'role': 'system',
            'content': 'Your job is to summarize YouTube videos given a series of '
                       'summaries of snippets of the YouTube video. Given those '
                       'snippets, summarize the YouTube video.'
        },
        {
            'role': 'user',
            'content': str(subtitles)
        }
    ])
    return response['message']['content']
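

# concatenate_subtitles() below assumes YouTube's json3 subtitle payload, which
# looks roughly like this (trimmed, illustrative):
#   {"events": [{"segs": [{"utf8": "hello "}, {"utf8": "world"}]}]}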
def concatenate_subtitles(subtitle_json):
    """
    Concatenates all subtitle text from the given JSON object.

    Args:
        subtitle_json (dict): A dictionary containing subtitle data.

    Returns:
        str: A single string with all concatenated subtitle text.
    """
    result = []

    # Check if the 'events' key is in the JSON object
    if 'events' in subtitle_json:
        for event in subtitle_json['events']:
            # Check if 'segs' is in the event and concatenate 'utf8' text from each segment
            if 'segs' in event:
                for seg in event['segs']:
                    if 'utf8' in seg:
                        result.append(seg['utf8'])

    # Join all collected text with spaces and return
    return ' '.join(result)


def main():
    parser = argparse.ArgumentParser(
        description="Download subtitles from a video and summarize it using a local Ollama instance."
    )
    parser.add_argument('url', metavar='URL', type=str, help="The URL of the video to process.")
    parser.add_argument('-v', '--verbose', action='store_true', help="Enable verbose output.")

    # Parse out arguments
    args = parser.parse_args()
    video_url = args.url

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # Extract metadata without downloading the video
        info = ydl.extract_info(video_url, download=False)
        print(f"Summarizing video: {info.get('title', 'Unknown Title')}...")

        # Check if subtitles are available
        subtitle_lang = 'en'  # Change this to your desired language
        subtitles_available = info.get('subtitles', {})
        automatic_subtitles_available = info.get('automatic_captions', {})
        autogenned = False
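
        # info['subtitles'] maps language codes to manually uploaded tracks;
        # info['automatic_captions'] does the same for auto-generated ones.
        # Each entry is a list of format dicts with 'url' and 'ext' keys.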
        if subtitle_lang in subtitles_available:
            print(f"Downloading manual subtitles for language: {subtitle_lang}...")
            tracks = subtitles_available[subtitle_lang]
            # concatenate_subtitles() expects the json3 format, so prefer it when present
            subtitle_url = next((t['url'] for t in tracks if t.get('ext') == 'json3'), tracks[0]['url'])
        elif subtitle_lang in automatic_subtitles_available:
            print(f"No manual subtitles available. Falling back to auto-generated subtitles for language: {subtitle_lang}...")
            tracks = automatic_subtitles_available[subtitle_lang]
            subtitle_url = next((t['url'] for t in tracks if t.get('ext') == 'json3'), tracks[0]['url'])
            autogenned = True
        else:
            print(f"No subtitles (manual or auto-generated) available for language: {subtitle_lang}!")
            sys.exit(51)

        # If a subtitle URL was found, download the subtitles
        if subtitle_url:
            subtitle_data = ydl.urlopen(subtitle_url).read().decode('utf-8')
        else:
            print("Failed to download subtitles!")
            sys.exit(50)

        subs = concatenate_subtitles(json.loads(subtitle_data))

        # If we have auto-generated subtitles, refine them a bit:
        if autogenned:
            print("Refining transcript...")
            # We split this into smaller chunks to urge the AI to only do small pieces
            chunked = wrap(subs, 2048)
            print(f"Splitting text into {len(chunked)} segments...")
            refined = []
            for snippet in chunked:
                if args.verbose:
                    print(f"Unrefined: {snippet}")
                ref = refine(snippet)
                if args.verbose:
                    print(f"Refined: {ref}")
                refined.append(ref)
            # wrap() strips the whitespace at chunk boundaries, so rejoin with spaces
            subs = ' '.join(refined)
            if args.verbose:
                print(subs)

        if args.verbose:
            print("Getting summary...")
        # Now chunk the subs up and get summaries of segments
        chunked = wrap(subs, 4096)
        print(f"Splitting text into {len(chunked)} segments...")
        pre_summaries = []
        for snippet in chunked:
            pre_summary = get_pre_summary(snippet)
            if args.verbose:
                print(f"Presummary: {pre_summary}")
            pre_summaries.append(pre_summary)
        # And a summary of the whole, built from the joined per-chunk summaries
        summary = get_summary('\n'.join(pre_summaries))
        print(f"Summary: {summary}")


if __name__ == '__main__':
    main()