summarizeyt/summarize.py

#!/usr/bin/env python3
import argparse
import json
from textwrap import wrap

import yt_dlp
from ollama import ChatResponse, Client

ydl_opts = {
    'writesubtitles': True,      # Enable downloading subtitles
    'subtitleslangs': ['en'],    # Specify subtitle language(s)
    'skip_download': True,       # Skip downloading the video itself
    'outtmpl': '-',              # Use '-' to avoid writing to a file
    'quiet': True,               # Suppress console output
    'format': 'bestaudio/best',  # Minimal format setting for metadata extraction
    'writeinfojson': True        # Store metadata, including subtitle URLs
}
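
# Because skip_download is set, yt-dlp never writes any media to disk: the
# script only reads subtitle URLs out of the metadata that extract_info()
# returns, then fetches the caption JSON itself via ydl.urlopen() in main().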

ol_client = Client(
    host='http://localhost:11434'  # Ollama's default local endpoint
)


def refine(subtitles):
    response: ChatResponse = ol_client.chat(model='frowning/llama3-nymeria:15b-q6_k', messages=[
        {
            'role': 'system',
            'content': 'Your job is to refine auto-generated subtitles from YouTube. You will be given a snippet of a transcript of a YouTube video that may or may not split at a sentence boundary. You are to ONLY correct grammar and spelling mistakes in that transcript. If you encounter a "[ __ ]" segment, a swear has been redacted. Your text will be concatenated with other snippets, so it is important that you only spit back the corrected transcript and not any notes, headers, etc.'
        },
        {
            'role': 'user',
            'content': str(subtitles)
        }
    ])
    return response['message']['content']


def get_pre_summary(subtitles):
    response: ChatResponse = ol_client.chat(model='frowning/llama3-nymeria:15b-q6_k', messages=[
        {
            'role': 'system',
            'content': 'Your job is to summarize a snippet of a YouTube video given a chunk of its transcript. Summarize the snippet to the best of your ability.'
        },
        {
            'role': 'user',
            'content': str(subtitles)
        }
    ])
    return response['message']['content']


def get_summary(subtitles):
    response: ChatResponse = ol_client.chat(model='frowning/llama3-nymeria:15b-q6_k', messages=[
        {
            'role': 'system',
            'content': 'Your job is to summarize YouTube videos given a series of summaries of snippets of the YouTube video. Given those summaries, summarize the YouTube video.'
        },
        {
            'role': 'user',
            'content': str(subtitles)
        }
    ])
    return response['message']['content']
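
# Taken together, the three chat helpers implement a simple map-reduce
# pipeline: refine() cleans up each raw chunk, get_pre_summary() maps chunks
# to partial summaries, and get_summary() reduces those into one final summary.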


def concatenate_subtitles(subtitle_json):
    """
    Concatenates all subtitle text from the given JSON object.

    Args:
        subtitle_json (dict): A dictionary containing subtitle data.

    Returns:
        str: A single string with all concatenated subtitle text.
    """
    result = []
    # Check if the 'events' key is in the JSON object
    if 'events' in subtitle_json:
        for event in subtitle_json['events']:
            # Check if 'segs' is in the event and concatenate 'utf8' text from each segment
            if 'segs' in event:
                for seg in event['segs']:
                    if 'utf8' in seg:
                        result.append(seg['utf8'])
    # Join all collected text with spaces and return
    return ' '.join(result)
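
# For reference, the caption payload this parses (YouTube's "json3" timedtext
# format) looks roughly like this abridged, hypothetical example:
#
#   {
#       "events": [
#           {"segs": [{"utf8": "hello"}, {"utf8": "world"}]},
#           {"segs": [{"utf8": "again"}]}
#       ]
#   }
#
# for which concatenate_subtitles() returns "hello world again".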


def main():
    parser = argparse.ArgumentParser(
        description="Download subtitles from a video and summarize it using a local Ollama instance."
    )
    parser.add_argument('url', metavar='URL', type=str, help="The URL of the video to process.")
    parser.add_argument('-v', '--verbose', action='store_true', help="Enable verbose output.")
    # Parse out arguments
    args = parser.parse_args()
    video_url = args.url

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # Extract metadata without downloading the video
        info = ydl.extract_info(video_url, download=False)
        print(f"Summarizing video: {info.get('title', 'Unknown Title')}...")

        # Check if subtitles are available
        subtitle_lang = 'en'  # Change this to your desired language
        subtitles_available = info.get('subtitles', {})
        automatic_subtitles_available = info.get('automatic_captions', {})
        autogenned = False
        if subtitle_lang in subtitles_available:
            print(f"Downloading manual subtitles for language: {subtitle_lang}...")
            # Take the first listed subtitle format; the parsing below assumes it is JSON
            subtitle_url = subtitles_available[subtitle_lang][0]['url']
        elif subtitle_lang in automatic_subtitles_available:
            print(f"No manual subtitles available. Falling back to auto-generated subtitles for language: {subtitle_lang}...")
            subtitle_url = automatic_subtitles_available[subtitle_lang][0]['url']
            autogenned = True
        else:
            print(f"No subtitles (manual or auto-generated) available for language: {subtitle_lang}!")
            subtitle_url = None
            exit(51)

        # If a subtitle URL was found, download the subtitles
        if subtitle_url:
            subtitle_data = ydl.urlopen(subtitle_url).read().decode('utf-8')
        else:
            print("Failed to download subtitles!")
            exit(50)

        subs = concatenate_subtitles(json.loads(subtitle_data))

        # If we have auto-generated subtitles, refine them a bit:
        if autogenned:
            print("Refining transcript...")
            buffer = ""
            # We split this into smaller chunks to urge the AI to only do small pieces.
            # wrap() breaks on whitespace, so chunks may still end mid-sentence;
            # the refine() system prompt warns the model about exactly that.
            chunked = wrap(subs, 2048)
            print(f"Splitting text into {len(chunked)} segments...")
            for snippet in chunked:
                if args.verbose:
                    print(f"Unrefined: {snippet}")
                ref = refine(snippet)
                if args.verbose:
                    print(f"Refined: {ref}")
                # Re-insert the space that wrap() consumed at the chunk boundary
                buffer += ref + " "
                if not args.verbose:
                    print("#", end="", flush=True)
            if not args.verbose:
                print()  # End the progress-hash line
            subs = buffer
            if args.verbose:
                print(subs)

        if args.verbose:
            print("Getting summary...")
        # Now chunk the subs up and get summaries of segments
        firstpass = ""
        chunked = wrap(subs, 4096)
        print(f"Splitting text into {len(chunked)} segments...")
        for snippet in chunked:
            pre_summary = get_pre_summary(snippet)
            if args.verbose:
                print(f"Presummary: {pre_summary}")
            # Newline keeps the partial summaries separated for the final pass
            firstpass += pre_summary + "\n"
            if not args.verbose:
                print("#", end="", flush=True)
        if not args.verbose:
            print()  # End the progress-hash line

        # And a summary of the whole
        summary = get_summary(firstpass)
        print(f"Summary: {summary}")


if __name__ == '__main__':
    main()
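
# Example invocation (hypothetical URL; assumes an Ollama server is listening
# on localhost:11434 and the model named above has already been pulled):
#
#   ./summarize.py -v 'https://www.youtube.com/watch?v=VIDEO_ID'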