### Step 1: Detect Content Type
```bash
# Classify a URL as "youtube", "pdf", or "article".
# Arguments: $1 - URL to classify
# Outputs:   the content type on stdout
detect_content_type() {
  local url=$1
  local youtube_re='(youtube\.com/(watch|shorts)|youtu\.be/)'
  # Accept any letter case and a trailing query string or fragment,
  # e.g. "paper.pdf?download=1" or "REPORT.PDF#page=2".
  local pdf_re='\.[Pp][Dd][Ff]([?#].*)?$'

  # Check for YouTube
  if [[ "$url" =~ $youtube_re ]]; then
    echo "youtube"
  # Check for PDF
  elif [[ "$url" =~ $pdf_re ]]; then
    echo "pdf"
  # Check if URL returns PDF: HEAD request, following redirects, with a
  # timeout so an unresponsive host cannot stall the workflow.
  elif [[ -n "$url" ]] && command -v curl > /dev/null 2>&1 &&
    curl -sIL --max-time 15 "$url" |
    grep -qi '^content-type:[[:space:]]*application/pdf'; then
    echo "pdf"
  # Default to article
  else
    echo "article"
  fi
}

URL="${1:-}"
CONTENT_TYPE=$(detect_content_type "$URL")
echo "📍 Detected: $CONTENT_TYPE"
```
### Step 2: Extract Content (by Type)
#### YouTube Video
```bash
# Use youtube-transcript skill workflow.
# Reads:  URL          - video URL (set in Step 1)
# Writes: VIDEO_TITLE  - filename-safe video title
#         CONTENT_FILE - "<VIDEO_TITLE>.txt" in the current directory
#                        (only set when the transcript was saved)

# Make a video title safe to use as a filename:
# '/' becomes '_', ':' becomes '-', and '?' and '"' are removed.
# Deletion must use `tr -d`; `tr` with an empty replacement set is an error.
sanitize_title() {
  printf '%s' "$1" | tr '/' '_' | tr ':' '-' | tr -d '?"'
}

# Convert the WebVTT file given as $1 to plain text on stdout.
# Drops the WEBVTT/Kind/Language header lines and cue timing lines, strips
# inline tags, decodes HTML entities, and prints each distinct line once
# (auto-generated captions repeat lines across cues).
vtt_to_text() {
  python3 - "$1" <<'PY'
import html
import re
import sys

seen = set()
try:
    with open(sys.argv[1], 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line or '-->' in line:
                continue
            if line.startswith(('WEBVTT', 'Kind:', 'Language:')):
                continue
            clean = html.unescape(re.sub('<[^>]*>', '', line))
            if clean and clean not in seen:
                print(clean)
                seen.add(clean)
except FileNotFoundError:
    print('Error: Could not find transcript file', file=sys.stderr)
    sys.exit(1)
PY
}

# 1. Check for yt-dlp; install it through Homebrew when that is available.
ensure_yt_dlp() {
  if command -v yt-dlp > /dev/null 2>&1; then
    return 0
  fi
  if command -v brew > /dev/null 2>&1; then
    echo "Installing yt-dlp..."
    brew install yt-dlp
  else
    echo "Error: yt-dlp not found. Install it, e.g.: python3 -m pip install yt-dlp" >&2
    return 1
  fi
}

# Download the English auto-generated subtitles for the video at $1 and save
# them as de-duplicated plain text. Returns non-zero on any failure.
extract_youtube_transcript() {
  local url=$1 raw_title tmp_dir vtt_file out_file

  echo "📺 Extracting YouTube transcript..."
  if [[ -z "$url" ]]; then
    echo "Error: no URL given (run Step 1 first)" >&2
    return 1
  fi
  ensure_yt_dlp || return 1

  # 2. Get video title (fall back to a fixed name if it sanitizes to nothing)
  raw_title=$(yt-dlp --print "%(title)s" "$url") || return 1
  VIDEO_TITLE=$(sanitize_title "$raw_title")
  VIDEO_TITLE=${VIDEO_TITLE:-youtube_transcript}

  # 3. Download transcript into a private temp dir so nothing in the current
  #    directory is overwritten.
  tmp_dir=$(mktemp -d) || return 1
  yt-dlp --write-auto-sub --skip-download --sub-langs en \
    --output "$tmp_dir/temp_transcript" "$url"
  vtt_file="$tmp_dir/temp_transcript.en.vtt"

  if [[ ! -f "$vtt_file" ]]; then
    echo "Error: no English transcript was downloaded for this video" >&2
    rm -rf -- "$tmp_dir"
    return 1
  fi

  # 4. Convert to clean text (deduplicate)
  out_file="${VIDEO_TITLE}.txt"
  if ! vtt_to_text "$vtt_file" > "$out_file"; then
    rm -f -- "$out_file"
    rm -rf -- "$tmp_dir"
    return 1
  fi

  # 5. Cleanup
  rm -rf -- "$tmp_dir"
  CONTENT_FILE="$out_file"
  echo "✓ Saved transcript: $CONTENT_FILE"
}

extract_youtube_transcript "${URL:-}"
```
#### Article/Blog Post
```bash
# Use article-extractor skill workflow.
# Reads:  URL           - article URL (set in Step 1)
# Writes: TOOL          - extraction tool chosen (reader / trafilatura / fallback)
#         ARTICLE_TITLE - title as reported by the tool or the page <title>
#         FILENAME      - filename-safe title, at most 80 characters
#         CONTENT_FILE  - "<FILENAME>.txt" in the current directory
#                         (only set when content was saved)

# Turn a title into a safe filename stem: '/' and ':' become '-', '?' and '"'
# are removed (via `tr -d`; `tr` rejects an empty replacement set), the result
# is cut to 80 characters and trailing spaces are trimmed.
clean_filename() {
  printf '%s\n' "$1" | tr '/' '-' | tr ':' '-' | tr -d '?"' | cut -c 1-80 | sed 's/ *$//'
}

# Print the text of the first <title> element of the HTML read from stdin
# (whitespace collapsed, entities decoded); prints an empty line if none.
html_title() {
  python3 -c '
import html
import re
import sys

page = sys.stdin.buffer.read().decode("utf-8", errors="replace")
match = re.search(r"<title[^>]*>(.*?)</title>", page, re.I | re.S)
print(html.unescape(" ".join(match.group(1).split())) if match else "")
'
}

# Print the readable text of the HTML read from stdin. Text is collected from
# the first <p>/<article>/<main> onwards, and anything nested inside
# script/style/nav/header/footer/aside/form is left out.
html_to_text() {
  python3 -c '
from html.parser import HTMLParser
import sys


class ArticleExtractor(HTMLParser):
    SKIP_TAGS = {"script", "style", "nav", "header", "footer", "aside", "form"}
    CONTENT_TAGS = {"p", "article", "main"}

    def __init__(self):
        super().__init__()
        self.content = []
        self.in_content = False
        self.skip_depth = 0  # > 0 while inside any SKIP_TAGS element

    def handle_starttag(self, tag, attrs):
        if tag in self.SKIP_TAGS:
            self.skip_depth += 1
        elif tag in self.CONTENT_TAGS and self.skip_depth == 0:
            self.in_content = True

    def handle_endtag(self, tag):
        if tag in self.SKIP_TAGS and self.skip_depth > 0:
            self.skip_depth -= 1

    def handle_data(self, data):
        text = data.strip()
        if text and self.in_content and self.skip_depth == 0:
            self.content.append(text)

    def get_content(self):
        return "\n\n".join(self.content)


parser = ArticleExtractor()
parser.feed(sys.stdin.buffer.read().decode("utf-8", errors="replace"))
print(parser.get_content())
'
}

# Extract the article at $1 with the best available tool and save it as
# "<clean title>.txt". Returns non-zero if nothing could be extracted.
extract_article() {
  local url=$1 tmp_article tmp_html metadata

  echo "📄 Extracting article content..."
  if [[ -z "$url" ]]; then
    echo "Error: no URL given (run Step 1 first)" >&2
    return 1
  fi

  # 1. Check for extraction tools
  if command -v reader > /dev/null 2>&1; then
    TOOL="reader"
  elif command -v trafilatura > /dev/null 2>&1; then
    TOOL="trafilatura"
  else
    TOOL="fallback"
  fi
  echo "Using: $TOOL"

  ARTICLE_TITLE=""
  tmp_article=$(mktemp) || return 1

  # 2. Extract based on tool
  case "$TOOL" in
    reader)
      reader "$url" > "$tmp_article"
      ARTICLE_TITLE=$(head -n 1 "$tmp_article" | sed 's/^# //')
      ;;
    trafilatura)
      metadata=$(trafilatura --URL "$url" --json)
      # A missing or null title falls back to "Article".
      ARTICLE_TITLE=$(printf '%s' "$metadata" |
        python3 -c 'import json, sys; print(json.load(sys.stdin).get("title") or "Article")')
      trafilatura --URL "$url" --output-format txt --no-comments > "$tmp_article"
      ;;
    fallback)
      # Fetch the page once and reuse it for both the title and the body.
      tmp_html=$(mktemp) || {
        rm -f -- "$tmp_article"
        return 1
      }
      if curl -sL "$url" -o "$tmp_html"; then
        ARTICLE_TITLE=$(html_title < "$tmp_html")
        # Drop a trailing " - Site Name" style suffix.
        ARTICLE_TITLE=${ARTICLE_TITLE%% - *}
        html_to_text < "$tmp_html" > "$tmp_article"
      fi
      rm -f -- "$tmp_html"
      ;;
  esac

  # Refuse to save an article that contains no visible text.
  if ! grep -q '[^[:space:]]' "$tmp_article"; then
    echo "Error: no content could be extracted from $url" >&2
    rm -f -- "$tmp_article"
    return 1
  fi

  # 3. Clean filename (fall back to "Article" when the title is empty)
  FILENAME=$(clean_filename "$ARTICLE_TITLE")
  FILENAME=${FILENAME:-Article}
  CONTENT_FILE="${FILENAME}.txt"
  mv -- "$tmp_article" "$CONTENT_FILE"
  echo "✓ Saved article: $CONTENT_FILE"
}

extract_article "${URL:-}"
```
#### PDF Document
```bash
# Download and extract PDF.
# Reads:  URL          - PDF URL (set in Step 1)
# Writes: PDF_FILENAME - local name of the downloaded PDF
#         CONTENT_FILE - extracted text file, or the PDF itself when
#                        pdftotext is unavailable or fails
#         KEEP_PDF     - the user's answer to the keep/delete prompt

# Derive a local PDF filename from a URL: drop any query string or fragment,
# take the last path segment, and make sure it ends in ".pdf".
# Arguments: $1 - URL
# Outputs:   filename on stdout ("document.pdf" if the URL has no usable name)
pdf_filename_from_url() {
  local name=${1%%[?#]*}
  name=${name%/}
  name=${name##*/}
  [[ -n "$name" ]] || name="document"
  case "$name" in
    *.[Pp][Dd][Ff]) ;;
    *) name="${name}.pdf" ;;
  esac
  printf '%s\n' "$name"
}

# Download the PDF at $1 and, when pdftotext is installed, convert it to text.
extract_pdf() {
  local url=$1

  echo "📑 Downloading PDF..."
  if [[ -z "$url" ]]; then
    echo "Error: no URL given (run Step 1 first)" >&2
    return 1
  fi

  # 1. Download PDF (-f: fail on HTTP errors instead of saving the error page)
  PDF_FILENAME=$(pdf_filename_from_url "$url")
  if ! curl -fL -o "$PDF_FILENAME" "$url"; then
    echo "Error: failed to download $url" >&2
    return 1
  fi

  # 2. Extract text using pdftotext (if available)
  if command -v pdftotext > /dev/null 2>&1; then
    CONTENT_FILE="${PDF_FILENAME%.[Pp][Dd][Ff]}.txt"
    if ! pdftotext "$PDF_FILENAME" "$CONTENT_FILE"; then
      echo "Error: pdftotext failed on $PDF_FILENAME" >&2
      CONTENT_FILE="$PDF_FILENAME"
      return 1
    fi
    echo "✓ Extracted text from PDF: $CONTENT_FILE"
    # Optionally keep PDF. Only ask (and only delete) when a user can answer;
    # in a non-interactive run the PDF is kept.
    if [[ -t 0 ]]; then
      echo "Keep original PDF? (y/n)"
      read -r KEEP_PDF
      if [[ ! "$KEEP_PDF" =~ ^[Yy]$ ]]; then
        rm -- "$PDF_FILENAME"
      fi
    fi
  else
    # No pdftotext available
    echo "⚠️  pdftotext not found. PDF downloaded but not extracted."
    echo " Install with: brew install poppler"
    CONTENT_FILE="$PDF_FILENAME"
  fi
}

extract_pdf "${URL:-}"
```
### Step 3: Create Ship-Learn-Next Action Plan
IMPORTANT: Always create an action plan after extracting content.
```bash
# Read the extracted content.
# NOTE: "[from previous step]" is a placeholder, not a real path; use the
# CONTENT_FILE value set by the Step 2 extraction snippet instead.
CONTENT_FILE="[from previous step]"
# Invoke ship-learn-next skill logic (listed as steps to follow, not commands):
# 1. Read the content file
# 2. Extract core actionable lessons
# 3. Create 5-rep progression plan
# 4. Save as: Ship-Learn-Next Plan - [Quest Title].md
# See ship-learn-next/SKILL.md for full details
```
Key points for plan creation:
- Extract actionable lessons (not just summaries)
- Define a specific 4-8 week quest
- Create Rep 1 (shippable this week)
- Design Reps 2-5 (progressive iterations)
- Save plan to markdown file
- Use format: `Ship-Learn-Next Plan - [Brief Quest Title].md`
### Step 4: Present Results
Show user:
```
✅ Tapestry Workflow Complete!
📥 Content Extracted:
   ✓ [Content type]: [Title]
   ✓ Saved to: [filename.txt]
   ✓ [X] words extracted
📋 Action Plan Created:
   ✓ Quest: [Quest title]
   ✓ Saved to: Ship-Learn-Next Plan - [Title].md
🎯 Your Quest: [One-line summary]
📍 Rep 1 (This Week): [Rep 1 goal]
When will you ship Rep 1?
```