Add generate_archive.py

This commit is contained in:
Andreas Andersen 2026-04-11 18:54:45 +00:00
parent cb9b132feb
commit 4f7f10b827

484
generate_archive.py Normal file
View file

@ -0,0 +1,484 @@
import html
import json
import os
import re
import shutil
import zipfile
from datetime import datetime
def parse_twitter_js(content):
    """Strip the JavaScript variable assignment and return the embedded JSON.

    The input is expected to be the text of a ``.js`` data file in which a
    JSON value is assigned to a variable, optionally preceded by a
    ``/* ... */`` block comment.

    Args:
        content: Full text of the ``.js`` file.

    Returns:
        The decoded JSON value (a list or a dict), or an empty list when no
        JSON value is found or it cannot be decoded.
    """
    # Drop a leading block comment so brackets inside it are not mistaken
    # for the start of the JSON payload.
    cleaned_content = re.sub(r'^\/\*.*?\*\/', '', content, flags=re.DOTALL).strip()
    start_match = re.search(r'[\[\{]', cleaned_content)
    if not start_match:
        return []
    try:
        # raw_decode parses exactly one JSON value and ignores whatever
        # follows it (e.g. a trailing ';'), which json.loads would reject.
        parsed, _end = json.JSONDecoder().raw_decode(cleaned_content, start_match.start())
    except json.JSONDecodeError as e:
        print(f"Fejl ved JSON-parsing: {e}")
        return []
    return parsed
def _parse_created_at(raw):
    """Parse a tweet's ``created_at`` value, falling back to the current time."""
    try:
        return datetime.strptime(raw, '%a %b %d %H:%M:%S +0000 %Y')
    except (TypeError, ValueError):
        # TypeError: the field is missing (None); ValueError: unexpected format.
        return datetime.now()


def _render_tweet_text(tweet):
    """Return the tweet text with media short links removed and URL short links turned into anchors."""
    text = tweet.get('full_text', '')
    entities = tweet.get('entities', {})
    for media_entity in entities.get('media', []):
        tco_media = media_entity.get('url', '')
        if tco_media:
            text = text.replace(tco_media, '').strip()
    for url_entity in entities.get('urls', []):
        tco_url = url_entity.get('url', '')
        expanded_url = url_entity.get('expanded_url', '')
        display_url = url_entity.get('display_url', expanded_url)
        if tco_url and expanded_url:
            # Escape both values: they end up inside an attribute and element
            # content that the page later assigns via innerHTML.
            safe_href = html.escape(expanded_url, quote=True)
            safe_label = html.escape(display_url, quote=True)
            html_link = f'<a href="{safe_href}" target="_blank">{safe_label}</a>'
            text = text.replace(tco_url, html_link)
    return text


def _extract_media(archive, tweet, media_dir):
    """Copy the tweet's media files out of the archive and return their relative paths."""
    extracted = []
    tweet_id = tweet.get('id_str', '')
    for media in tweet.get('extended_entities', {}).get('media', []):
        media_url = media.get('media_url', '')
        if not media_url:
            continue
        filename = media_url.split('/')[-1]
        zip_media_path = f"data/tweets_media/{tweet_id}-{filename}"
        try:
            with archive.open(zip_media_path) as source, open(os.path.join(media_dir, filename), 'wb') as target:
                shutil.copyfileobj(source, target)
        except KeyError:
            # The archive has no such member; skip it silently (best effort).
            continue
        except (OSError, zipfile.BadZipFile) as e:
            # Keep going, but do not hide real I/O or archive corruption problems.
            print(f"Advarsel: kunne ikke udpakke {zip_media_path}: {e}")
            continue
        extracted.append(f"{media_dir}/{filename}")
    return extracted


def main():
    """Build ``index.html`` and a ``media`` folder from ``twitter.zip``.

    Requires ``twitter.zip`` and ``avatar.png`` in the current working
    directory; prints an error and returns if either is missing. Reads the
    profile, account, follower, following and tweet data files from the
    archive, extracts referenced media into ``media/`` and hands the
    collected data to ``generate_html``.
    """
    zip_path = 'twitter.zip'
    if not os.path.exists(zip_path):
        print("Fejl: Fandt ikke twitter.zip i samme mappe.")
        return
    if not os.path.exists('avatar.png'):
        print("Fejl: Fandt ikke avatar.png i samme mappe. Brug venligst et billede med dette navn.")
        return

    print("Analyserer twitter.zip...")
    media_dir = 'media'
    os.makedirs(media_dir, exist_ok=True)

    profile_data = {}
    tweets_data = []

    with zipfile.ZipFile(zip_path, 'r') as z:
        file_list = z.namelist()
        profile_file = next((f for f in file_list if f.endswith('data/profile.js')), None)
        account_file = next((f for f in file_list if f.endswith('data/account.js')), None)
        followers_file = next((f for f in file_list if f.endswith('data/follower.js')), None)
        following_file = next((f for f in file_list if f.endswith('data/following.js')), None)

        profile_data['name'] = 'Andreas Andersen'
        profile_data['bio'] = ''

        if profile_file:
            prof_json = parse_twitter_js(z.read(profile_file).decode('utf-8'))
            # parse_twitter_js may return a dict; only index into a non-empty list.
            if isinstance(prof_json, list) and prof_json:
                p = prof_json[0].get('profile', {})
                profile_data['bio'] = p.get('description', {}).get('bio', '')

        if account_file:
            acc_json = parse_twitter_js(z.read(account_file).decode('utf-8'))
            if isinstance(acc_json, list) and acc_json:
                acc = acc_json[0].get('account', {})
                profile_data['handle'] = '@' + acc.get('username', 'ukendt')
                profile_data['created'] = acc.get('createdAt', '')

        profile_data['followersCount'] = len(parse_twitter_js(z.read(followers_file).decode('utf-8'))) if followers_file else 0
        profile_data['followingCount'] = len(parse_twitter_js(z.read(following_file).decode('utf-8'))) if following_file else 0

        # Find ALL tweet files (e.g. tweets.js, tweets-part1.js, tweets-part2.js).
        # NOTE(review): this pattern is anchored at the archive root, unlike the
        # endswith() lookups above — confirm whether a path prefix can occur.
        tweet_files = [f for f in file_list if re.match(r'^data/tweets?(?:-part\d+)?\.js$', f)]
        if tweet_files:
            print(f"Fandt {len(tweet_files)} fil(er) med tweets. Udtrækker og behandler...")

        for tweets_file in tweet_files:
            print(f"- Læser {tweets_file}...")
            raw_tweets = parse_twitter_js(z.read(tweets_file).decode('utf-8'))
            for item in raw_tweets:
                # Entries are either wrapped as {"tweet": {...}} or are the tweet itself.
                t = item.get('tweet', item)
                is_reply = t.get('in_reply_to_status_id') is not None or t.get('in_reply_to_user_id') is not None
                # Link detection looks at the raw text, before short links are rewritten.
                has_links = 'http' in t.get('full_text', '')
                text = _render_tweet_text(t)
                tags = [tag['text'].lower() for tag in t.get('entities', {}).get('hashtags', [])]
                media_files = _extract_media(z, t, media_dir)
                date_obj = _parse_created_at(t.get('created_at'))

                tweets_data.append({
                    'id': t.get('id_str', ''),
                    'text': text,
                    'date': date_obj.strftime('%d. %b %Y, %H:%M'),
                    'year': date_obj.strftime('%Y'),
                    'timestamp': date_obj.timestamp(),
                    'is_reply': is_reply,
                    'has_links': has_links,
                    'tags': tags,
                    'media': media_files,
                    'likes': int(t.get('favorite_count', 0)),
                    'retweets': int(t.get('retweet_count', 0)),
                })

    profile_data['totalPosts'] = sum(1 for t in tweets_data if not t['is_reply'])
    profile_data['totalReplies'] = sum(1 for t in tweets_data if t['is_reply'])

    print("Genererer HTML...")
    generate_html(profile_data, tweets_data)
    print("Færdig! Åbn index.html i din browser.")
def _json_for_script(value):
    """Serialize *value* to JSON that is safe to inline inside a ``<script>`` element."""
    # '<', '>' and '&' can only occur inside JSON strings, so replacing them
    # with \uXXXX escapes keeps the value identical for JavaScript while making
    # it impossible for the data to close the surrounding <script> tag.
    return (
        json.dumps(value)
        .replace('<', '\\u003c')
        .replace('>', '\\u003e')
        .replace('&', '\\u0026')
    )


def generate_html(profile, tweets):
    """Write a self-contained ``index.html`` archive page to the working directory.

    Args:
        profile: Dict with optional keys ``handle``, ``bio``, ``created``,
            ``totalPosts``, ``totalReplies``, ``followersCount`` and
            ``followingCount``; missing keys fall back to defaults.
        tweets: JSON-serializable list of tweet dicts. The embedded script
            reads the keys ``id``, ``text``, ``date``, ``year``,
            ``timestamp``, ``is_reply``, ``has_links``, ``tags``, ``media``,
            ``likes`` and ``retweets``.

    Returns:
        None. The page is written to ``index.html`` encoded as UTF-8.
    """
    handle = str(profile.get('handle', '@andreas'))
    created = profile.get('created')
    created_label = created[:10] if created else 'Ukendt'

    # Profile fields are user-controlled text placed into HTML, so escape them.
    handle_html = html.escape(handle)
    created_html = html.escape(created_label)
    # NOTE(review): the export may already contain entity-escaped text (to be
    # confirmed); unescaping first avoids double-escaping in that case.
    bio_html = html.escape(html.unescape(profile.get('bio') or ''))

    # Values inlined into the <script> block are JSON-encoded, never hand-quoted.
    tweets_json = _json_for_script(tweets)
    handle_json = _json_for_script(handle)

    html_content = f"""<!DOCTYPE html>
<html lang="da">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Twitter/X Arkiv</title>
<style>
:root {{ --bg: #f7f9f9; --border: #eff3f4; --text: #0f1419; --gray: #536471; --blue: #1d9bf0; }}
body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; background-color: var(--bg); color: var(--text); margin: 0; padding: 0; }}
header {{ background: white; padding: 15px 20px; border-bottom: 1px solid var(--border); position: sticky; top: 0; z-index: 100; display: flex; align-items: center; gap: 15px; }}
.header-logo-container {{ display: flex; align-items: center; gap: 8px; }}
.header-logo-container svg {{ fill: var(--text); }}
.header-logo-container .old-bird {{ fill: #1da1f2; width: 28px; height: 28px; }}
.header-logo-container .new-x {{ fill: #000; width: 22px; height: 22px; }}
header h1 {{ margin: 0; font-size: 20px; }}
.container {{ display: flex; max-width: 1000px; margin: 0 auto; padding: 20px; gap: 20px; }}
main {{ flex: 1; min-width: 0; background: white; border-radius: 12px; border: 1px solid var(--border); overflow: hidden; }}
aside {{ width: 300px; flex-shrink: 0; display: flex; flex-direction: column; }}
/* Profil sektion */
.profile-header {{ padding: 20px; border-bottom: 1px solid var(--border); display: flex; gap: 20px; align-items: flex-start; }}
.profile-pic-container {{ flex-shrink: 0; }}
.profile-pic {{ width: 120px; height: 120px; border-radius: 50%; object-fit: cover; border: 4px solid white; background: #ccc; }}
.profile-info {{ flex: 1; min-width: 0; }}
.profile-name {{ font-size: 20px; font-weight: bold; margin: 5px 0 0 0; }}
.profile-handle {{ color: var(--gray); margin: 2px 0 15px 0; font-size: 15px; }}
.profile-bio {{ font-size: 15px; margin-bottom: 15px; line-height: 1.4; white-space: pre-wrap; }}
.profile-stats {{ display: flex; gap: 15px; color: var(--gray); font-size: 14px; flex-wrap: wrap; }}
.profile-stats span strong {{ color: var(--text); }}
/* Tweets */
.tweet {{ padding: 15px 20px; border-bottom: 1px solid var(--border); display: flex; gap: 12px; }}
.tweet-avatar {{ width: 48px; height: 48px; border-radius: 50%; object-fit: cover; background: #ccc; flex-shrink: 0; }}
.tweet-content {{ flex: 1; min-width: 0; }}
.tweet-name {{ font-weight: bold; font-size: 15px; }}
.tweet-text {{ margin: 5px 0 10px 0; font-size: 15px; line-height: 1.4; word-wrap: break-word; }}
.tweet-text a {{ color: var(--blue); text-decoration: none; }}
.tweet-text a:hover {{ text-decoration: underline; }}
.tweet-media-container {{ display: flex; flex-wrap: wrap; gap: 5px; margin-top: 10px; }}
.tweet-media {{ max-width: calc(100% - 10px); max-height: 400px; border-radius: 12px; border: 1px solid var(--border); object-fit: cover; }}
/* Stats (Likes, RTs) */
.tweet-metrics {{ display: flex; gap: 24px; margin: 12px 0; color: var(--gray); font-size: 13px; font-weight: 500; }}
.tweet-metrics span {{ display: flex; align-items: center; gap: 6px; }}
.tweet-metrics svg {{ width: 18px; height: 18px; fill: currentColor; }}
.tweet-footer {{ color: var(--gray); font-size: 13px; display: flex; gap: 10px; align-items: center; }}
.tweet-footer a {{ color: var(--blue); text-decoration: none; }}
.tweet-footer a:hover {{ text-decoration: underline; }}
.badge {{ background: #eee; padding: 2px 6px; border-radius: 4px; font-size: 11px; font-weight: bold; text-transform: uppercase; }}
/* Sidebar */
.sidebar-box {{ background: white; border-radius: 12px; padding: 15px; margin-bottom: 15px; border: 1px solid var(--border); }}
.search-row {{ display: flex; gap: 10px; margin-bottom: 15px; }}
.search-row input {{ flex: 1; padding: 10px; border: 1px solid var(--border); border-radius: 20px; outline: none; }}
.search-row button {{ background: var(--blue); color: white; border: none; padding: 10px 15px; border-radius: 20px; cursor: pointer; font-weight: bold; }}
.search-row button:hover {{ background: #1a8cd8; }}
.visible-count {{ font-size: 14px; color: var(--gray); text-align: center; margin-bottom: 10px; }}
.filter-section {{ margin-bottom: 20px; }}
.filter-section h3 {{ font-size: 16px; margin: 0 0 10px 0; }}
.sort-select {{ width: 100%; padding: 10px; border-radius: 6px; border: 1px solid var(--border); font-size: 14px; outline: none; font-family: inherit; color: var(--text); background: white; cursor: pointer; }}
.sort-select:hover {{ border-color: #cfd9de; }}
.filter-btn {{ display: block; width: 100%; text-align: left; background: none; border: none; padding: 8px 10px; cursor: pointer; font-size: 14px; color: var(--text); border-radius: 6px; }}
.filter-btn:hover {{ background: var(--border); }}
.filter-btn.active {{ background: var(--blue); color: white; font-weight: bold; }}
.filter-btn .count {{ float: right; color: var(--gray); font-size: 13px;}}
.filter-btn.active .count {{ color: #e1e8ed; }}
.tags-list {{ max-height: 250px; overflow-y: auto; }}
.sidebar-footer {{ text-align: center; font-size: 13px; color: var(--gray); padding: 10px; margin-top: auto; }}
@media (max-width: 768px) {{
.container {{ flex-direction: column; }}
aside {{ width: 100%; }}
.profile-header {{ flex-direction: column; align-items: center; text-align: center; }}
.profile-stats {{ justify-content: center; }}
}}
</style>
</head>
<body>
<header>
<div class="header-logo-container">
<svg class="old-bird" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M23.643 4.937c-.835.37-1.732.62-2.675.733.962-.576 1.7-1.49 2.048-2.578-.9.534-1.897.922-2.958 1.13-.85-.904-2.06-1.47-3.4-1.47-2.572 0-4.658 2.086-4.658 4.66 0 .364.042.718.12 1.06-3.873-.195-7.304-2.05-9.602-4.868-.4.69-.63 1.49-.63 2.342 0 1.616.823 3.043 2.072 3.878-.764-.025-1.482-.234-2.11-.583v.06c0 2.257 1.605 4.14 3.737 4.568-.392.106-.803.162-1.227.162-.3 0-.593-.028-.877-.082.593 1.85 2.313 3.198 4.352 3.237-1.594 1.25-3.607 1.995-5.792 1.995-.375 0-.745-.022-1.112-.065 2.062 1.323 4.51 2.093 7.14 2.093 8.57 0 13.255-7.098 13.255-13.254 0-.2-.005-.402-.014-.602.91-.658 1.7-1.477 2.323-2.41z"></path></g></svg>
<svg class="new-x" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M18.244 2.25h3.308l-7.227 8.26 8.502 11.24H16.17l-5.214-6.817L4.99 21.75H1.68l7.73-8.835L1.25 2.25h6.763l4.717 6.233L18.244 2.25zm-1.161 17.52h1.833L7.084 4.126H5.117L17.083 19.77z"></path></g></svg>
</div>
<h1>Twitter/X arkiv</h1>
</header>
<div class="container">
<main>
<div class="profile-header">
<div class="profile-pic-container">
<img src="avatar.png" class="profile-pic" alt="Profilbillede">
</div>
<div class="profile-info">
<div class="profile-name">Andreas Andersen</div>
<div class="profile-handle">{handle_html}</div>
<div class="profile-bio">{bio_html}</div>
<div class="profile-stats">
<span><strong>{profile.get('totalPosts', 0)}</strong> Posts</span>
<span><strong>{profile.get('totalReplies', 0)}</strong> Svar</span>
<span><strong>{profile.get('followersCount', 0)}</strong> Følgere</span>
<span><strong>{profile.get('followingCount', 0)}</strong> Fulgte</span>
<span>Profil oprettet: {created_html}</span>
</div>
</div>
</div>
<div id="tweet-feed"></div>
</main>
<aside>
<div class="sidebar-box">
<div class="search-row">
<input type="text" id="searchInput" placeholder="Søg i tweets...">
<button id="searchBtn">Søg</button>
</div>
<div class="visible-count" id="visibleCount">Viser 0 indlæg</div>
<div class="filter-section">
<h3>Sortering</h3>
<select id="sortSelect" class="sort-select">
<option value="newest">Nyeste først</option>
<option value="likes">Flest likes</option>
<option value="retweets">Flest retweets</option>
</select>
</div>
<div class="filter-section">
<h3>Type</h3>
<button class="filter-btn active" data-group="type" data-val="all">Alle <span class="count" id="count-all">0</span></button>
<button class="filter-btn" data-group="type" data-val="posts">Posts <span class="count" id="count-posts">0</span></button>
<button class="filter-btn" data-group="type" data-val="replies">Svar <span class="count" id="count-replies">0</span></button>
</div>
<div class="filter-section">
<h3>Indhold</h3>
<button class="filter-btn active" data-group="content" data-val="all">Alle <span class="count">-</span></button>
<button class="filter-btn" data-group="content" data-val="images">Med billeder <span class="count" id="count-images">0</span></button>
<button class="filter-btn" data-group="content" data-val="links">Med links <span class="count" id="count-links">0</span></button>
<button class="filter-btn" data-group="content" data-val="tags">Med tags <span class="count" id="count-tags">0</span></button>
<button class="filter-btn" data-group="content" data-val="notags">Uden tags <span class="count" id="count-notags">0</span></button>
</div>
<div class="filter-section">
<h3>Årstal</h3>
<div id="yearFilters"></div>
</div>
<div class="filter-section">
<h3>Tags</h3>
<div class="tags-list" id="tagFilters"></div>
</div>
</div>
<div class="sidebar-footer">
&copy; Andreas Andersen
</div>
</aside>
</div>
<script>
const tweetsData = {tweets_json};
const profile = {{
handle: {handle_json}.replace('@', ''),
avatar: "avatar.png",
name: "Andreas Andersen"
}};
let activeFilters = {{
type: 'all',
content: 'all',
year: 'all',
tag: 'all',
searchQuery: ''
}};
let currentSort = 'newest';
function initFilters() {{
document.getElementById('count-all').innerText = tweetsData.length;
document.getElementById('count-posts').innerText = tweetsData.filter(t => !t.is_reply).length;
document.getElementById('count-replies').innerText = tweetsData.filter(t => t.is_reply).length;
document.getElementById('count-images').innerText = tweetsData.filter(t => t.media.length > 0).length;
document.getElementById('count-links').innerText = tweetsData.filter(t => t.has_links).length;
document.getElementById('count-tags').innerText = tweetsData.filter(t => t.tags.length > 0).length;
document.getElementById('count-notags').innerText = tweetsData.filter(t => t.tags.length === 0).length;
const years = {{}};
const tags = {{}};
tweetsData.forEach(t => {{
years[t.year] = (years[t.year] || 0) + 1;
t.tags.forEach(tag => {{
tags[tag] = (tags[tag] || 0) + 1;
}});
}});
const yearContainer = document.getElementById('yearFilters');
yearContainer.innerHTML = `<button class="filter-btn active" data-group="year" data-val="all">Alle År <span class="count">${{tweetsData.length}}</span></button>`;
Object.keys(years).sort().reverse().forEach(year => {{
yearContainer.innerHTML += `<button class="filter-btn" data-group="year" data-val="${{year}}">${{year}} <span class="count">(${{years[year]}})</span></button>`;
}});
const tagContainer = document.getElementById('tagFilters');
tagContainer.innerHTML = `<button class="filter-btn active" data-group="tag" data-val="all">Alle Tags <span class="count">${{tweetsData.length}}</span></button>`;
const sortedTags = Object.entries(tags).sort((a, b) => b[1] - a[1]);
sortedTags.forEach(([tag, count]) => {{
tagContainer.innerHTML += `<button class="filter-btn" data-group="tag" data-val="${{tag}}">#${{tag}} <span class="count">(${{count}})</span></button>`;
}});
document.querySelectorAll('.filter-btn').forEach(btn => {{
btn.addEventListener('click', (e) => {{
const group = e.currentTarget.dataset.group;
const val = e.currentTarget.dataset.val;
document.querySelectorAll(`.filter-btn[data-group="${{group}}"]`).forEach(b => b.classList.remove('active'));
e.currentTarget.classList.add('active');
activeFilters[group] = val;
renderTweets();
}});
}});
document.getElementById('searchBtn').addEventListener('click', () => {{
activeFilters.searchQuery = document.getElementById('searchInput').value.toLowerCase();
renderTweets();
}});
document.getElementById('searchInput').addEventListener('keypress', (e) => {{
if(e.key === 'Enter') document.getElementById('searchBtn').click();
}});
document.getElementById('sortSelect').addEventListener('change', (e) => {{
currentSort = e.target.value;
renderTweets();
}});
}}
function renderTweets() {{
const feed = document.getElementById('tweet-feed');
feed.innerHTML = '';
let filtered = tweetsData.filter(t => {{
if(activeFilters.type === 'posts' && t.is_reply) return false;
if(activeFilters.type === 'replies' && !t.is_reply) return false;
if(activeFilters.content === 'images' && t.media.length === 0) return false;
if(activeFilters.content === 'links' && !t.has_links) return false;
if(activeFilters.content === 'tags' && t.tags.length === 0) return false;
if(activeFilters.content === 'notags' && t.tags.length > 0) return false;
if(activeFilters.year !== 'all' && t.year !== activeFilters.year) return false;
if(activeFilters.tag !== 'all' && !t.tags.includes(activeFilters.tag)) return false;
const plainText = t.text.replace(/<[^>]*>?/gm, '');
if(activeFilters.searchQuery && !plainText.toLowerCase().includes(activeFilters.searchQuery)) return false;
return true;
}});
filtered.sort((a, b) => {{
if (currentSort === 'likes') {{
return b.likes - a.likes || b.timestamp - a.timestamp;
}} else if (currentSort === 'retweets') {{
return b.retweets - a.retweets || b.timestamp - a.timestamp;
}} else {{
return b.timestamp - a.timestamp;
}}
}});
document.getElementById('visibleCount').innerText = `Viser ${{filtered.length}} indlæg`;
filtered.forEach(t => {{
const div = document.createElement('div');
div.className = 'tweet';
let mediaHtml = '';
if(t.media.length > 0) {{
mediaHtml = '<div class="tweet-media-container">';
mediaHtml += t.media.map(m => `<img src="${{m}}" class="tweet-media" loading="lazy">`).join('');
mediaHtml += '</div>';
}}
const originalUrl = `https://x.com/${{profile.handle}}/status/${{t.id}}`;
div.innerHTML = `
<img src="${{profile.avatar}}" class="tweet-avatar">
<div class="tweet-content">
<div class="tweet-name">${{profile.name}}</div>
<div class="tweet-text">${{t.text.replace(/\\n/g, '<br>')}}</div>
${{mediaHtml}}
<div class="tweet-metrics">
<span title="Retweets">
<svg viewBox="0 0 24 24"><path d="M4.5 3.88l4.432 4.14-1.364 1.46L5.5 7.55V16c0 1.1.896 2 2 2H13v2H7.5c-2.209 0-4-1.79-4-4V7.55L1.432 9.48.068 8.02 4.5 3.88zM16.5 6H11V4h5.5c2.209 0 4 1.79 4 4v8.45l2.068-1.93 1.364 1.46-4.432 4.14-4.432-4.14 1.364-1.46 2.068 1.93V8c0-1.1-.896-2-2-2z"></path></svg>
${{t.retweets}}
</span>
<span title="Likes">
<svg viewBox="0 0 24 24"><path d="M16.697 5.5c-1.222-.06-2.679.51-3.89 2.16l-.805 1.09-.806-1.09C9.984 6.01 8.526 5.44 7.304 5.5c-1.243.07-2.349.78-2.91 1.91-.552 1.12-.633 2.78.479 4.82 1.074 1.97 3.257 4.27 7.129 6.61 3.87-2.34 6.052-4.64 7.126-6.61 1.111-2.04 1.03-3.7.477-4.82-.561-1.13-1.666-1.84-2.908-1.91zm4.187 7.69c-1.351 2.48-4.001 5.12-8.379 7.67l-.503.3-.504-.3c-4.379-2.55-7.029-5.19-8.382-7.67-1.36-2.5-1.41-4.86-.514-6.67.887-1.79 2.647-2.91 4.601-3.01 1.651-.09 3.368.56 4.798 2.01 1.429-1.45 3.146-2.1 4.796-2.01 1.954.1 3.714 1.22 4.601 3.01.896 1.81.846 4.17-.514 6.67z"></path></svg>
${{t.likes}}
</span>
</div>
<div class="tweet-footer">
<span class="badge">${{t.is_reply ? 'Svar' : 'Post'}}</span>
<span>${{t.date}}</span>
<span>·</span>
<a href="${{originalUrl}}" target="_blank">Se X</a>
</div>
</div>
`;
feed.appendChild(div);
}});
}}
initFilters();
renderTweets();
</script>
</body>
</html>
"""
    with open('index.html', 'w', encoding='utf-8') as f:
        f.write(html_content)
# Run the archive generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()