import zipfile
import json
import os
import shutil
import re
from datetime import datetime

# Matches every tweet file of the archive (e.g. tweets.js, tweets-part1.js,
# tweets-part2.js). A leading folder is tolerated so that the match agrees
# with the endswith('data/...') lookups used for the other archive files.
_TWEET_FILE_RE = re.compile(r'(?:^|/)data/tweets?(?:-part\d+)?\.js$')


def parse_twitter_js(content):
    """Strip the JavaScript variable assignment and return the plain JSON.

    A leading ``/* ... */`` comment is removed first; everything before the
    first ``[`` or ``{`` is then discarded and the remainder is parsed as JSON.

    Args:
        content: Text of one archive ``.js`` file.

    Returns:
        The decoded JSON value (a list or a dict), or ``[]`` when the text
        contains no ``[``/``{`` or the JSON cannot be decoded.
    """
    without_comment = re.sub(r'^\/\*.*?\*\/', '', content, flags=re.DOTALL).strip()
    json_start = re.search(r'[\[\{]', without_comment)
    if not json_start:
        return []
    try:
        return json.loads(without_comment[json_start.start():])
    except json.JSONDecodeError as e:
        print(f"Fejl ved JSON-parsing: {e}")
        return []


def _read_records(archive, member):
    """Return the parsed content of *member*, or [] when *member* is None."""
    if not member:
        return []
    return parse_twitter_js(archive.read(member).decode('utf-8'))


def _first_record(records, key):
    """Return the dict stored under *key* in the first record.

    Returns None when there is no usable first record, and {} when the record
    exists but *key* is missing or not a dict.
    """
    if not isinstance(records, list) or not records:
        return None
    first = records[0]
    if not isinstance(first, dict):
        return None
    value = first.get(key)
    return value if isinstance(value, dict) else {}


def _rewrite_links(text, entities):
    """Drop media short links from *text* and substitute the url entities."""
    for media_entity in entities.get('media') or []:
        tco_media = media_entity.get('url', '')
        if tco_media:
            text = text.replace(tco_media, '').strip()
    for url_entity in entities.get('urls') or []:
        tco_url = url_entity.get('url', '')
        expanded_url = url_entity.get('expanded_url', '')
        display_url = url_entity.get('display_url', expanded_url)
        if tco_url and expanded_url:
            html_link = f'{display_url}'
            text = text.replace(tco_url, html_link)
    return text


def _extract_media(archive, tweet, media_root, media_dir):
    """Copy the tweet's media out of the archive; return the saved paths.

    Extraction is best-effort: a media file that is missing from the archive
    or cannot be written is skipped instead of aborting the whole run.
    """
    saved = []
    tweet_id = tweet.get('id_str', '')
    extended = tweet.get('extended_entities') or {}
    for media in extended.get('media') or []:
        media_url = media.get('media_url', '')
        if not media_url:
            continue
        filename = media_url.split('/')[-1]
        zip_media_path = f"{media_root}/{tweet_id}-{filename}"
        try:
            with archive.open(zip_media_path) as source, \
                    open(os.path.join(media_dir, filename), 'wb') as target:
                shutil.copyfileobj(source, target)
        except (KeyError, OSError, zipfile.BadZipFile):
            # KeyError: member not in the zip; OSError: target not writable;
            # BadZipFile: damaged member. Interrupts are no longer swallowed.
            continue
        saved.append(f"{media_dir}/{filename}")
    return saved


def _parse_created_at(raw):
    """Parse a ``created_at`` value; fall back to the current time.

    The UTC offset is parsed with ``%z`` so the result is timezone-aware and
    ``timestamp()`` yields the real epoch value rather than treating the UTC
    wall-clock time as local time. A missing (None) or malformed value falls
    back to the current local time.
    """
    try:
        return datetime.strptime(raw, '%a %b %d %H:%M:%S %z %Y')
    except (TypeError, ValueError):
        return datetime.now().astimezone()


def _build_tweet(archive, item, media_root, media_dir):
    """Convert one raw archive record into the dict used for rendering."""
    t = item.get('tweet', item)
    is_reply = (t.get('in_reply_to_status_id') is not None
                or t.get('in_reply_to_user_id') is not None)
    text = t.get('full_text', '')
    # Determined before the short links are rewritten or removed.
    has_links = 'http' in text
    entities = t.get('entities') or {}
    text = _rewrite_links(text, entities)
    tags = [tag['text'].lower() for tag in entities.get('hashtags') or []]
    media_files = _extract_media(archive, t, media_root, media_dir)
    date_obj = _parse_created_at(t.get('created_at'))
    return {
        'id': t.get('id_str', ''),
        'text': text,
        'date': date_obj.strftime('%d. %b %Y, %H:%M'),
        'year': date_obj.strftime('%Y'),
        'timestamp': date_obj.timestamp(),
        'is_reply': is_reply,
        'has_links': has_links,
        'tags': tags,
        'media': media_files,
        # `or 0` also covers counts stored as null in the archive.
        'likes': int(t.get('favorite_count') or 0),
        'retweets': int(t.get('retweet_count') or 0),
    }


def main():
    """Read ``twitter.zip`` from the working directory and write ``index.html``.

    Requires ``twitter.zip`` and ``avatar.png`` in the current directory and
    returns early with a message when either is missing. Media referenced by
    tweets is copied into a ``media`` folder.
    """
    zip_path = 'twitter.zip'
    if not os.path.exists(zip_path):
        print("Fejl: Fandt ikke twitter.zip i samme mappe.")
        return
    if not os.path.exists('avatar.png'):
        print("Fejl: Fandt ikke avatar.png i samme mappe. Brug venligst et billede med dette navn.")
        return

    print("Analyserer twitter.zip...")
    media_dir = 'media'
    os.makedirs(media_dir, exist_ok=True)

    profile_data = {}
    tweets_data = []

    with zipfile.ZipFile(zip_path, 'r') as z:
        file_list = z.namelist()
        profile_file = next((f for f in file_list if f.endswith('data/profile.js')), None)
        account_file = next((f for f in file_list if f.endswith('data/account.js')), None)
        followers_file = next((f for f in file_list if f.endswith('data/follower.js')), None)
        following_file = next((f for f in file_list if f.endswith('data/following.js')), None)

        profile_data['name'] = 'Andreas Andersen'
        profile_data['bio'] = ''

        profile = _first_record(_read_records(z, profile_file), 'profile')
        if profile is not None:
            description = profile.get('description')
            if isinstance(description, dict):
                profile_data['bio'] = description.get('bio') or ''

        account = _first_record(_read_records(z, account_file), 'account')
        if account is not None:
            profile_data['handle'] = '@' + account.get('username', 'ukendt')
            profile_data['created'] = account.get('createdAt', '')

        profile_data['followersCount'] = len(_read_records(z, followers_file))
        profile_data['followingCount'] = len(_read_records(z, following_file))

        # Find ALL tweet files (e.g. tweets.js, tweets-part1.js, tweets-part2.js).
        tweet_files = [f for f in file_list if _TWEET_FILE_RE.search(f)]
        if tweet_files:
            print(f"Fandt {len(tweet_files)} fil(er) med tweets. Udtrækker og behandler...")
            for tweets_file in tweet_files:
                print(f"- Læser {tweets_file}...")
                # The media folder sits next to the tweet file, so a folder
                # prefix inside the zip is carried over automatically.
                media_root = tweets_file.rpartition('/')[0] + '/tweets_media'
                for item in _read_records(z, tweets_file):
                    if not isinstance(item, dict):
                        continue
                    tweets_data.append(_build_tweet(z, item, media_root, media_dir))

    profile_data['totalPosts'] = sum(1 for t in tweets_data if not t['is_reply'])
    profile_data['totalReplies'] = sum(1 for t in tweets_data if t['is_reply'])

    print("Genererer HTML...")
    generate_html(profile_data, tweets_data)
    print("Færdig! Åbn index.html i din browser.")


def generate_html(profile, tweets):
    """Render the profile summary and write it to ``index.html``.

    Args:
        profile: Dict with the optional keys ``handle``, ``bio``,
            ``totalPosts``, ``totalReplies``, ``followersCount``,
            ``followingCount`` and ``created``; a default is used for every
            missing key.
        tweets: List of tweet dicts. It is accepted for the caller's sake but
            is not referenced by the template in this function.
    """
    html_content = f""" Twitter/X Arkiv

Twitter/X arkiv

Profilbillede
Andreas Andersen
{profile.get('handle', '@andreas')}
{profile.get('bio', '')}
{profile.get('totalPosts', 0)} Posts {profile.get('totalReplies', 0)} Svar {profile.get('followersCount', 0)} Følgere {profile.get('followingCount', 0)} Fulgte Profil oprettet: {profile.get('created', '')[:10] if profile.get('created') else 'Ukendt'}
"""
    with open('index.html', 'w', encoding='utf-8') as f:
        f.write(html_content)


if __name__ == "__main__":
    main()