"""Convert a Twitter/X archive export (twitter.zip) into a static HTML page.

Reads the ``data/*.js`` files inside the official archive ZIP, extracts
profile info, tweets and media, and renders everything into ``index.html``.
User-facing messages are in Danish, matching the original script.
"""

import zipfile
import json
import os
import shutil
import re
from datetime import datetime


def parse_twitter_js(content):
    """Strip the JavaScript assignment prefix from a Twitter archive .js file
    and return the parsed JSON payload.

    Archive files look like ``window.YTD.tweets.part0 = [ ... ]``, sometimes
    preceded by a ``/* ... */`` block comment. Returns the decoded list/dict,
    or ``[]`` when no JSON bracket is found or the payload is malformed.
    """
    # Drop a leading block comment, then locate the first JSON bracket;
    # everything before it is the JS variable assignment.
    cleaned = re.sub(r'^/\*.*?\*/', '', content, flags=re.DOTALL).strip()
    start_match = re.search(r'[\[{]', cleaned)
    if not start_match:
        return []
    try:
        return json.loads(cleaned[start_match.start():])
    except json.JSONDecodeError as e:
        print(f"Fejl ved JSON-parsing: {e}")
        return []


def main():
    """Entry point: read twitter.zip, extract data and media, emit index.html."""
    zip_path = 'twitter.zip'
    if not os.path.exists(zip_path):
        print("Fejl: Fandt ikke twitter.zip i samme mappe.")
        return

    if not os.path.exists('avatar.png'):
        print("Fejl: Fandt ikke avatar.png i samme mappe. "
              "Brug venligst et billede med dette navn.")
        return

    print("Analyserer twitter.zip...")

    media_dir = 'media'
    os.makedirs(media_dir, exist_ok=True)

    profile_data = {}
    tweets_data = []

    with zipfile.ZipFile(zip_path, 'r') as z:
        file_list = z.namelist()

        profile_file = next((f for f in file_list if f.endswith('data/profile.js')), None)
        account_file = next((f for f in file_list if f.endswith('data/account.js')), None)
        followers_file = next((f for f in file_list if f.endswith('data/follower.js')), None)
        following_file = next((f for f in file_list if f.endswith('data/following.js')), None)

        profile_data['name'] = 'Andreas Andersen'
        profile_data['bio'] = ''

        if profile_file:
            prof_json = parse_twitter_js(z.read(profile_file).decode('utf-8'))
            if prof_json:
                p = prof_json[0].get('profile', {})
                profile_data['bio'] = p.get('description', {}).get('bio', '')

        if account_file:
            acc_json = parse_twitter_js(z.read(account_file).decode('utf-8'))
            if acc_json:
                acc = acc_json[0].get('account', {})
                profile_data['handle'] = '@' + acc.get('username', 'ukendt')
                profile_data['created'] = acc.get('createdAt', '')

        profile_data['followersCount'] = (
            len(parse_twitter_js(z.read(followers_file).decode('utf-8'))) if followers_file else 0)
        profile_data['followingCount'] = (
            len(parse_twitter_js(z.read(following_file).decode('utf-8'))) if following_file else 0)

        # Match ALL tweet files (tweets.js, tweets-part1.js, tweets-part2.js, ...).
        tweet_files = [f for f in file_list if re.match(r'^data/tweets?(?:-part\d+)?\.js$', f)]

        if tweet_files:
            print(f"Fandt {len(tweet_files)} fil(er) med tweets. Udtrækker og behandler...")

        for tweets_file in tweet_files:
            print(f"- Læser {tweets_file}...")
            raw_tweets = parse_twitter_js(z.read(tweets_file).decode('utf-8'))

            for item in raw_tweets:
                # Newer archives wrap each entry as {"tweet": {...}}.
                t = item.get('tweet', item)

                is_reply = (t.get('in_reply_to_status_id') is not None
                            or t.get('in_reply_to_user_id') is not None)

                text = t.get('full_text', '')
                has_links = 'http' in text

                entities = t.get('entities', {})

                # Remove the t.co stubs that point at attached media.
                for m_ent in entities.get('media', []):
                    tco_media = m_ent.get('url', '')
                    if tco_media:
                        text = text.replace(tco_media, '').strip()

                # Replace remaining t.co stubs with real anchors.
                # NOTE(review): anchor markup reconstructed — the original
                # template string was garbled in the source we received.
                for u_ent in entities.get('urls', []):
                    tco_url = u_ent.get('url', '')
                    expanded_url = u_ent.get('expanded_url', '')
                    display_url = u_ent.get('display_url', expanded_url)
                    if tco_url and expanded_url:
                        html_link = f'<a href="{expanded_url}" target="_blank">{display_url}</a>'
                        text = text.replace(tco_url, html_link)

                tags = [tag['text'].lower() for tag in entities.get('hashtags', [])]

                media_files = []
                for m in t.get('extended_entities', {}).get('media', []):
                    media_url = m.get('media_url', '')
                    if not media_url:
                        continue
                    filename = media_url.split('/')[-1]
                    tweet_id = t.get('id_str', '')
                    # Archive stores media as data/tweets_media/<tweet_id>-<filename>.
                    zip_media_path = f"data/tweets_media/{tweet_id}-{filename}"
                    try:
                        with z.open(zip_media_path) as source, \
                                open(os.path.join(media_dir, filename), 'wb') as target:
                            shutil.copyfileobj(source, target)
                        media_files.append(f"{media_dir}/{filename}")
                    except (KeyError, OSError):
                        # Best-effort: ZipFile.open raises KeyError for a
                        # missing member; skip media not present in the archive.
                        pass

                try:
                    date_obj = datetime.strptime(t.get('created_at'),
                                                 '%a %b %d %H:%M:%S +0000 %Y')
                except (ValueError, TypeError):
                    # TypeError covers a missing created_at (None); fall back
                    # to "now" rather than crashing on one bad tweet.
                    date_obj = datetime.now()

                tweets_data.append({
                    'id': t.get('id_str', ''),
                    'text': text,
                    'date': date_obj.strftime('%d. %b %Y, %H:%M'),
                    'year': date_obj.strftime('%Y'),
                    'timestamp': date_obj.timestamp(),
                    'is_reply': is_reply,
                    'has_links': has_links,
                    'tags': tags,
                    'media': media_files,
                    'likes': int(t.get('favorite_count', 0)),
                    'retweets': int(t.get('retweet_count', 0)),
                })

    profile_data['totalPosts'] = sum(1 for t in tweets_data if not t['is_reply'])
    profile_data['totalReplies'] = sum(1 for t in tweets_data if t['is_reply'])

    print("Genererer HTML...")
    generate_html(profile_data, tweets_data)
    print("Færdig! Åbn index.html i din browser.")


def generate_html(profile, tweets):
    """Render the profile card and tweet list into index.html (UTF-8).

    NOTE(review): the original inline HTML/CSS/JS template was garbled in
    the source we received; this is a minimal reconstruction that renders
    the same visible data fields (page header, avatar, name, handle, bio,
    Posts/Svar/Følgere/Fulgte stats, creation date, tweet list with media).
    """
    # Newest first, mirroring a timeline view.
    tweet_items = []
    for t in sorted(tweets, key=lambda x: x['timestamp'], reverse=True):
        media_html = ''.join(
            f'<img src="{m}" alt="" loading="lazy">' for m in t['media'])
        tweet_items.append(
            f'<article class="tweet" data-year="{t["year"]}" '
            f'data-tags="{" ".join(t["tags"])}">\n'
            f'<p>{t["text"]}</p>\n{media_html}\n'
            f'<footer>{t["date"]} &middot; {t["likes"]} likes &middot; '
            f'{t["retweets"]} retweets</footer>\n</article>\n')

    created = profile.get('created', '')
    created_display = created[:10] if created else 'Ukendt'

    html_content = f"""<!DOCTYPE html>
<html lang="da">
<head>
<meta charset="utf-8">
<title>Twitter/X Arkiv</title>
<style>
body {{ font-family: sans-serif; max-width: 640px; margin: 0 auto; padding: 1rem; }}
.tweet {{ border-bottom: 1px solid #ddd; padding: 0.75rem 0; }}
.tweet img {{ max-width: 100%; }}
.stats span {{ margin-right: 1rem; }}
</style>
</head>
<body>
<h1>Twitter/X arkiv</h1>
<section class="profile">
<img src="avatar.png" alt="Profilbillede" width="96" height="96">
<h2>{profile.get('name', 'Andreas Andersen')}</h2>
<p>{profile.get('handle', '@andreas')}</p>
<p>{profile.get('bio', '')}</p>
<p class="stats">
<span>{profile.get('totalPosts', 0)} Posts</span>
<span>{profile.get('totalReplies', 0)} Svar</span>
<span>{profile.get('followersCount', 0)} Følgere</span>
<span>{profile.get('followingCount', 0)} Fulgte</span>
<span>Profil oprettet: {created_display}</span>
</p>
</section>
<main>
{''.join(tweet_items)}
</main>
</body>
</html>
"""
    with open('index.html', 'w', encoding='utf-8') as f:
        f.write(html_content)


if __name__ == "__main__":
    main()