{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "\n",
    "# Load the data\n",
    "df = pd.read_csv(\"scholars.csv\", encoding='utf-8')\n",
    "\n",
    "\n",
    "def find_row(last_dates, start_date):\n",
    "    \"\"\"Return the index of the first row that is free at start_date.\n",
    "\n",
    "    last_dates[i] holds the dateOfDeath of the last scholar placed in row i.\n",
    "    A row is free when start_date is strictly later than that value. If no\n",
    "    existing row is free, len(last_dates) is returned so the caller opens a\n",
    "    new row.\n",
    "    \"\"\"\n",
    "    for i, last_date in enumerate(last_dates):\n",
    "        if start_date > last_date:\n",
    "            return i\n",
    "    return len(last_dates)\n",
    "\n",
    "\n",
    "# Parse both date columns once so the comparisons in find_row are chronological\n",
    "# (the raw CSV values are strings); values that cannot be parsed become NaT.\n",
    "# The work happens on a separate frame so df stays exactly as loaded.\n",
    "plot_df = df.assign(\n",
    "    dateOfBirth=pd.to_datetime(df['dateOfBirth'], errors='coerce'),\n",
    "    dateOfDeath=pd.to_datetime(df['dateOfDeath'], errors='coerce'),\n",
    ")\n",
    "\n",
    "# A bar needs both a start and an end, and NaT cannot be ordered, so rows\n",
    "# without two valid dates are left out of the plot. Sorting by dateOfBirth\n",
    "# lets the greedy assignment below reuse rows as early as possible.\n",
    "plot_df = (\n",
    "    plot_df.dropna(subset=['dateOfBirth', 'dateOfDeath'])\n",
    "    .sort_values('dateOfBirth', kind='stable')\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "\n",
    "# Assign each scholar to the first row whose previous life span has ended;\n",
    "# last_dates tracks the latest dateOfDeath placed in each row.\n",
    "last_dates = []\n",
    "assigned_rows = []\n",
    "for birth, death in zip(plot_df['dateOfBirth'], plot_df['dateOfDeath']):\n",
    "    row = find_row(last_dates, birth)\n",
    "    if row < len(last_dates):\n",
    "        last_dates[row] = death\n",
    "    else:\n",
    "        last_dates.append(death)\n",
    "    assigned_rows.append(row)\n",
    "plot_df['row'] = assigned_rows\n",
    "\n",
    "# Now plotting without row labels\n",
    "fig = px.timeline(plot_df, x_start=\"dateOfBirth\", x_end=\"dateOfDeath\", y=\"row\", text=\"fullName\", title=\"Scholars' Life Spans Timeline\")\n",
    "\n",
    "# Hide the y-axis ticks (the row number is only a layout device) and put row 0 at the top\n",
    "fig.update_layout(yaxis=dict(tickmode='array', tickvals=[], ticktext=[]))\n",
    "fig.update_yaxes(autorange=\"reversed\")\n",
    "\n",
    "fig.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Column order expected by the TimelineJS spreadsheet template\n",
    "TIMELINE_COLUMNS = [\n",
    "    'Year', 'Month', 'Day', 'Time',\n",
    "    'End Year', 'End Month', 'End Day', 'End Time',\n",
    "    'Display Date', 'Headline', 'Text',\n",
    "    'Media', 'Media Credit', 'Media Caption', 'Media Thumbnail',\n",
    "    'Type', 'Group', 'Background', 'Link',\n",
    "]\n",
    "\n",
    "# Work on a copy so df (loaded in the first cell) is not modified and this cell can be re-run safely\n",
    "timeline_df = df.copy()\n",
    "\n",
    "# Reduce dateOfBirth and dateOfDeath to nullable integer years; unparseable or missing dates become <NA>\n",
    "timeline_df['Year'] = pd.to_datetime(timeline_df['dateOfBirth'], errors='coerce').dt.year.astype('Int64')\n",
    "timeline_df['End Year'] = pd.to_datetime(timeline_df['dateOfDeath'], errors='coerce').dt.year.astype('Int64')\n",
    "\n",
    "# Create 'Display Date' as \"birth year - death year\"; a missing year is left blank\n",
    "start_label = timeline_df['Year'].astype(str).replace('<NA>', '')\n",
    "end_label = timeline_df['End Year'].astype(str).replace('<NA>', '')\n",
    "timeline_df['Display Date'] = start_label + ' - ' + end_label\n",
    "\n",
    "# Create 'Headline' as \"fullName (birth year - death year)\"\n",
    "timeline_df['Headline'] = timeline_df['fullName'] + ' (' + timeline_df['Display Date'] + ')'\n",
    "\n",
    "# Create 'Text' by joining occupation and fieldOfWork with <br>, skipping missing values.\n",
    "# astype(str) keeps the join from failing if a cell holds a non-string value.\n",
    "timeline_df['Text'] = timeline_df[['occupation', 'fieldOfWork']].apply(\n",
    "    lambda x: '<br>'.join(x.dropna().astype(str)), axis=1\n",
    ")\n",
    "\n",
    "# Use the image directly; assuming the URLs are already correctly formed in the 'image' column\n",
    "timeline_df['Media'] = timeline_df['image']\n",
    "\n",
    "# Add a \"Group\" column with the value \"actors\" for all rows\n",
    "timeline_df['Group'] = 'actors'\n",
    "\n",
    "# Replace any remaining NaNs in the text columns with empty strings\n",
    "for text_col in ['Display Date', 'Headline', 'Text', 'Media']:\n",
    "    timeline_df[text_col] = timeline_df[text_col].fillna('')\n",
    "\n",
    "# Template columns that have no source data are added as empty strings\n",
    "for col in TIMELINE_COLUMNS:\n",
    "    if col not in timeline_df:\n",
    "        timeline_df[col] = ''\n",
    "\n",
    "# Select and order the columns according to the TimelineJS template\n",
    "timeline_df = timeline_df[TIMELINE_COLUMNS]\n",
    "\n",
    "timeline_df.to_excel(\"timeline_data.xlsx\", index=False)\n"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f774e82925504bd"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}