Skip to content
Snippets Groups Projects
plot-timeline.ipynb 4.28 KiB
Newer Older
  • Learn to ignore specific revisions
  • cboulanger's avatar
    cboulanger committed
    {
     "cells": [
      {
       "cell_type": "code",
       "execution_count": null,
       "id": "initial_id",
       "metadata": {
        "collapsed": true
       },
       "outputs": [],
       "source": [
        "import pandas as pd\n",
        "import plotly.express as px\n",
        "\n",
        "# Read the scholar records from a UTF-8 encoded CSV file into a DataFrame\n",
        "df = pd.read_csv(filepath_or_buffer=\"scholars.csv\", encoding='utf-8')\n",
        "\n",
        "# last_dates[i] holds the dateOfDeath of the scholar most recently placed in\n",
        "# timeline row i; it starts empty and grows whenever a new row is opened\n",
        "last_dates = list()\n",
        "\n",
        "def find_row(last_dates, start_date):\n",
        "    \"\"\"Return the index of the first timeline row that is free at ``start_date``.\n",
        "\n",
        "    ``last_dates[i]`` is the end date of the entry most recently placed in row\n",
        "    ``i``. A row is free when ``start_date`` is strictly later than that end\n",
        "    date. A row whose end date is missing is treated as permanently occupied,\n",
        "    and a missing ``start_date`` never fits an existing row. When no row is\n",
        "    free, ``len(last_dates)`` is returned so the caller can open a new row.\n",
        "    \"\"\"\n",
        "    # A missing value (NaN/None/NaT) cannot be ordered against a date string:\n",
        "    # the comparison would raise TypeError, so missing values are handled first.\n",
        "    if pd.isna(start_date):\n",
        "        return len(last_dates)\n",
        "    for i, last_date in enumerate(last_dates):\n",
        "        if not pd.isna(last_date) and start_date > last_date:\n",
        "            return i\n",
        "    return len(last_dates)\n",
        "\n",
        "# Assign every scholar to the first row that is free at their dateOfBirth, so\n",
        "# that no two bars in a row overlap. Scholars are visited in ascending order of\n",
        "# dateOfBirth because first-fit reuses rows best when start dates arrive in\n",
        "# order. Only a sorted copy is iterated; results are written back by index\n",
        "# label, so df itself keeps its original row order.\n",
        "df['row'] = 0\n",
        "for index, scholar in df.sort_values('dateOfBirth').iterrows():\n",
        "    row = find_row(last_dates, scholar['dateOfBirth'])\n",
        "    if row < len(last_dates):\n",
        "        # Reuse an existing row: it is now occupied until this scholar's death\n",
        "        last_dates[row] = scholar['dateOfDeath']\n",
        "    else:\n",
        "        # No free row: open a new one at the bottom\n",
        "        last_dates.append(scholar['dateOfDeath'])\n",
        "    df.at[index, 'row'] = row\n",
        "\n",
        "# One bar per scholar, spanning dateOfBirth to dateOfDeath, drawn on the row\n",
        "# assigned above and labelled with the scholar's full name\n",
        "fig = px.timeline(\n",
        "    df,\n",
        "    x_start=\"dateOfBirth\",\n",
        "    x_end=\"dateOfDeath\",\n",
        "    y=\"row\",\n",
        "    text=\"fullName\",\n",
        "    title=\"Scholars' Life Spans Timeline\",\n",
        ")\n",
        "\n",
        "# The row numbers are only a layout device, so the y-axis shows no ticks;\n",
        "# the axis direction is reversed as well\n",
        "y_axis_without_ticks = dict(tickmode='array', tickvals=[], ticktext=[])\n",
        "fig.update_layout(yaxis=y_axis_without_ticks)\n",
        "fig.update_yaxes(autorange=\"reversed\")\n",
        "\n",
        "fig.show()\n",
        "\n"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "outputs": [],
       "source": [
        "import pandas as pd\n",
        "from datetime import datetime\n",
        "\n",
        "# This cell assumes the DataFrame `df` already exists (it is loaded in the first cell)\n",
        "\n",
        "# Reduce each date column to its calendar year. Values that cannot be parsed\n",
        "# are coerced to NaT and show up as <NA> in the nullable Int64 result.\n",
        "for year_col, date_col in (('Year', 'dateOfBirth'), ('End Year', 'dateOfDeath')):\n",
        "    parsed = pd.to_datetime(df[date_col], errors='coerce')\n",
        "    df[year_col] = parsed.dt.year.astype('Int64')\n",
        "\n",
        "# 'Display Date' reads \"<birth year> - <death year>\"; an unknown year is left blank\n",
        "birth_label = df['Year'].astype(str).replace('<NA>', '')\n",
        "death_label = df['End Year'].astype(str).replace('<NA>', '')\n",
        "df['Display Date'] = birth_label + ' - ' + death_label\n",
        "\n",
        "# 'Headline' reads \"<fullName> (<Display Date>)\"\n",
        "df['Headline'] = df['fullName'] + ' (' + df['Display Date'] + ')'\n",
        "\n",
        "# 'Text' lists occupation and fieldOfWork separated by an HTML line break;\n",
        "# a missing value is skipped rather than printed\n",
        "def _join_present(parts):\n",
        "    return '<br>'.join(parts.dropna())\n",
        "\n",
        "df['Text'] = df[['occupation', 'fieldOfWork']].apply(_join_present, axis=1)\n",
        "\n",
        "# 'Media' copies the 'image' column unchanged (assumed to hold ready-to-use URLs)\n",
        "df['Media'] = df['image']\n",
        "\n",
        "# Every entry belongs to the single group \"actors\"\n",
        "df['Group'] = 'actors'\n",
        "\n",
        "# Replace missing values in the text-like columns with empty strings\n",
        "for text_col in ('Display Date', 'Headline', 'Text', 'Media'):\n",
        "    df[text_col] = df[text_col].fillna('')\n",
        "\n",
        "# Column order required by the TimelineJS spreadsheet template. The names are\n",
        "# listed explicitly instead of being split out of a tab-delimited string, so the\n",
        "# list cannot be corrupted by tools that convert or strip tab characters.\n",
        "columns = [\n",
        "    \"Year\", \"Month\", \"Day\", \"Time\",\n",
        "    \"End Year\", \"End Month\", \"End Day\", \"End Time\",\n",
        "    \"Display Date\", \"Headline\", \"Text\",\n",
        "    \"Media\", \"Media Credit\", \"Media Caption\", \"Media Thumbnail\",\n",
        "    \"Type\", \"Group\", \"Background\", \"Link\",\n",
        "]\n",
        "\n",
        "# Template columns that have no counterpart in the data are added as blank columns\n",
        "for col in columns:\n",
        "    if col not in df:\n",
        "        df[col] = ''\n",
        "\n",
        "# Keep only the template columns, in template order\n",
        "timeline_df = df[columns]\n",
        "\n",
        "# Write the sheet without the DataFrame index column\n",
        "timeline_df.to_excel(\"timeline_data.xlsx\", index=False)\n"
       ],
       "metadata": {
        "collapsed": false
       },
       "id": "f774e82925504bd"
      }
     ],
     "metadata": {
      "kernelspec": {
       "display_name": "Python 3",
       "language": "python",
       "name": "python3"
      },
      "language_info": {
       "codemirror_mode": {
        "name": "ipython",
        "version": 2
       },
       "file_extension": ".py",
       "mimetype": "text/x-python",
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython2",
       "version": "2.7.6"
      }
     },
     "nbformat": 4,
     "nbformat_minor": 5
    }