In [None]:
import pandas as pd
import plotly.express as px

# Read the scholars table from a UTF-8 encoded CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="scholars.csv", encoding="utf-8")

# Layout bookkeeping: entry i holds the dateOfDeath of the scholar most
# recently placed in timeline row i, which is what overlap checks compare against
last_dates = []

def find_row(last_dates, start_date):
    """Return the index of the first row that can take an item starting at start_date.

    A row is usable when start_date is strictly greater than the end date
    currently recorded for it in last_dates. When no existing row qualifies,
    len(last_dates) is returned, i.e. the index a newly appended row would get.
    """
    usable_rows = (
        position
        for position, row_end in enumerate(last_dates)
        if start_date > row_end
    )
    return next(usable_rows, len(last_dates))

# Assign every scholar to a display row so that no two life spans placed in
# the same row overlap.
#
# The dates are parsed once up front so that the comparisons inside find_row
# are chronological. Comparing the raw CSV strings would be lexicographic, and
# a missing value (float NaN) compared with a string raises TypeError.
# Unparseable or missing dates become NaT; NaT never compares as "later", so
# such scholars simply end up in a row of their own.
birth_dates = pd.to_datetime(df['dateOfBirth'], errors='coerce')
death_dates = pd.to_datetime(df['dateOfDeath'], errors='coerce')

df['row'] = 0
# Visit scholars from the earliest dateOfBirth onwards (stable sort, NaT last).
# First-fit placement needs this start order to keep the number of rows low.
# Only the visiting order changes; df itself keeps its original row order.
birth_order = birth_dates.sort_values(kind='mergesort').index
for index, birth, death in zip(
    birth_order, birth_dates.loc[birth_order], death_dates.loc[birth_order]
):
    row = find_row(last_dates, birth)
    if row < len(last_dates):
        # Reuse an existing row: its new end is this scholar's death date
        last_dates[row] = death
    else:
        # No free row: open a new one at the bottom
        last_dates.append(death)
    df.at[index, 'row'] = row

# Draw one bar per scholar on its assigned row, labelled with the scholar's name
fig = px.timeline(df, x_start="dateOfBirth", x_end="dateOfDeath", y="row", text="fullName", title="Scholars' Life Spans Timeline")

# The row numbers are only a layout device, so hide the y-axis ticks and labels
fig.update_layout(yaxis=dict(tickmode='array', tickvals=[], ticktext=[]))
fig.update_yaxes(autorange="reversed")  # Reverse the direction of the y-axis

fig.show()



In [None]:
import pandas as pd
from datetime import datetime

# Build a TimelineJS-style spreadsheet from the scholars DataFrame `df`,
# which must already exist when this cell runs.

# Keep only the year of each date as a nullable integer; values that cannot be
# parsed as dates end up as <NA>
birth_parsed = pd.to_datetime(df['dateOfBirth'], errors='coerce')
df['Year'] = birth_parsed.dt.year.astype('Int64')
death_parsed = pd.to_datetime(df['dateOfDeath'], errors='coerce')
df['End Year'] = death_parsed.dt.year.astype('Int64')

# 'Display Date' reads "<birth year> - <death year>"; a missing year is left blank
start_label = df['Year'].astype(str).replace('<NA>', '')
end_label = df['End Year'].astype(str).replace('<NA>', '')
df['Display Date'] = start_label + ' - ' + end_label

# 'Headline' reads "fullName (<birth year> - <death year>)"
df['Headline'] = df['fullName'] + ' (' + df['Display Date'] + ')'

# 'Text' joins the non-missing occupation and fieldOfWork values with an HTML line break
df['Text'] = df[['occupation', 'fieldOfWork']].apply(
    lambda parts: '<br>'.join(parts[parts.notna()]),
    axis=1,
)

# 'Media' is a straight copy of the 'image' column
# (presumably already-usable image URLs; verify against the data)
df['Media'] = df['image']

# Every row is put into the same group, "actors"
df['Group'] = 'actors'

# Replace any remaining missing values in the text columns with empty strings
for text_column in ('Display Date', 'Headline', 'Text', 'Media'):
    df[text_column] = df[text_column].fillna('')

# Column names and order of the TimelineJS spreadsheet template
columns = [
    'Year', 'Month', 'Day', 'Time',
    'End Year', 'End Month', 'End Day', 'End Time',
    'Display Date', 'Headline', 'Text',
    'Media', 'Media Credit', 'Media Caption', 'Media Thumbnail',
    'Type', 'Group', 'Background', 'Link',
]
# Template columns that df does not provide are added as empty-string columns
for col in columns:
    if col in df:
        continue
    df[col] = ''
timeline_df = df[columns]

timeline_df.to_excel("timeline_data.xlsx", index=False)
