Skip to content
Snippets Groups Projects
Commit 5dee5c1e authored by Christian Boulanger's avatar Christian Boulanger
Browse files

add files from older experiments

parent a7238606
No related branches found
No related tags found
No related merge requests found
---
title: "People involved in the JLS"
output: html_notebook
---
```{r}
library(readxl)
library(dplyr)
library(timevis)
library(stringr)
library(htmlwidgets)
# CSS snippets used as the `style` of timeline items.
vertical_stripes <- "background: repeating-linear-gradient(to right, transparent, transparent 2px, lightgray 2px, lightgray 4px);"
diagonal_stripes <- "background: repeating-linear-gradient(135deg, transparent, transparent 2px, lightgray 2px, lightgray 4px);"
plain_white <- "background-color: white"

# Read the editor list and derive the columns timevis expects
# (content, title, start, end, style).
editors <- read_excel("data/jls-editors.xlsx") %>%
  mutate(
    content = name,
    title = name,
    # Years are turned into dates by pinning them to 1 June.
    start = paste0(start, "-06-01"),
    # A missing end year is replaced by "2025".
    end = paste0(if_else(is.na(end), "2025", as.character(end)), "-06-01"),
    # Striped background for "Male"; white for every other (or missing) value.
    style = if_else(
      gender == "Male",
      vertical_stripes,
      plain_white,
      missing = plain_white
    )
  )

# Build the timeline widget.
hw <- timevis(
  editors,
  showZoom = FALSE,
  width = "1500px",
  options = list(
    start = "1974-01-01",
    end = "2025-12-31",
    selectable = FALSE,
    showCurrentTime = FALSE
  )
)

# Write the widget to disk and remove the dependency directory next to it.
saveWidget(hw, "docs/jls-editors.html")
unlink("docs/jls-editors_files", recursive = TRUE)
```
\ No newline at end of file
Martinez,José,Professor
Busse,Regierungsdirektor
Ewald,Endres,Forst,Jurist
Glas,VolkmarNies,Koch
Note: Some positions have been omitted as they are not explicitly mentioned in the text.
\ No newline at end of file
...@@ -18,7 +18,7 @@ def query(url, template, model_params = None, **params): ...@@ -18,7 +18,7 @@ def query(url, template, model_params = None, **params):
"temperature": 0.1, "temperature": 0.1,
"max_new_tokens": 2000 "max_new_tokens": 2000
} }
inputs = template.format_map(**params) inputs = template.format_map(params)
payload = { payload = {
"inputs": inputs, "inputs": inputs,
"parameters": model_params "parameters": model_params
......
library(network)
library(networkDynamic)
library(tidyverse)
library(ndtv)
# Configuration ----
top_n_cited <- 20
top_n_citing <- 20
year_start <- 2000
year_end <- 2010
years_per_slice <- 5

cat(
  "Importing data for", as.character(year_start), "-", as.character(year_end),
  fill = TRUE
)

# Load the citation table, keep the configured publication years and
# switch to the short column names used by the rest of the script.
citations <- read.csv("data/jls-author-network-owndata.csv", encoding = "UTF-8") |>
  filter(pub_year >= year_start, pub_year <= year_end) |>
  rename(
    from = citing_author,
    to = cited_author,
    year = pub_year,
    count = citation_count
  )

# Lookup table mapping every author name (citing or cited) to a numeric id
author_names <- unique(c(citations$from, citations$to))
vertices <- tibble(id = seq_along(author_names), name = author_names)

# Replace citing/cited author names by their ids
data <- citations |>
  mutate(
    from = match(from, vertices$name),
    to = match(to, vertices$name)
  ) |>
  select(from, to, year, count) |>
  filter(from != to) # remove self-citations

# as.character() keeps cat() from switching large integers to scientific notation
cat("Found", as.character(nrow(data)), "items.", fill = TRUE)
cat(
  "Determine the", as.character(top_n_cited), "most cited authors in ",
  as.character(years_per_slice), "-year windows",
  fill = TRUE
)
# Create sliding time windows.
#
# Returns the consecutive years of the window centred on `year`, i.e.
# `year - half_width` through `year + half_width`.
#
# Arguments:
#   year        a single year
#   half_width  number of years on each side of `year`; the default of 2
#               yields a 5-year window
sliding_window <- function(year, half_width = 2) {
  stopifnot(
    length(half_width) == 1,
    !is.na(half_width),
    half_width >= 0
  )
  seq(year - half_width, year + half_width)
}
# Attach to every citation row each window year returned by sliding_window()
# for its publication year (one output row per citation/window pair).
windowed <- data |>
  mutate(window = lapply(year, sliding_window)) |>
  unnest(window)

# Rank cited authors inside each window by number of rows and keep the
# `top_n_cited` highest-ranked ones per window
top_to_values <- windowed |>
  group_by(window, to) |>
  summarise(n = n(), .groups = "drop") |>
  arrange(window, desc(n)) |>
  group_by(window) |>
  slice_head(n = top_n_cited) |>
  ungroup()

# Keep only citations whose cited author is among the top authors of the
# window matching the citation's year
data <- data |>
  semi_join(top_to_values, by = c("to", "year" = "window"))

cat("Found", as.character(nrow(data)), "items.", fill = TRUE)
cat(
  "Within these, limit to the", as.character(top_n_citing),
  "most citing authors...",
  fill = TRUE
)

# For every cited author (`to`), find the `top_n_citing` most frequently
# occurring citing authors (`from`)
top_from_per_to <- data |>
  group_by(to, from) |>
  summarise(n = n(), .groups = "drop") |>
  arrange(to, desc(n)) |>
  group_by(to) |>
  slice_head(n = top_n_citing) |>
  ungroup()

# Keep only the rows whose (from, to) pair survived the ranking above
data <- data |>
  semi_join(top_from_per_to, by = c("from", "to"))

cat("Found", as.character(nrow(data)), "items.", fill = TRUE)
cat("Create network and activation data...", fill = TRUE)

# Restrict the vertex table to the authors still present in the edge list
# and give them consecutive ids (new_id = position after sorting by id)
vertex_ids <- union(data$from, data$to)
vertices <- vertices |>
  filter(id %in% vertex_ids) |>
  arrange(id) |>
  mutate(new_id = seq_len(n()))

# Re-express 'from' and 'to' in terms of the new consecutive ids
data <- data |>
  mutate(
    from = vertices$new_id[match(from, vertices$id)],
    to = vertices$new_id[match(to, vertices$id)]
  ) |>
  select(from, to, year, count)

# One row per distinct directed (from, to) pair
edges <- distinct(data, from, to)

# Vertex attributes passed to the network constructor
vertex_attr <- list(name = vertices$name)

# Build the static network from the two-column edge list
net <- network(
  cbind(edges$from, edges$to),
  directed = TRUE,
  loops = FALSE,
  vertex.attr = vertex_attr,
  vertices = nrow(vertices)
)
network.vertex.names(net) <- vertices$name
cat("Computing dynamic network...", fill = TRUE)

# Edge spells, columns [onset, terminus, tail, head]:
# every citation is a spell that starts and ends in its year
edge_spells <- data.frame(
  onset = data$year,
  terminus = data$year,
  tail = data$from,
  head = data$to
)

# Vertex spells, columns [onset, terminus, vertex_id]:
# determine the first and last year in which a vertex occurs (as tail or
# head) and emit one single-year spell for each year in between
vertex_spells <- edge_spells |>
  pivot_longer(
    cols = c(tail, head),
    names_to = "temp_col",
    values_to = "vertex_id"
  ) |>
  group_by(vertex_id) |>
  summarise(first_year = min(onset), last_year = max(terminus)) |>
  mutate(
    onset = map2(first_year, last_year, \(a, b) seq(from = a, to = b, by = 1))
  ) |>
  select(onset, vertex_id) |>
  unnest(onset) |>
  arrange(vertex_id, onset) |>
  mutate(
    onset = as.integer(onset),
    terminus = onset
  ) |>
  select(onset, terminus, vertex_id) |>
  as.data.frame()

dynNet <- networkDynamic(
  net,
  edge.spells = edge_spells,
  vertex.spells = vertex_spells,
  verbose = TRUE
)

cat("Rendering movie...", fill = TRUE)
# Size factor per vertex of a network slice, derived from its in-degree.
#
# The in-degree of every vertex is divided by the largest in-degree in the
# slice and mapped to the range [1, 3]; vertices whose in-degree is NA get 0.
#
# Arguments:
#   slice  the network slice to evaluate
# Returns a numeric vector with one entry per vertex.
get_normalized_indegree <- function(slice) {
  # NOTE(review): assumes `degree` is sna::degree (attached together with
  # ndtv) - confirm. There the kind of degree is chosen via `cmode`; `gmode`
  # only distinguishes "digraph" from "graph", so the former
  # gmode = "indegree" yielded the total degree instead of the in-degree.
  in_degree_values <- degree(slice, gmode = "digraph", cmode = "indegree")

  # Largest observed in-degree; fall back to 1 so we never divide by zero
  # (also covers slices without any non-NA value).
  observed <- in_degree_values[!is.na(in_degree_values)]
  max_degree <- if (length(observed) > 0) max(observed) else 0
  if (max_degree == 0) {
    max_degree <- 1
  }

  normalized_in_degree <- 1 + 2 * (in_degree_values / max_degree)

  # Replace NAs with 0s
  normalized_in_degree[is.na(normalized_in_degree)] <- 0
  normalized_in_degree
}
# Vertex labels for a network slice, blanked for weakly cited vertices.
#
# Uses the "vertex.names" attribute when the slice has one, otherwise the
# vertex indices as strings. Labels of vertices with an in-degree below 3
# are replaced by "".
#
# Arguments:
#   slice  the network slice to evaluate
# Returns a character vector with one label per vertex.
get_vertex_labels <- function(slice) {
  # NOTE(review): assumes `degree` is sna::degree (attached together with
  # ndtv) - confirm. There the kind of degree is chosen via `cmode`; the
  # former gmode = "indegree" yielded the total degree instead.
  in_degree_values <- degree(slice, gmode = "digraph", cmode = "indegree")
  hide_vertex_labels_ids <- which(in_degree_values < 3)

  existing_labels <- if ("vertex.names" %in% list.vertex.attributes(slice)) {
    get.vertex.attribute(slice, "vertex.names")
  } else {
    # seq_len() stays empty for a slice without vertices (1:0 would not)
    as.character(seq_len(network.size(slice)))
  }

  existing_labels[hide_vertex_labels_ids] <- ""
  existing_labels
}
# Plot parameters for the animation; the function-valued entries receive
# the network slice and are defined above (sizes and labels per vertex)
plot_params <- list(
  vertex.cex = get_normalized_indegree,
  label = get_vertex_labels,
  label.cex = get_normalized_indegree,
  main = "Network of most-cited authors with most-citing authors (Source: JLS dataset)",
  displaylabels = TRUE
)

# Options handed to the d3 player
d3_options <- list(animationDuration = 2000)

# Render the dynamic network as an HTML movie
render.d3movie(
  dynNet,
  plot.par = plot_params,
  d3.options = d3_options,
  frame.duration = 5000,
  filename = "figure/jls-most-cited-most-citing-movie.html",
  verbose = TRUE
)
library(shiny)
library(visNetwork)
# Sample data: five labelled nodes connected in a chain
nodes <- data.frame(
  id = 1:5,
  label = paste("Node", 1:5)
)
edges <- data.frame(
  from = c(1, 2, 3, 4),
  to = c(2, 3, 4, 5)
)

# UI: a search box above the network view
ui <- fluidPage(
  textInput("searchBox", "Search Node: "),
  visNetworkOutput("network")
)
# Server logic
#
# Shows the part of the sample network that matches the text typed into
# `searchBox`: the nodes whose label matches and every edge touching one of
# them. An empty search box yields an empty network.
server <- function(input, output, session) {
  # Reactive expression that recomputes whenever the search input changes
  reactive_network_data <- reactive({
    search_text <- input$searchBox
    if (is.null(search_text) || !nzchar(search_text)) {
      # Empty network
      list(nodes = data.frame(), edges = data.frame())
    } else {
      # The search text is used as a case-insensitive regular expression.
      # Text that is not a valid pattern (e.g. "(") would make grepl() fail,
      # so it is matched literally instead.
      is_match <- tryCatch(
        grepl(search_text, nodes$label, ignore.case = TRUE),
        error = function(e) {
          grepl(tolower(search_text), tolower(nodes$label), fixed = TRUE)
        }
      )
      filtered_nodes <- nodes[is_match, , drop = FALSE]
      touches_match <- edges$from %in% filtered_nodes$id |
        edges$to %in% filtered_nodes$id
      filtered_edges <- edges[touches_match, , drop = FALSE]
      list(nodes = filtered_nodes, edges = filtered_edges)
    }
  })

  # Render network
  output$network <- renderVisNetwork({
    network_data <- reactive_network_data()
    visNetwork(network_data$nodes, network_data$edges)
  })
}
# Launch the application with the UI and server defined above
shinyApp(ui = ui, server = server)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment