8.network_functional_comparisons.R

#
#8.network_functional_comparisons.R

#This script makes the functional modular comparison of coexpression graphs 
#constructed for people with AD and people without pathological AD.

#Modules will be compared in terms of the associated biological functions identified by an enrichment analysis.
#This comparison involves answering three complementary questions: 

#1.How similar are the sets of biological functions that are associated to the whole network, through the enrichment of individual modules?
#2.How similar are the modules found in each network, in terms of the sets of associated biological functions?
#3.In how many modules is represented each biological process?

#paulinapglz.99@gmail.com
#Part of code adapted from https://github.com/guillermodeandajauregui/BiologicalModuleComparison/blob/master/comparisonParameters.R

#Libraries --- ---

pacman::p_load("igraph", 
               "tidyverse",
               "clusterProfiler", 
               "gridExtra", 
               "biomaRt", 
               "ggraph",
               "tidyheatmaps")

library("org.Hs.eg.db", character.only = TRUE)

#Set seed for modularity algorithm --- ---

set.seed(10)

# Define functions --- ---

#Define similarity of Enriched Processes, Jaccard Index function 

jaccard_simplex <- function(a,b){
  length(intersect(a,b))/length(union(a,b))
}

# Crear una función para calcular el Jaccard Index
jaccard_index <- function(set1, set2) {
  intersection <- length(intersect(set1, set2))
  union <- length(union(set1, set2))
  return(intersection / union)
}

#Get data --- --- 

graphAD <- read_graph(file =  '/datos/rosmap/data_by_counts/ROSMAP_counts/counts_by_tissue/DLFPC/counts_by_NIA_Reagan/graphs_NIA_Reagan/ROSMAP_RNAseq_DLPFC_AD_MutualInfograph_percentile99.99_trad.graphml',
                      format = 'graphml')

graphnoAD <- read_graph(file = '/datos/rosmap/data_by_counts/ROSMAP_counts/counts_by_tissue/DLFPC/counts_by_NIA_Reagan/graphs_NIA_Reagan/ROSMAP_RNAseq_DLPFC_noAD_MutualInfograph_percentile99.99_trad.graphml',
                        format = 'graphml')

universe <- scan(file = "/datos/rosmap/data_by_counts/ROSMAP_counts/counts_by_tissue/DLFPC/counts_by_NIA_Reagan/graphs_NIA_Reagan/universe.txt", what = character())

#Save graphs in a list

graphLists <- list(graphAD = graphAD,
                   graphnoAD = graphnoAD)

#Comparison of biological function sets associated to the overall network --- ---

#Enrichment of full graphs

#For AD graph

enrichment_fullnet_AD <- enrichGO(gene = V(graphAD)$name,
                                  OrgDb = "org.Hs.eg.db", 
                                  keyType = 'ENSEMBL',
                                  readable = TRUE,
                                  universe = universe, 
                                  ont = "BP",          #type of GO(Biological Process (BP), cellular Component (CC), Molecular Function (MF)
                                  pvalueCutoff = 0.05, 
                                  qvalueCutoff = 0.10)

#Gene concept network enrichment

enrichment_fullnet_AD_cnet <- cnetplot(enrichment_fullnet_AD, showCategory= 5)

#Dotplot enrichment

enrichment_fullnet_AD_dot <- dotplot(enrichment_fullnet_AD)

#Save plots

ggsave("enrichment_fullnet_AD_cnet.png", enrichment_fullnet_AD_cnet, width = 15, height = 8)

#For noAD graph

enrichment_fullnet_noAD <- enrichGO(gene = V(graphnoAD)$name,
                                    OrgDb = "org.Hs.eg.db", 
                                    keyType = 'ENSEMBL',
                                    readable = TRUE,
                                    universe = universe, 
                                    ont = "BP",          #type of GO(Biological Process (BP), cellular Component (CC), Molecular Function (MF)
                                    pvalueCutoff = 0.05, 
                                    qvalueCutoff = 0.10)

#Gene concept network enrichment

enrichment_fullnet_noAD_cnet <- cnetplot(enrichment_fullnet_noAD, showCategory= 5)

#Dotplot enrichment

enrichment_fullnet_noAD_dot <- dotplot(enrichment_fullnet_noAD)

#Comparison of enrichments --- ---

OverallEnrichedProcessJ <- jaccard_simplex(names(enrichment_fullnet_AD@geneSets), names(enrichment_fullnet_noAD@geneSets))
#[1] 0.8042686

#This answers the question 2. How similar are the modules found in each network, in terms of the sets of associated biological functions?
  
#Similarity of modules in terms of associated biological functions --- ---

#Modularity algorithm 

modularities <- sapply(X = graphLists, FUN = cluster_infomap)

#Count modules

len_mod <- sapply(X = modularities, FUN = length)
# graphAD graphnoAD 
# 67        72

#Calculate Q score 

Qscore <- sapply(modularities, FUN = modularity)
# graphAD graphnoAD 
# 0.2825230 0.2027528 

#Calculate clustering coefficient

clus_coe <- sapply(graphLists, FUN = transitivity)
 
# graphAD graphnoAD 
# 0.5956005 0.6252799 

#Split lists of nodes by module

nodes_membership <- sapply(modularities, FUN = membership)

#Create df
nodes_membership_AD.df <- data.frame(ensembl_gene_id = names(nodes_membership$graphAD),  membership = nodes_membership$graphAD)

nodes_membership_noAD.df <- data.frame(ensembl_gene_id = names(nodes_membership$graphnoAD),  membership = nodes_membership$graphnoAD)

# Assign membership to the nodes of each network
for (i in seq_along(graphLists)) {
  V(graphLists[[i]])$community <- nodes_membership[[i]]
}

#Save graphs  --- ----

# Loop through each graph and export it with community membership as GraphML
for (i in seq_along(graphLists)) {
  # Generate filename for each graph (e.g., graphAD.graphml, graphnoAD.graphml)
  graph_name <- ifelse(i == 1, "graphAD", "graphnoAD")
  filename <- paste0(graph_name, "nodes_membership.graphml")

  # Export graph to GraphML format
  write_graph(graphLists[[i]], file = filename, format = "graphml")
}

#If you already have the networks


graphAD <- read_graph(file =  '~/redesROSMAP/graphADnodes_membership.graphml',
                      format = 'graphml')

graphnoAD <- read_graph(file = '~/redesROSMAP/graphnoADnodes_membership.graphml',
                        format = 'graphml')

universe <- scan(file = "/datos/rosmap/data_by_counts/ROSMAP_counts/counts_by_tissue/DLFPC/counts_by_NIA_Reagan/graphs_NIA_Reagan/universe.txt", what = character())

#Save graphs in a list

graphLists <- list(graphAD = graphAD,
                   graphnoAD = graphnoAD)

#Plot networks
# Attach communities to relevant vertices
graphLists <- lapply(seq_along(graphLists), function(i) {
  V(graphLists[[i]])$color <- modularities[[i]]$membership
  graphLists[[i]]  # Devolver el grafo modificado
})

# Asignar tamaños de nodos según el grado para cada grafo en graphLists
graphLists <- lapply(graphLists, function(g) {
  V(g)$size <- degree(g)  # Asigna el grado como tamaño de los nodos
  g  # Devuelve el grafo modificado
})

# Extract list of nodes by community for each graph

nodes_by_community_list <- lapply(seq_along(nodes_membership), function(i) {
  split(V(graphLists[[i]])$name, nodes_membership[[i]])
})

# Extraer los genes de cada módulo
modules_AD <- split(nodes_membership_AD.df$ensembl_gene_id, nodes_membership_AD.df$membership)
modules_noAD <- split(nodes_membership_noAD.df$ensembl_gene_id, nodes_membership_noAD.df$membership)

# Crear la matriz de similitud
similarity_matrix <- matrix(0, nrow = length(modules_AD), ncol = length(modules_noAD))

# Rellenar la matriz con los índices de similitud Jaccard
for (i in seq_along(modules_AD)) {
  for (j in seq_along(modules_noAD)) {
    similarity_matrix[i, j] <- jaccard_index(modules_AD[[i]], modules_noAD[[j]])
  }
}

# Nombrar filas y columnas según los módulos
rownames(similarity_matrix) <- paste0("AD_", seq_along(modules_AD))
colnames(similarity_matrix) <- paste0("control_", seq_along(modules_noAD))

# Visualizar la matriz de similitud
similarity_matrix
dim(similarity_matrix)
#[1] 68 71

#Is there any modules with perfect similitude?

length(similarity_matrix[similarity_matrix == 1])
#[1] 10

#Is there any modules with moderate similitude?

length(similarity_matrix[similarity_matrix > 0.5])
#[1]

# Buscar posiciones de valores iguales a 1
pairs_with_one <- which(similarity_matrix == 1, arr.ind = TRUE)

# Crear dataframe con los nombres de los módulos correspondientes
module_pairs <- data.frame(
  module_AD = rownames(similarity_matrix)[pairs_with_one[, "row"]],
  module_noAD = colnames(similarity_matrix)[pairs_with_one[, "col"]]
)

# Imprimir el dataframe corregido
print(module_pairs)

#Create heatmap
# Convertimos la matriz en un formato largo

sim_heatmap.df <- as.data.frame(as.table(similarity_matrix)) %>%
  rename(Var1 = "module_AD", Var2 = "module_noAD", Freq = "similarity")

# Crear el heatmap
sim_heatmap.p <- tidyheatmap(
  df = sim_heatmap.df,
  rows = module_AD,
  columns = module_noAD,
  values = similarity,
  scale = "none", # No queremos escalado adicional
  clustering_method = "average", # Opcional: Método de clustering
  annotation_col = NULL,    # Opcional: Anotaciones en columnas si es necesario
  annotation_row = NULL,     # Opcional: Anotaciones en filas si es necesario
  colors =  c("navy", "white", "firebrick"), 
  main = "Gene module correspondence"
)

ggsave(
  "sim_genes_heatmap.pdf",
  plot = sim_heatmap.p,
  device = "pdf",
  width = 15,
  height = 10,
  units = "in",
  dpi = 300
)

#Compare modularity between graphs, applying 
#variation of information "vi"
#normalized mutual information "nmi"
#split-join distance "split-join distance"
#Rand index "Rand index"
#adjusted Rand index "adjusted Rand index"

possible_algos <- c("vi", "nmi", "split.join", "rand", "adjusted.rand")

comparison_methods <- sapply(X = possible_algos, FUN = function(i){
  igraph::compare(comm1 = graphAD_plus_modules,
                  comm2 = graphnoAD_plus_modules,
                  method = i
  )
})

comparison_methods

#In which modules are found our hub genes? --- ---

#Get hub genes 

hub_genes <- scan("/datos/rosmap/data_by_counts/ROSMAP_counts/counts_by_tissue/DLFPC/counts_by_NIA_Reagan/graphs_NIA_Reagan/AD_hubs_notcontrolens.txt",  what = character())

hub_genes.x <- nodes_membership_AD.df %>% filter(ensembl_gene_id %in% hub_genes)

hub_genes.xy <- convert_ens_to_symbol(hub_genes.x)

hub_genes.x <- hub_genes.x %>% left_join(hub_genes.xy, by ="ensembl_gene_id")

#In which modules are found our high betweeness genes?--- ---

high_be_genes <-  scan(file = "/datos/rosmap/data_by_counts/ROSMAP_counts/counts_by_tissue/DLFPC/counts_by_NIA_Reagan/graphs_NIA_Reagan/high_be_genes_en_AD_no_noAD_ens.txt",   what = character())

high_be_genes.x <- nodes_membership_AD.df %>% filter(ensembl_gene_id %in% high_be_genes)

high_be_genes.xy <- convert_ens_to_symbol(high_be_genes.x)

high_be_genes.x <- high_be_genes.x %>% left_join(high_be_genes.xy, by ="ensembl_gene_id")

#Enrichment to all modules of a network --- ----

#I'd like to add names to lists of graphs again

names(nodes_by_community_list)[1] <- "graphAD"
names(nodes_by_community_list)[2] <- "graphnoAD"

#Extract list of nodes by community 

nodes_by_community_AD <- nodes_by_community_list[["graphAD"]]

nodes_by_community_noAD <- nodes_by_community_list[["graphnoAD"]]

#Define function that performs enrichment

enricher <- function(nodes_by_community) {
  enrichGO_result <- enrichGO(gene = nodes_by_community,
                              OrgDb = org.Hs.eg.db, 
                              universe = universe , 
                              keyType = 'ENSEMBL',
                              readable = TRUE,
                              ont = "BP",          #type of GO(Biological Process (BP), cellular Component (CC), Molecular Function (MF)
                              pvalueCutoff = 0.05, 
                              qvalueCutoff = 0.10)
  return(enrichGO_result)
}

#These functions may print a " --> No gene can be mapped...." on the console. 
#This happens when there are lists of genes that cannot be enriched. It will still generate a result

# Define a function to replace NULL by an empty S4 object "enrichResult" to handle NULLs in the enrichment lists

replace_null <- function(x) {
  if (is.null(x)) {
    return(new("enrichResult"))
  } else {
    return(x)
  }
}

#Enrichment of graphAD modules

enriched_results_AD <- lapply(nodes_by_community_AD, enricher) #slow
enriched_results_AD <- lapply(enriched_results_AD, replace_null) # Exchange NULLs for empty S4 objects

#Enrichment of graphAD modules
enriched_results_noAD <- lapply(nodes_by_community_noAD, enricher) #slow
enriched_results_noAD <- lapply(enriched_results_noAD, replace_null) # Exchange NULLs for empty S4 objects

#Functional module comparison between two graphs --- ---

#1.How similar are the sets of biological functions that are associated to the whole network, through the enrichment of individual modules?

#Similarity of modules in terms of associated biological functions

#It is possible to compare each module of the network of interest to the modules
#of another network, in terms of their associated biological function

# Create an empty array to store the results of the Jaccard index.
num_modules_AD <- length(enriched_results_AD)
num_modules_noAD <- length(enriched_results_noAD)

similarity_matrix_enri <- matrix(NA, nrow = num_modules_AD, ncol = num_modules_noAD)

# Assign row and column names
rownames(similarity_matrix_enri) <- paste("AD", 1:num_modules_AD, sep = "_")
colnames(similarity_matrix_enri) <- paste("control", 1:num_modules_noAD, sep = "_")

#Iterate to obtain similarity matrix 

for (i in 1:num_modules_AD) {
  for (j in 1:num_modules_noAD) {
    # Acceder a los geneSets de las redes AD y no AD
    geneSets_AD <- names(enriched_results_AD[[i]]@geneSets)
    geneSets_noAD <- names(enriched_results_noAD[[j]]@geneSets)
    similarity_matrix_enri[i, j] <- jaccard_simplex(geneSets_AD, geneSets_noAD)
  }
}

head(similarity_matrix_enri)

#Find the highest similarity of the matrix  
max(similarity_matrix_enri, na.rm = TRUE)

length(similarity_matrix_enri[similarity_matrix_enri == 1])

length(similarity_matrix_enri[similarity_matrix_enri > 0.5])

# Crear dataframe con los nombres de los módulos correspondientes
module_pairs_enri <- data.frame(
  module_AD = rownames(similarity_matrix_enri)[pairs_with_one[, "row"]],
  module_noAD = colnames(similarity_matrix_enri)[pairs_with_one[, "col"]]
)

# Imprimir el dataframe corregido
print(module_pairs_enri)

sim_enri_heatmap.df <- as.data.frame(as.table(similarity_matrix_enri)) %>%
  rename(Var1 = "module_AD", Var2 = "module_control", Freq = "similarity")

# Crear el heatmap
sim_enri_heatmap.p <- tidyheatmap(
  df = sim_enri_heatmap.df,
  rows = module_AD,
  columns = module_control,
  values = similarity,
  scale = "none", # No queremos escalado adicional
  clustering_method = "average", # Opcional: Método de clustering
  annotation_col = NULL,    # Opcional: Anotaciones en columnas si es necesario
  annotation_row = NULL,     # Opcional: Anotaciones en filas si es necesario
  colors =  c("navy", "white", "firebrick"), 
  main = "Biological Processes module correspondence"
)

ggsave(
  "sim_enri_genes_heatmap.pdf",
  plot = sim_enri_heatmap.p,
  device = "pdf",
  width = 15,
  height = 10,
  units = "in",
  dpi = 300
)


#Number of modules in networks that have Jaccard index J=1 with a module of the Main network --- ---

# Function to count the number of columns with value equal to 1
count_ones <- function(row) {
  sum(row == 1)
}

# Apply the function to each row of the matrix
ones_count <- apply(similarity_matrix_enri, 1, count_ones)

# Crear una tabla con los resultados
num_equal_nodes <- data.frame(module_number = rownames(similarity_matrix_enri), modules_with_jaccardindex1 = ones_count)

num_equal_nodes.x <- num_equal_nodes %>% filter(modules_with_jaccardindex1 ==1)
dim(num_equal_nodes.x)
#[1] 15  2

#This answers the question of 1. How similar are the sets of biological functions that are associated to the whole network, through the enrichment of individual modules?
  
#TABLE

# Función para contar genes y procesos enriquecidos
count_genes_and_processes <- function(enrichment_results) {
  # Lista para almacenar los resultados
  results_summary <- list()
  
  for (i in seq_along(enrichment_results)) {
    # Obtener el número de genes (esto asume que cada elemento tiene una lista de genes)
    num_genes <- length(enrichment_results[[i]]@gene)
    
    # Número de procesos enriquecidos (filtrados por pvalue significativo)
    if (nrow(enrichment_results[[i]]) > 0) {
      num_processes <- nrow(enrichment_results[[i]]@result[enrichment_results[[i]]@result$pvalue < 0.05, ])
    } else {
      num_processes <- 0
    }
    
    # Almacenar el resultado por comunidad
    results_summary[[i]] <- list(
      "num_genes" = num_genes,
      "num_processes" = num_processes
    )
  }
  return(results_summary)
}

# Resumen de enriquecimiento para AD y noAD
summary_AD <- count_genes_and_processes(enriched_results_AD)
summary_noAD <- count_genes_and_processes(enriched_results_noAD)

communities_AD <- names(nodes_by_community_AD)
communities_noAD <- names(nodes_by_community_noAD)

# Convertir los resultados a un data frame
df_AD <- data.frame(
  Community = communities_AD,
  Category = "AD",
  Number_of_Genes = sapply(summary_AD, function(x) x$num_genes),
  Number_of_Enriched_Processes = sapply(summary_AD, function(x) x$num_processes)
)


df_noAD <- data.frame(
  Community = communities_noAD,
  Category = "noAD",
  Number_of_Genes = sapply(summary_noAD, function(x) x$num_genes),
  Number_of_Enriched_Processes = sapply(summary_noAD, function(x) x$num_processes)
)

# Combinar ambas tablas en una sola
df_summary <- bind_rows(df_AD, df_noAD)

#Number of modules associated to a given biological function --- ---

#3.In how many modules is represented each biological process?

# Extraer los términos GO para cada módulo en AD
BP_terms_AD <- lapply(enriched_results_AD, function(result) {
  if (length(result@result) > 0) {
    return(result@result$Description)  # Extraer nombres de procesos biológicos
  } else {
    return(NA)  # Manejar módulos sin resultados
  }
})

# Extraer los términos GO para cada módulo en noAD
BP_terms_noAD <- lapply(enriched_results_noAD, function(result) {
  if (length(result@result) > 0) {
    return(result@result$Description)
  } else {
    return(NA)
  }
})

# Contar cuántos módulos están asociados a cada proceso biológico en AD
BP_count_AD <- table(unlist(BP_terms_AD)) %>% as.data.frame()
colnames(BP_count_AD) <- c("term", "in_modules_AD")

# Contar cuántos módulos están asociados a cada proceso biológico en noAD
BP_count_noAD <- table(unlist(BP_terms_noAD)) %>% as.data.frame()
colnames(BP_count_noAD) <- c("term", "in_modules_noAD")

# Unificar los términos de procesos biológicos de ambas redes
all_BP_terms <- BP_count_AD %>% left_join(BP_count_noAD, by = "term")
all_BP_terms[is.na(all_BP_terms)] <- 0  # Reemplazar NA por 0
# Calcular la diferencia absoluta entre AD y noAD
all_BP_terms <- all_BP_terms %>%
  mutate(difference = abs(in_modules_AD - in_modules_noAD))

# Seleccionar los 20 términos con la mayor diferencia
top_diff_BP_terms <- all_BP_terms %>%
  arrange(desc(difference)) %>%
  head(30)

diff_BP_terms.p <- ggplot(top_diff_BP_terms, aes(x = reorder(term, difference))) +  # Reordenar según la diferencia en orden descendente
  geom_col(aes(y = in_modules_AD, fill = "AD", alpha = 0.8), stat = "identity", position = "dodge") +
  geom_col(aes(y = in_modules_noAD, fill = "control", alpha = 0.8), stat = "identity", position = "dodge") +
  coord_flip() + 
  theme_minimal() +
  labs(x = "Biological Process (GO:BP)", y = "Number of Modules", fill = "Network") +
  ggtitle("Top 20 Biological Processes with Largest Differences (AD vs control)")+
  scale_fill_manual(values = c("AD" = "red", "control" = "blue"))  # Colores personalizados
diff_BP_terms.p

diff_BP_terms.p <- ggplot(top_diff_BP_terms, aes(x = reorder(term, difference))) +  
  geom_point(aes(y = in_modules_AD, color = "AD"), size = 4, alpha = 0.8) +
  geom_point(aes(y = in_modules_noAD, color = "control"), size = 4, alpha = 0.8) +
coord_flip() +
geom_segment(aes(xend = term, y = in_modules_AD, yend = 0, color = "AD"), size = 1, alpha = 0.8) +
  geom_segment(aes(xend = term, y = in_modules_noAD, yend = 0, color = "control"), size = 1, alpha = 0.8) +
  theme_minimal() +
  labs(x = "Biological Process (GO:BP)", y = "Number of Modules in which they are represented", color = "Network") +
  ggtitle("Top 20 Biological Processes with Largest Differences (AD vs control)") +
  scale_color_manual(values = c("AD" = "red", "control" = "blue"))  # Colores personalizados
diff_BP_terms.p

#Save plot

ggsave(filename = "diff_BP_terms.jpg",
       plot = diff_BP_terms.p,
       device = "jpg", width = 25,
       height = 20, units = "cm",dpi = 300)

# Asumamos que tienes un objeto llamado 'enriched_results' con los procesos enriquecidos

# Generar una lista con las conexiones entre módulos y procesos GO
network_edges <- data.frame()

for (i in seq_along(enriched_results_AD)) {
  module_name <- names(nodes_by_community_AD)[i]  # Nombre de la comunidad o módulo
  go_terms <- enriched_results_AD[[i]]@result$ID  # Procesos GO significativos para esa comunidad
  
  if (length(go_terms) > 0) {
    for (go in go_terms) {
      network_edges <- rbind(network_edges, data.frame(Module = module_name, GO = go))
    }
  }
}

# Crear el grafo a partir de los datos
g <- graph_from_data_frame(network_edges, directed = FALSE)

# Agregar propiedades a los nodos: si es un módulo o un GO term
V(g)$type <- ifelse(V(g)$name %in% names(nodes_by_community_AD), "Module", "GO Term")

# Colores para módulos y GO terms
module_color <- "dodgerblue"  # Color para módulos
go_color <- "orange"          # Color para GO terms

# Crear el gráfico
ggraph(g, layout = "fr") +  # Fruchterman-Reingold layout para redes
  geom_edge_link(aes(edge_alpha = 0.5), color = "gray", show.legend = FALSE) +  # Enlaces
  geom_node_point(aes(color = V(g)$type), size = 5) +  # Nodos
  geom_node_text(aes(label = V(g)$name), repel = TRUE, size = 3) +  # Etiquetas
  scale_color_manual(values = c("Module" = module_color, "GO Term" = go_color)) +  # Asignar colores
  theme_void() +  # Sin fondo
  theme(legend.position = "none")  # Sin leyenda

# Si ya tienes los datos de enriquecimiento para cada módulo, puedes ordenarlos por número de genes o procesos
# Supongamos que summary_AD y summary_noAD ya contienen el conteo de genes y procesos enriquecidos como antes

# Crear un data frame que contenga la información sobre los módulos y sus genes/procesos
df_AD <- data.frame(
  Module = names(nodes_by_community_AD),
  Number_of_Genes = sapply(summary_AD, function(x) x$num_genes),
  Number_of_Enriched_Processes = sapply(summary_AD, function(x) x$num_processes)
)

df_noAD <- data.frame(
  Module = names(nodes_by_community_noAD),
  Number_of_Genes = sapply(summary_noAD, function(x) x$num_genes),
  Number_of_Enriched_Processes = sapply(summary_noAD, function(x) x$num_processes)
)

# Ordenar por número de procesos enriquecidos o número de genes
df_AD <- df_AD[order(-df_AD$Number_of_Enriched_Processes), ]
df_noAD <- df_noAD[order(-df_noAD$Number_of_Enriched_Processes), ]

# Quedarse con los primeros 20 módulos de cada red
top_modules_AD <- df_AD[1:5, ]
top_modules_noAD <- df_noAD[1:5, ]

# Filtrar los resultados de enriquecimiento para los top módulos
filtered_enrichment_AD <- enriched_results_AD[names(enriched_results_AD) %in% top_modules_AD$Module]
filtered_enrichment_noAD <- enriched_results_noAD[names(enriched_results_noAD) %in% top_modules_noAD$Module]
# Generar una lista con las conexiones entre módulos filtrados y procesos GO
network_edges_filtered <- data.frame()

# Para AD
for (i in seq_along(filtered_enrichment_AD)) {
  module_name <- names(filtered_enrichment_AD)[i]  # Nombre de la comunidad o módulo
  go_terms <- filtered_enrichment_AD[[i]]@result$ID  # Procesos GO significativos para esa comunidad
  
  if (length(go_terms) > 0) {
    for (go in go_terms) {
      network_edges_filtered <- rbind(network_edges_filtered, data.frame(Module = module_name, GO = go))
    }
  }
}

# Para noAD (control)
for (i in seq_along(filtered_enrichment_noAD)) {
  module_name <- names(filtered_enrichment_noAD)[i]  # Nombre de la comunidad o módulo
  go_terms <- filtered_enrichment_noAD[[i]]@result$ID  # Procesos GO significativos para esa comunidad
  
  if (length(go_terms) > 0) {
    for (go in go_terms) {
      network_edges_filtered <- rbind(network_edges_filtered, data.frame(Module = module_name, GO = go))
    }
  }
}

# Crear el grafo filtrado
g_filtered <- graph_from_data_frame(network_edges_filtered, directed = FALSE)

# Agregar propiedades a los nodos: si es un módulo o un GO term
V(g_filtered)$type <- ifelse(V(g_filtered)$name %in% c(top_modules_AD$Module, top_modules_noAD$Module), "Module", "GO Term")

# Visualización usando ggraph (ver los pasos anteriores para detalles de visualización)
ggraph(g_filtered, layout = "fr") +  # Fruchterman-Reingold layout para redes
  geom_edge_link(aes(edge_alpha = 0.5), color = "gray", show.legend = FALSE) +  # Enlaces
  geom_node_point(aes(color = V(g_filtered)$type), size = 5) +  # Nodos
  geom_node_text(aes(label = V(g_filtered)$name), repel = TRUE, size = 3) +  # Etiquetas
  scale_color_manual(values = c("Module" = module_color, "GO Term" = go_color)) +  # Asignar colores
  theme_void() +  # Sin fondo
  theme(legend.position = "none")  # Sin leyenda