How to use my own ligand-receptor database (Cellchat V2 database)? #11

smk5g5 · 2024-11-01T21:01:21Z

This is cross-referenced from the other closed issue.

Hi,

Is the following correct approach for creating our own ligand receptor database? I tried to create this from cellchat v2 database and apply it to my Xenium data.

Below is the code for the same, The starting cellchat db file is also attached, (For creating this file I broke down complex Ligand receptor interactions into non-complex ones e.g. interaction (in cellchat) TGFB1_TGFBR1_TGFBR2 is broken down into 2 simple interactions i.e.

TGFB1 TGFBR1
TGFB1 TGFBR2

Attached is the cellchat DB file (simplified) that I started from along with the code used for generating your database files. On the first pass while the database works it only outputs few Ligand receptor pairs as significant (33) of which at least 150 genes are present in the Xenium data.

I am not sure if this is because of the way it was put in or something else?

Kindly let me know what you think about it?

Attached is the input (broken down cellchat database file used for the code below).
CellChatDB_df_noncomp2_withannot.csv

Below is the code I used to arrive at the database input I used to arrive at the custom cytosignal database.

library(Seurat)
library(ggplot2)
library(rcartocolor)
library(rcartocolor)
library(scCustomize)
library(stringr)
library(stringi)
cellchat_DB_file = read.csv('/n/scratch/users/s/sak4832/stlearn_cci/CellChatDB_df_noncomp2_withannot.csv')

#

outdir <- "/n/data1/mgh/neuro/petti/lab/Users/Anthony.Wang/GBM_Spatial/GBM_Xenium/Xenium_Second_Run/Aggregated_Sequential/Final/CytoSpace"
inter.index.new <- read.csv(sprintf("%s/inter.index.csv",outdir),row.names=1)
db.cont.new <- readRDS(sprintf("%s/db.cont.rds",outdir))


db.diff <- readRDS(sprintf("%s/db.diff.rds",outdir))

g_to_u_new <- read.csv(sprintf("%s/g_to_u.csv",outdir))


library(httr)
library(jsonlite)

# Function to retrieve the UniProt ID for a given gene symbol
get_uniprot_id <- function(gene_symbol) {
  
  # Define UniProt API URL
  base_url <- "https://rest.uniprot.org/uniprotkb/search"
  
  # Define query with the gene symbol and specifying reviewed entries
  query <- paste(gene_symbol, "AND reviewed:true")
  
  # Define fields to retrieve (accession only)
  fields <- "accession"
  
  # Make GET request to UniProt API
  response <- GET(base_url, query = list(query = query, format = "json", fields = fields), timeout(10))
  
  # Check if the response is successful
  if (status_code(response) != 200) {
    stop("Failed to retrieve data from UniProt. Please check the gene symbol or the API endpoint.")
  }
  
  # Parse the JSON response
  data <- content(response, as = "parsed", type = "application/json")
  
  # Check if results are returned
  if (!"results" %in% names(data) || length(data$results) == 0) {
    stop("No UniProt ID found for the given gene symbol.")
  }
  
  # Extract the UniProt ID (accession)
  uniprot_id <- data$results[[1]]$primaryAccession
  return(uniprot_id)
}

# # Example Usage
# gene_symbol <- "TGFB1"  # Replace with your gene of interest
# uniprot_id <- get_uniprot_id(gene_symbol)
# cat("UniProt ID for", gene_symbol, "is:", uniprot_id)

# 
cellchat_DB_file$Lig_Uniprot = 'NA'
cellchat_DB_file$Sym_Uniprot = 'NA'

for(i in 1:nrow(cellchat_DB_file)){
	gene_symbol_lig = get_uniprot_id(str_trim(cellchat_DB_file$ligand[i],side = "both"))
	gene_symbol_receptor = get_uniprot_id(str_trim(cellchat_DB_file$receptor[i],side = "both"))
	cellchat_DB_file$Lig_Uniprot[i] = gene_symbol_lig[1]
	cellchat_DB_file$Sym_Uniprot[i] = gene_symbol_receptor[1]
	}

# saveRDS(cellchat_DB_file,'cellchat_DB_file_withuniprot.rds')

#remove Non-protein Signaling

cellchat_DB_file_sub = subset(cellchat_DB_file,!(annotation %in% 'Non-protein Signaling'))


cellchat_DB_file_sub2 = unique(cellchat_DB_file_sub[c('ligand','receptor','annotation','Lig_Uniprot','Sym_Uniprot')])

cellchat_DB_file_sub2$Rec_Uniprot = cellchat_DB_file_sub2$Sym_Uniprot
cellchat_DB_file_sub2$Sym_Uniprot = NULL
# id_cp_interaction	partner_a	partner_b	protein_name_a	protein_name_b	annotation_strategy	source	partner_a_is_recep	partner_b_is_recep	recep_flag	order_flag


cellchat_DB_file_sub2_unique = cellchat_DB_file_sub2[!duplicated(cellchat_DB_file_sub2[c('ligand','receptor')]),]

cellchat_DB_file_sub2_unique$id_cp_interaction = paste0('CPI-',stri_rand_strings(nrow(cellchat_DB_file_sub2_unique), 11, pattern = "[A-Z0-9]"))

cellchat_DB_file_sub2_unique$source = 'CellChat_V2'

cellchat_DB_file_sub2_unique$partner_a_is_recep = FALSE
cellchat_DB_file_sub2_unique$partner_b_is_recep = TRUE

cellchat_DB_file_sub2_unique$protein_name_a = paste0(cellchat_DB_file_sub2_unique$Lig_Uniprot,'_HUMAN')
cellchat_DB_file_sub2_unique$protein_name_b = paste0(cellchat_DB_file_sub2_unique$Rec_Uniprot,'_HUMAN')

cellchat_DB_file_sub2_unique$recep_flag = 1
cellchat_DB_file_sub2_unique$order_flag = 0 

colnames(cellchat_DB_file_sub2_unique) = c('partner_a','partner_b','annotation_strategy','Lig_Uniprot','Rec_Uniprot','id_cp_interaction','source','partner_a_is_recep','partner_b_is_recep','protein_name_a','protein_name_b','recep_flag','order_flag')

sel_cols = intersect(c("id_cp_interaction","partner_a","partner_b","protein_name_a","protein_name_b","annotation_strategy","source","partner_a_is_recep","partner_b_is_recep","recep_flag","order_flag"),colnames(cellchat_DB_file_sub2_unique))

cellchat_DB_file_sub2_unique_ord = cellchat_DB_file_sub2_unique[sel_cols]

rownames(cellchat_DB_file_sub2_unique_ord) = cellchat_DB_file_sub2_unique_ord$id_cp_interaction

write.csv(cellchat_DB_file_sub2_unique_ord, file = 'inter.index.CellChatV2.csv', col.names = T,quote = F,row.names = TRUE)

g_to_u_new_lig = data.frame(gene_name=cellchat_DB_file_sub2_unique_ord$partner_a,uniprot=str_split_i(cellchat_DB_file_sub2_unique_ord$protein_name_a,'_HUMAN',1))
g_to_u_new_rec = data.frame(gene_name=cellchat_DB_file_sub2_unique_ord$partner_b,uniprot=str_split_i(cellchat_DB_file_sub2_unique_ord$protein_name_b,'_HUMAN',1))

# g_to_u_new <- read.csv(sprintf("%s/g_to_u.csv",outdir))

g_to_u_new = unique(rbind(g_to_u_new_lig,g_to_u_new_rec))

g_to_u_new$hgnc_symbol = g_to_u_new$gene_name

write.csv(g_to_u_new, file = 'g_to_u_new.CellChatV2.csv', col.names = T,quote = F,row.names = F)

# db.cont.new <- readRDS(sprintf("%s/db.cont.rds",outdir))

db.cont.new <- readRDS(sprintf("%s/db.cont.rds",outdir))

diffusion_dependent_Ligs = subset(cellchat_DB_file_sub2_unique_ord,annotation_strategy %in% c("Secreted Signaling","ECM-Receptor"))

contact_dependent_Ligs = subset(cellchat_DB_file_sub2_unique_ord,annotation_strategy %in% c("Cell-Cell Contact"))

db.diff_list = list()

ligand_factor =  factor(diffusion_dependent_Ligs$id_cp_interaction)
names(ligand_factor) <- str_split_i(diffusion_dependent_Ligs$protein_name_a,'_',1)
db.diff_list$ligands <- ligand_factor

rec_factor =  factor(diffusion_dependent_Ligs$id_cp_interaction)
names(rec_factor) <- str_split_i(diffusion_dependent_Ligs$protein_name_b,'_',1)
db.diff_list$receptors <- rec_factor
db.diff_list$combined = c(db.diff_list$ligands,db.diff_list$receptors)



db.cont_list = list()

ligand_factor =  factor(contact_dependent_Ligs$id_cp_interaction)
names(ligand_factor) <- str_split_i(contact_dependent_Ligs$protein_name_a,'_',1)
db.cont_list$ligands <- ligand_factor

rec_factor =  factor(contact_dependent_Ligs$id_cp_interaction)
names(rec_factor) <- str_split_i(contact_dependent_Ligs$protein_name_b,'_',1)
db.cont_list$receptors <- rec_factor
db.cont_list$combined = c(db.cont_list$ligands,db.cont_list$receptors)

Let me know if this looks accurate to you or otherwise?

Thank you!

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

How to use my own ligand-receptor database (Cellchat V2 database)? #11

How to use my own ligand-receptor database (Cellchat V2 database)? #11

smk5g5 commented Nov 1, 2024

How to use my own ligand-receptor database (Cellchat V2 database)? #11

How to use my own ligand-receptor database (Cellchat V2 database)? #11

Comments

smk5g5 commented Nov 1, 2024