Skip to content

Commit

Permalink
i sailuh#302 Refactors file and entity options for graph CLI
Browse files Browse the repository at this point in the history
Similar to the git CLI, users may want to choose different
configurations for file and entity network construction. Thus, add
separate options to the CLI configuration file.

Signed-off-by: Nicole Hoess <nicole.hoess@oth-regensburg.de>
  • Loading branch information
nicolehoess committed Aug 7, 2024
1 parent 0c375d9 commit fe46ff2
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 70 deletions.
100 changes: 67 additions & 33 deletions conf/kaiaulu_cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,38 +72,72 @@ git:

# Options for the graph CLI.
graph:
# Bipartite networks.
bipartite:
# When creating bipartite networks, you can choose between different
# combinations of authors, committers, files and entities to connect.
# Make sure to prepare and pass a suitable parsed git log to the CLI.
# File network options: author-file, commit-file
# Entity network options: author-entity, committer-entity,
# commit-entity, author-committer
network_type: author-entity
# When creating a bipartite projection, you can choose whether to
# apply it to the first or second node.
mode: TRUE # TRUE: first node
# Networks can be directed or undirected.
directed: TRUE
# The weight scheme will determine how the edge weights between nodes
# are calculated.
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges,
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal
weight_scheme: weight_scheme_sum_edges
# File-based analysis mode.
file:
# When creating bipartite networks, you can choose between different
# combinations of authors, commits and files to connect.
# Make sure to prepare and pass a suitable parsed git log to the CLI.
# Bipartite file network options: author-file, commit-file
network_type: author-file
# When creating a bipartite projection, you can choose whether to
# apply it to the first or second node type named in network_type
# (e.g. for author-file, the first node type is author).
mode: TRUE # TRUE: first node
# Networks can be directed or undirected.
directed: TRUE
# The weight scheme will determine how the edge weights between nodes
# are calculated.
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges,
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal
weight_scheme: weight_scheme_sum_edges
# Entity-based analysis mode.
entity:
# When creating bipartite networks, you can choose between different
# combinations of authors, committers, commits and entities to connect.
# Make sure to prepare and pass a suitable parsed git log to the CLI.
# Bipartite entity network options: author-entity, committer-entity,
# commit-entity, author-committer
network_type: author-entity
# When creating a bipartite projection, you can choose whether to
# apply it to the first or second node type named in network_type
# (e.g. for author-entity, the first node type is author).
mode: TRUE # TRUE: first node
# Networks can be directed or undirected.
directed: TRUE
# The weight scheme will determine how the edge weights between nodes
# are calculated.
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges,
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal
weight_scheme: weight_scheme_sum_edges
# Temporal networks.
temporal:
# When calculating the temporal collaboration network, you can
# choose whether to calculate collaborations based on files or entities.
network_type: entity
# You can choose between author or committer collaboration.
mode: author
# Networks can be directed or undirected.
directed: TRUE
# You may consider only the last or all preceding developers to
# calculate the temporal network's edge weights.
# Options: one_lag, all_lag
lag: all_lag
# The weight scheme will determine how the edge weights between
# nodes are calculated.
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges,
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal
weight_scheme: weight_scheme_pairwise_cum_temporal
# File-based analysis mode.
file:
# You can choose between author or committer collaboration.
mode: author
# Networks can be directed or undirected.
directed: TRUE
# You may consider only the last or all preceding developers to
# calculate the temporal network's edge weights.
# Options: one_lag, all_lag
lag: all_lag
# The weight scheme will determine how the edge weights between
# nodes are calculated.
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges,
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal
weight_scheme: weight_scheme_cum_temporal
# Entity-based analysis mode.
entity:
# You can choose between author or committer collaboration.
mode: author
# Networks can be directed or undirected.
directed: TRUE
# You may consider only the last or all preceding developers to
# calculate the temporal network's edge weights.
# Options: one_lag, all_lag
lag: all_lag
# The weight scheme will determine how the edge weights between
# nodes are calculated.
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges,
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal
weight_scheme: weight_scheme_cum_temporal
162 changes: 125 additions & 37 deletions exec/graph.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# file, You can obtain one at https://mozilla.org/MPL/2.0/.


require(data.table,quietly=TRUE)
require(yaml,quietly=TRUE)
require(cli,quietly=TRUE)
require(docopt,quietly=TRUE)
Expand All @@ -18,9 +19,11 @@ require(igraph,quietly=TRUE)
doc <- "
USAGE:
graph.R bipartite help
graph.R bipartite <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
graph.R bipartite file <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
graph.R bipartite entity <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
graph.R temporal help
graph.R temporal <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
graph.R temporal file <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
graph.R temporal entity <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
graph.R (-h | --help)
graph.R --version
Expand Down Expand Up @@ -48,7 +51,7 @@ if(arguments[["bipartite"]] & arguments[["help"]]){
transform_gitlog_to_bipartite_network(),
transform_gitlog_to_entity_bipartite_network()
and bipartite_graph_projection().")
}else if(arguments[["bipartite"]]){
}else if(arguments[["bipartite"]] & arguments[["file"]]){

tools_path <- arguments[["<tools.yml>"]]
conf_path <- arguments[["<project_conf.yml>"]]
Expand All @@ -60,24 +63,18 @@ if(arguments[["bipartite"]] & arguments[["help"]]){
conf <- yaml::read_yaml(conf_path)
cli <- yaml::read_yaml(cli_path)

network_type <- cli[["graph"]][["bipartite"]][["network_type"]]
mode <- cli[["graph"]][["bipartite"]][["mode"]]
directed <- cli[["graph"]][["bipartite"]][["directed"]]
weight_scheme <- cli[["graph"]][["bipartite"]][["weight_scheme"]]
network_type <- cli[["graph"]][["bipartite"]][["file"]][["network_type"]]
mode <- cli[["graph"]][["bipartite"]][["file"]][["mode"]]
directed <- cli[["graph"]][["bipartite"]][["file"]][["directed"]]
weight_scheme <- cli[["graph"]][["bipartite"]][["file"]][["weight_scheme"]]

# Read git log
project_git <- data.table::fread(gitlog_path)

if (nrow(project_git) > 0){
# Bipartite network
if (endsWith(network_type, "file")) {
bipartite_network <- transform_gitlog_to_bipartite_network(project_git,
mode = network_type)
}else{
bipartite_network <- transform_gitlog_to_entity_bipartite_network(project_git,
mode = network_type)
}

bipartite_network <- transform_gitlog_to_bipartite_network(project_git,
mode = network_type)
if (length(bipartite_network[["edgelist"]]) > 1){
# Bipartite projection
bipartite_projection <- bipartite_graph_projection(bipartite_network,
Expand All @@ -97,20 +94,69 @@ if(arguments[["bipartite"]] & arguments[["help"]]){
cli_alert_success(paste0("Adjacency matrix for bipartite projection
was saved at: ",save_path))
}else{
cli_alert_warning(paste0("Egde list is empty. Empty adjacency matrix
was not saved."))
cli_alert_warning(paste0("Egde list is empty."))
write.csv(data.frame(),save_path)
}
}else{
cli_alert_warning(paste0("Git log is empty. Empty adjacency matrix
was not saved."))
cli_alert_warning(paste0("Git log is empty."))
write.csv(data.frame(),save_path)
}
}else if(arguments[["bipartite"]] && arguments[["entity"]]){
  # `graph.R bipartite entity`: build an entity bipartite network from a
  # parsed git log, project it, and save the projection's weighted
  # adjacency matrix as CSV at <save_file_name_path>. When the git log or
  # the edge list is empty, a warning is printed and an empty CSV is
  # written instead.

  # Positional CLI arguments.
  tools_path <- arguments[["<tools.yml>"]]
  conf_path <- arguments[["<project_conf.yml>"]]
  cli_path <- arguments[["<cli_conf.yml>"]]
  gitlog_path <- arguments[["<gitlog_file_name_path>"]]
  save_path <- arguments[["<save_file_name_path>"]]

  # All three configuration files are parsed; `tool` and `conf` are not
  # referenced further in this branch.
  tool <- yaml::read_yaml(tools_path)
  conf <- yaml::read_yaml(conf_path)
  cli <- yaml::read_yaml(cli_path)

  # Entity-specific options from the graph > bipartite > entity section.
  entity_options <- cli[["graph"]][["bipartite"]][["entity"]]
  network_type <- entity_options[["network_type"]]
  mode <- entity_options[["mode"]]
  directed <- entity_options[["directed"]]
  weight_scheme <- entity_options[["weight_scheme"]]

  # Read git log
  project_git <- data.table::fread(gitlog_path)

  if (nrow(project_git) > 0){
    # Bipartite network
    bipartite_network <- transform_gitlog_to_entity_bipartite_network(
      project_git,
      mode = network_type
    )
    # NOTE(review): length() of a data.frame/data.table counts columns, not
    # rows -- confirm that an empty edge list really has at most one column,
    # otherwise this guard may not detect it.
    if (length(bipartite_network[["edgelist"]]) > 1){
      # Bipartite projection; `weight_scheme` holds a function name that is
      # resolved to the function object with get().
      bipartite_projection <- bipartite_graph_projection(
        bipartite_network,
        mode = mode,
        weight_scheme_function = get(weight_scheme)
      )

      # Save adjacency matrix
      graph_bipartite_projection <- igraph::graph_from_data_frame(
        d = bipartite_projection[["edgelist"]],
        directed = directed,
        vertices = bipartite_projection[["nodes"]]
      )
      adjacency_matrix <- igraph::as_adjacency_matrix(
        graph_bipartite_projection,
        attr = "weight",
        sparse = FALSE
      )
      adjacency_matrix <- as.data.frame(adjacency_matrix)
      # Use the column names as row names so they are written out by fwrite.
      rownames(adjacency_matrix) <- colnames(adjacency_matrix)

      data.table::fwrite(adjacency_matrix, save_path, row.names = TRUE)
      cli_alert_success(paste0("Adjacency matrix for bipartite projection\n",
                               "was saved at: ", save_path))
    }else{
      cli_alert_warning("Edge list is empty.")
      write.csv(data.frame(), save_path)
    }
  }else{
    cli_alert_warning("Git log is empty.")
    write.csv(data.frame(), save_path)
  }
}else if(arguments[["temporal"]] & arguments[["help"]]){
cli_alert_info("Creates a temporal collaboration network from a
parsed git (entity) log using
transform_gitlog_to_temporal_network() and
transform_gitlog_to_entity_temporal_network().")
}else if(arguments[["temporal"]]){
}else if(arguments[["temporal"]] & arguments[["file"]]){

tools_path <- arguments[["<tools.yml>"]]
conf_path <- arguments[["<project_conf.yml>"]]
Expand All @@ -122,27 +168,69 @@ if(arguments[["bipartite"]] & arguments[["help"]]){
conf <- yaml::read_yaml(conf_path)
cli <- yaml::read_yaml(cli_path)

network_type <- cli[["graph"]][["temporal"]][["network_type"]]
mode <- cli[["graph"]][["temporal"]][["mode"]]
directed <- cli[["graph"]][["temporal"]][["directed"]]
lag <- cli[["graph"]][["temporal"]][["lag"]]
weight_scheme <- cli[["graph"]][["temporal"]][["weight_scheme"]]
network_type <- cli[["graph"]][["temporal"]][["file"]][["network_type"]]
mode <- cli[["graph"]][["temporal"]][["file"]][["mode"]]
directed <- cli[["graph"]][["temporal"]][["file"]][["directed"]]
lag <- cli[["graph"]][["temporal"]][["file"]][["lag"]]
weight_scheme <- cli[["graph"]][["temporal"]][["file"]][["weight_scheme"]]

# Read git log
project_git <- data.table::fread(gitlog_path)

if (nrow(project_git) > 0){
# Temporal network
if (network_type=="file"){
temporal_network <- transform_gitlog_to_temporal_network(project_git,
mode = mode, lag = lag,
weight_scheme_function = get(weight_scheme))
temporal_network <- transform_gitlog_to_temporal_network(project_git,
mode = mode, lag = lag,
weight_scheme_function = get(weight_scheme))

if (length(temporal_network[["edgelist"]]) > 1){
# Save adjacency matrix
graph_temporal_network <- igraph::graph_from_data_frame(d=temporal_network[["edgelist"]],
directed = directed,
vertices = temporal_network[["nodes"]])
adjacency_matrix <- as_adjacency_matrix(graph_temporal_network,
attr = "weight", sparse = F)
adjacency_matrix <- as.data.frame(adjacency_matrix)
rownames(adjacency_matrix) <- colnames(adjacency_matrix)

data.table::fwrite(adjacency_matrix,save_path,row.names=T)

cli_alert_success(paste0("Adjacency matrix for temporal network was
saved at: ",save_path))
}else{
temporal_network <- transform_gitlog_to_entity_temporal_network(project_git,
mode = mode, lag = lag,
weight_scheme_function = get(weight_scheme))
cli_alert_warning(paste0("Egde list is empty."))
write.csv(data.frame(),save_path)
}
}else{
cli_alert_warning(paste0("Git log is empty."))
write.csv(data.frame(),save_path)
}
}else if(arguments[["temporal"]] & arguments[["entity"]]){

tools_path <- arguments[["<tools.yml>"]]
conf_path <- arguments[["<project_conf.yml>"]]
cli_path <- arguments[["<cli_conf.yml>"]]
gitlog_path <- arguments[["<gitlog_file_name_path>"]]
save_path <- arguments[["<save_file_name_path>"]]

tool <- yaml::read_yaml(tools_path)
conf <- yaml::read_yaml(conf_path)
cli <- yaml::read_yaml(cli_path)

network_type <- cli[["graph"]][["temporal"]][["entity"]][["network_type"]]
mode <- cli[["graph"]][["temporal"]][["entity"]][["mode"]]
directed <- cli[["graph"]][["temporal"]][["entity"]][["directed"]]
lag <- cli[["graph"]][["temporal"]][["entity"]][["lag"]]
weight_scheme <- cli[["graph"]][["temporal"]][["entity"]][["weight_scheme"]]

# Read git log
project_git <- data.table::fread(gitlog_path)

if (nrow(project_git) > 0){
# Temporal network
temporal_network <- transform_gitlog_to_entity_temporal_network(project_git,
mode = mode, lag = lag,
weight_scheme_function = get(weight_scheme))

if (length(temporal_network[["edgelist"]]) > 1){
# Save adjacency matrix
Expand All @@ -157,13 +245,13 @@ if(arguments[["bipartite"]] & arguments[["help"]]){
data.table::fwrite(adjacency_matrix,save_path,row.names=T)

cli_alert_success(paste0("Adjacency matrix for temporal network was
saved at: ",save_path))
saved at: ",save_path))
}else{
cli_alert_warning(paste0("Egde list is empty. Empty adjacency matrix
was not saved."))
cli_alert_warning(paste0("Egde list is empty."))
write.csv(data.frame(),save_path)
}
}else{
cli_alert_warning(paste0("Git log is empty. Empty adjacency matrix
was not saved."))
cli_alert_warning(paste0("Git log is empty."))
write.csv(data.frame(),save_path)
}
}

0 comments on commit fe46ff2

Please sign in to comment.