| Title: | Hypothesis Testing for Populations of Brain Networks |
|---|---|
| Description: | Non-parametric hypothesis testing for populations of brain networks represented as graphs, following the L1-distance ANOVA framework of Fraiman and Fraiman (2018) <doi:10.1038/s41598-018-21688-0>. The package builds on this nonparametric graph-comparison framework, extending it with procedures for edge-level inference and identification of the specific connections driving group differences. In particular, it provides utilities to compute central (mean) graphs, pairwise Manhattan distances between adjacency matrices, the group test statistic T, and a fast permutation procedure to identify the critical edges that drive between-group differences. Helper functions to generate synthetic community-structured graphs and to visualise brain networks with communities are also included. |
| Authors: | Maximiliano Martino [aut, cre] (ORCID: <https://orcid.org/0000-0002-2437-4387>), Daniel Fraiman [aut] (ORCID: <https://orcid.org/0000-0002-0482-9137>) |
| Maintainer: | Maximiliano Martino <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.2.1 |
| Built: | 2026-06-05 14:39:49 UTC |
| Source: | https://github.com/mmaximiliano/brainnettest |
This function computes the central graph for a given population by averaging the adjacency matrices of all graphs within that population.
compute_central_graph(graph_list)compute_central_graph(graph_list)
graph_list |
A list of adjacency matrices representing brain networks. |
A single adjacency matrix representing the central graph of the population.
# Generate synthetic Control data Control <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.1), generate_random_graph(n_nodes = 5, edge_prob = 0.1) ) central_graph <- compute_central_graph(Control)# Generate synthetic Control data Control <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.1), generate_random_graph(n_nodes = 5, edge_prob = 0.1) ) central_graph <- compute_central_graph(Control)
This function calculates the Manhattan norm (L1 norm) distance between two adjacency matrices representing brain networks.
compute_distance(G, M)compute_distance(G, M)
G |
A square adjacency matrix representing a brain network. |
M |
A square adjacency matrix representing the central brain network. |
A numeric value representing the Manhattan distance between G and M.
# Generate synthetic data G1 <- generate_random_graph(n_nodes = 5, edge_prob = 0.1) G2 <- generate_random_graph(n_nodes = 5, edge_prob = 0.1) central_graph <- compute_central_graph(list(G1, G2)) distance <- compute_distance(G1, central_graph)# Generate synthetic data G1 <- generate_random_graph(n_nodes = 5, edge_prob = 0.1) G2 <- generate_random_graph(n_nodes = 5, edge_prob = 0.1) central_graph <- compute_central_graph(list(G1, G2)) distance <- compute_distance(G1, central_graph)
This function computes the frequency of each edge (connection between nodes) across all graphs within each population.
compute_edge_frequencies(populations)compute_edge_frequencies(populations)
populations |
A list where each element is a population containing a list of graphs (adjacency matrices). |
A list containing edge counts and edge proportions for each population.
# Generate synthetic populations control_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 1 ) patient_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.6, base_inter_prob = 0.4, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 2 ) populations <- list(Control = control_graphs, Patient = patient_graphs) # Compute edge frequencies frequencies <- compute_edge_frequencies(populations) # View edge counts for the first population edge_counts_control <- frequencies$edge_counts[,,1] print(edge_counts_control)# Generate synthetic populations control_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 1 ) patient_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.6, base_inter_prob = 0.4, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 2 ) populations <- list(Control = control_graphs, Patient = patient_graphs) # Compute edge frequencies frequencies <- compute_edge_frequencies(populations) # View edge counts for the first population edge_counts_control <- frequencies$edge_counts[,,1] print(edge_counts_control)
This function computes p-values for the differences in edge presence between populations using statistical tests (e.g., Fisher's exact test, chi-squared test).
compute_edge_pvalues(edge_counts, N, method = "fisher", adjust_method = "none")compute_edge_pvalues(edge_counts, N, method = "fisher", adjust_method = "none")
edge_counts |
An array of edge counts from |
N |
A vector of sample sizes for each population. |
method |
The statistical test method to use: "fisher", "chi.squared", or "prop". Default is "fisher". |
adjust_method |
The method for p-value adjustment for multiple testing. Default is "none". |
A matrix of p-values for each edge.
# Generate synthetic populations control_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 1 ) patient_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.6, base_inter_prob = 0.4, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 2 ) populations <- list(Control = control_graphs, Patient = patient_graphs) # Compute edge frequencies frequencies <- compute_edge_frequencies(populations) edge_counts <- frequencies$edge_counts N <- sapply(populations, length) # Compute p-values for edge differences edge_pvalues <- compute_edge_pvalues(edge_counts, N) # View p-values for the first few edges print(edge_pvalues[1:5, 1:5])# Generate synthetic populations control_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 1 ) patient_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.6, base_inter_prob = 0.4, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = 2 ) populations <- list(Control = control_graphs, Patient = patient_graphs) # Compute edge frequencies frequencies <- compute_edge_frequencies(populations) edge_counts <- frequencies$edge_counts N <- sapply(populations, length) # Compute p-values for edge differences edge_pvalues <- compute_edge_pvalues(edge_counts, N) # View p-values for the first few edges print(edge_pvalues[1:5, 1:5])
This function computes the test statistic T to assess whether different populations of brain networks originate from the same distribution.
compute_test_statistic(populations, a = 1)compute_test_statistic(populations, a = 1)
populations |
A named list where each element is a list of adjacency
matrices for a population. Example:
|
a |
A normalization constant. Default is 1. |
A numeric value representing the test statistic T.
# Generate synthetic populations data Control <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.1), generate_random_graph(n_nodes = 5, edge_prob = 0.1) ) PatientA <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.15), generate_random_graph(n_nodes = 5, edge_prob = 0.15) ) PatientB <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.2), generate_random_graph(n_nodes = 5, edge_prob = 0.2) ) populations <- list(Control = Control, PatientA = PatientA, PatientB = PatientB) # Compute the test statistic T T_value <- compute_test_statistic(populations, a = 1) print(T_value)# Generate synthetic populations data Control <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.1), generate_random_graph(n_nodes = 5, edge_prob = 0.1) ) PatientA <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.15), generate_random_graph(n_nodes = 5, edge_prob = 0.15) ) PatientB <- list( generate_random_graph(n_nodes = 5, edge_prob = 0.2), generate_random_graph(n_nodes = 5, edge_prob = 0.2) ) populations <- list(Control = Control, PatientA = PatientA, PatientB = PatientB) # Compute the test statistic T T_value <- compute_test_statistic(populations, a = 1) print(T_value)
This function generates a list of adjacency matrices representing brain networks belonging to the same category (e.g., Control group). The generated graphs have similar community structures but vary slightly in their intra-community and inter-community connection probabilities to reflect natural variability.
generate_category_graphs( n_graphs = 10, n_nodes = 100, n_communities = 4, community_sizes = NULL, base_intra_prob = 0.8, base_inter_prob = 0.2, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = NULL )generate_category_graphs( n_graphs = 10, n_nodes = 100, n_communities = 4, community_sizes = NULL, base_intra_prob = 0.8, base_inter_prob = 0.2, intra_prob_variation = 0.05, inter_prob_variation = 0.05, seed = NULL )
n_graphs |
An integer specifying the number of graphs to generate. Default is 10. |
n_nodes |
An integer specifying the total number of nodes (brain regions). Default is 100. |
n_communities |
An integer specifying the number of communities. Default is 4. |
community_sizes |
An integer vector specifying the sizes of each community. If NULL, communities are of equal size. Default is NULL. |
base_intra_prob |
A numeric value between 0 and 1, or a numeric vector
of length |
base_inter_prob |
A numeric value between 0 and 1 specifying the base probability of an edge existing between nodes from different communities. Default is 0.2. |
intra_prob_variation |
A numeric value specifying the maximum variation to apply to the intra-community probability for each graph. Default is 0.05. |
inter_prob_variation |
A numeric value specifying the maximum variation to apply to the inter-community probability for each graph. Default is 0.05. |
seed |
An optional integer for setting the random seed to ensure reproducibility. Default is NULL. |
A list of symmetric binary adjacency matrices with no self-loops, representing brain networks with similar community structures.
# Generate a set of 5 graphs for the Control category control_graphs <- generate_category_graphs(n_graphs = 5, n_nodes = 100, n_communities = 4, base_intra_prob = 0.8, base_inter_prob = 0.2)# Generate a set of 5 graphs for the Control category control_graphs <- generate_category_graphs(n_graphs = 5, n_nodes = 100, n_communities = 4, base_intra_prob = 0.8, base_inter_prob = 0.2)
This function generates a random symmetric adjacency matrix representing a brain network with community structure. Nodes within the same community have a higher probability of being connected compared to nodes from different communities.
generate_community_graph( n_nodes = 100, n_communities = 4, community_sizes = NULL, intra_prob = 0.8, inter_prob = 0.2, seed = NULL )generate_community_graph( n_nodes = 100, n_communities = 4, community_sizes = NULL, intra_prob = 0.8, inter_prob = 0.2, seed = NULL )
n_nodes |
An integer specifying the total number of nodes (brain regions). Default is 100. |
n_communities |
An integer specifying the number of communities. Default is 4. |
community_sizes |
An integer vector specifying the sizes of each community. If NULL, communities are of equal size. Default is NULL. |
intra_prob |
A numeric value between 0 and 1, or a numeric vector of
length |
inter_prob |
A numeric value between 0 and 1 specifying the probability of an edge existing between nodes from different communities. Default is 0.2. |
seed |
An optional integer for setting the random seed to ensure reproducibility. Default is NULL. |
A symmetric binary adjacency matrix with no self-loops, representing a brain network with community structure.
# Generate a brain network with community structure G <- generate_community_graph(n_nodes = 100, n_communities = 4, intra_prob = 0.8, inter_prob = 0.2)# Generate a brain network with community structure G <- generate_community_graph(n_nodes = 100, n_communities = 4, intra_prob = 0.8, inter_prob = 0.2)
This function generates a random symmetric adjacency matrix representing a brain network. The adjacency matrix is binary, with edges present based on a specified probability.
generate_random_graph(n_nodes, edge_prob = 0.1)generate_random_graph(n_nodes, edge_prob = 0.1)
n_nodes |
An integer specifying the number of nodes (brain regions). |
edge_prob |
A numeric value between 0 and 1 specifying the probability of an edge existing between any two nodes. |
A symmetric binary adjacency matrix with no self-loops.
G <- generate_random_graph(n_nodes = 10, edge_prob = 0.1)G <- generate_random_graph(n_nodes = 10, edge_prob = 0.1)
Given the output of identify_critical_links, this function
identifies the unique nodes involved in the critical edges and summarizes
their participation. Each node is reported with the number of critical edges
it participates in (its critical degree). If node labels are supplied,
they are included in the output.
get_critical_nodes(result, node_labels = NULL)get_critical_nodes(result, node_labels = NULL)
result |
The list returned by |
node_labels |
An optional character vector of length |
A data frame with the following columns, ordered by decreasing
critical_degree:
Integer node index.
Character label for the node (only present when
node_labels is not NULL).
Number of critical edges incident on this node.
If no critical edges were found (result$critical_edges is
NULL or has zero rows), an empty data frame with the same columns
is returned.
# Generate two synthetic populations with different community structure control <- generate_category_graphs(n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, seed = 1) patient <- generate_category_graphs(n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.5, base_inter_prob = 0.5, seed = 2) populations <- list(Control = control, Patient = patient) # Identify critical edges result <- identify_critical_links(populations, alpha = 0.05, method = "fisher", n_permutations = 200, seed = 42) # Extract critical nodes (no labels) get_critical_nodes(result) # With labels labels <- paste0("Region_", 1:10) get_critical_nodes(result, node_labels = labels)# Generate two synthetic populations with different community structure control <- generate_category_graphs(n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, seed = 1) patient <- generate_category_graphs(n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.5, base_inter_prob = 0.5, seed = 2) populations <- list(Control = control, Patient = patient) # Identify critical edges result <- identify_critical_links(populations, alpha = 0.05, method = "fisher", n_permutations = 200, seed = 42) # Extract critical nodes (no labels) get_critical_nodes(result) # With labels labels <- paste0("Region_", 1:10) get_critical_nodes(result, node_labels = labels)
Iteratively identifies the edges that drive the observed difference between two or more populations of brain networks, using the permutation-based L1-distance ANOVA test of Fraiman and Fraiman (2018). Edges are ranked by their marginal p-value, then removed in batches until the global test is no longer significant. The permutation null is re-evaluated incrementally via a prefix-sum decomposition of T, which avoids recomputing the statistic from scratch at every step.
identify_critical_links( populations, alpha = 0.05, method = "fisher", adjust_method = "none", batch_size = 1, n_permutations = 1000, a = 1, seed = NULL )identify_critical_links( populations, alpha = 0.05, method = "fisher", adjust_method = "none", batch_size = 1, n_permutations = 1000, a = 1, seed = NULL )
populations |
A named |
alpha |
Numeric significance level used both for the global test and
for the iterative edge-removal stopping rule. Default |
method |
Character string selecting the marginal edge test used to
rank candidate edges. One of |
adjust_method |
Character string passed to |
batch_size |
Positive integer giving the number of edges to drop per
iteration. Larger values speed up the procedure at the cost of coarser
resolution. Default |
n_permutations |
Positive integer giving the number of permutation
replicates used to approximate the null distribution of T. Default
|
a |
Positive numeric normalisation constant for the test statistic T
(see |
seed |
Optional integer seed for reproducibility of the permutation
replicates. Default |
The implementation exploits the fact that T decomposes as a sum of
per-edge contributions . Removing an edge e is equivalent
to subtracting from T, so prefix sums of the sorted
values give the test statistic after any number of
removals in O(1). The permutation null is built once via a single BLAS
matrix multiplication and reused for every iteration, reducing the
overall complexity from O(K * B * |E| * m) (naive) to O(B * |E| * m).
Permutation replicates are generated by uniformly permuting the group labels across the pooled sample of all graphs, while preserving the original group sizes (sampling without replacement).
A list with three components: critical_edges (a
data.frame with columns node1, node2, p_value,
listing the edges removed until the test became non-significant, ordered
from most to least significant; NULL if the global test was not
significant), edges_removed (a list of length-2 integer vectors
giving the (i, j) indices of the removed edges, in removal order),
and modified_populations (the input populations with the
critical edges zeroed out in every graph).
Fraiman, D. and Fraiman, R. (2018) An ANOVA approach for statistical comparisons of brain networks. Scientific Reports, 8, 4746. doi:10.1038/s41598-018-21688-0.
compute_test_statistic,
compute_edge_pvalues, get_critical_nodes.
set.seed(1) control <- generate_category_graphs( n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, seed = 1) patient <- generate_category_graphs( n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.5, base_inter_prob = 0.5, seed = 2) populations <- list(Control = control, Patient = patient) result <- identify_critical_links( populations, alpha = 0.05, method = "fisher", n_permutations = 200, seed = 42) head(result$critical_edges)set.seed(1) control <- generate_category_graphs( n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, seed = 1) patient <- generate_category_graphs( n_graphs = 15, n_nodes = 10, n_communities = 2, base_intra_prob = 0.5, base_inter_prob = 0.5, seed = 2) populations <- list(Control = control, Patient = patient) result <- identify_critical_links( populations, alpha = 0.05, method = "fisher", n_permutations = 200, seed = 42) head(result$critical_edges)
Produces a multi-panel figure summarising the output of
identify_critical_links: one panel per population showing the
weighted central graph, plus a final panel that highlights the critical
edges on a chosen reference central graph. This is the canonical
visualization of the BrainNetTest workflow and replaces the manual
igraph plumbing required to build the same figure.
plot_critical_edges( populations, critical_links, communities = NULL, layout = NULL, central_graphs = NULL, reference = 1L, threshold = 0.5, edge_scale = 6, critical_color = "red", critical_width = 3, background_color = "grey80", background_width = 1, vertex_size = 12, vertex_label = TRUE, panel_titles = NULL, mfrow = NULL, ... )plot_critical_edges( populations, critical_links, communities = NULL, layout = NULL, central_graphs = NULL, reference = 1L, threshold = 0.5, edge_scale = 6, critical_color = "red", critical_width = 3, background_color = "grey80", background_width = 1, vertex_size = 12, vertex_label = TRUE, panel_titles = NULL, mfrow = NULL, ... )
populations |
A named |
critical_links |
The object returned by
|
communities |
Optional integer (or factor) vector of length
|
layout |
Optional layout for the graphs. Either an
|
central_graphs |
Optional list of pre-computed central graphs (one per
population). If |
reference |
Either an integer index or a name selecting which
population's central graph is used as the background of the
critical-edges panel. Default |
threshold |
Numeric in |
edge_scale |
Numeric multiplier applied to the central-graph edge
weights to set |
critical_color, critical_width
|
Color and line width used to draw
critical edges. Defaults |
background_color, background_width
|
Color and line width used to draw
non-critical edges in the critical panel. Defaults |
vertex_size |
Vertex size passed to |
vertex_label |
Logical. If |
panel_titles |
Optional character vector of length
|
mfrow |
Optional integer vector of length 2 giving the
|
... |
Additional arguments forwarded to
|
The first length(populations) panels show the weighted
central graph of each population, with edge widths proportional to the
central-graph weights. The final panel binarises the chosen reference
central graph at threshold and overlays the critical edges
returned by identify_critical_links() in
critical_color. Critical edges that are absent from the binarised
reference graph are added so that they remain visible.
The graphics state (par(mfrow, mar, oma)) is restored on exit.
Invisibly returns NULL; called for its side effect of
producing the multi-panel plot.
identify_critical_links,
compute_central_graph,
get_critical_nodes.
set.seed(123) community_sizes <- c(4, 2, 3, 3, 5) control <- generate_category_graphs( n_graphs = 50, n_nodes = 17, n_communities = 5, community_sizes = community_sizes, base_intra_prob = rep(0.70, 5), base_inter_prob = 0.05, intra_prob_variation = 0.02, inter_prob_variation = 0.01) patient <- generate_category_graphs( n_graphs = 50, n_nodes = 17, n_communities = 5, community_sizes = community_sizes, base_intra_prob = c(0.40, 0.70, 0.70, 0.70, 0.70), base_inter_prob = 0.05, intra_prob_variation = 0.02, inter_prob_variation = 0.01) populations <- list(Control = control, Patient = patient) result <- identify_critical_links(populations, alpha = 0.05, method = "fisher", n_permutations = 200, seed = 1) communities <- rep(seq_along(community_sizes), times = community_sizes) plot_critical_edges(populations, result, communities = communities)set.seed(123) community_sizes <- c(4, 2, 3, 3, 5) control <- generate_category_graphs( n_graphs = 50, n_nodes = 17, n_communities = 5, community_sizes = community_sizes, base_intra_prob = rep(0.70, 5), base_inter_prob = 0.05, intra_prob_variation = 0.02, inter_prob_variation = 0.01) patient <- generate_category_graphs( n_graphs = 50, n_nodes = 17, n_communities = 5, community_sizes = community_sizes, base_intra_prob = c(0.40, 0.70, 0.70, 0.70, 0.70), base_inter_prob = 0.05, intra_prob_variation = 0.02, inter_prob_variation = 0.01) populations <- list(Control = control, Patient = patient) result <- identify_critical_links(populations, alpha = 0.05, method = "fisher", n_permutations = 200, seed = 1) communities <- rep(seq_along(community_sizes), times = community_sizes) plot_critical_edges(populations, result, communities = communities)
This function ranks edges based on their p-values obtained from statistical tests, ordering them from lowest to highest p-value (most to least significant).
rank_edges(edge_pvalues)rank_edges(edge_pvalues)
edge_pvalues |
A square matrix of p-values for each edge, typically obtained from |
A data frame with edges and their corresponding p-values, ordered from most significant.
# Generate synthetic populations control_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, seed = 1 ) patient_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.6, base_inter_prob = 0.4, seed = 2 ) populations <- list(Control = control_graphs, Patient = patient_graphs) # Compute edge frequencies frequencies <- compute_edge_frequencies(populations) edge_counts <- frequencies$edge_counts N <- sapply(populations, length) # Compute p-values for edge differences edge_pvalues <- compute_edge_pvalues(edge_counts, N) # Rank edges based on p-values edge_df <- rank_edges(edge_pvalues) # View the top ranked edges head(edge_df)# Generate synthetic populations control_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.8, base_inter_prob = 0.2, seed = 1 ) patient_graphs <- generate_category_graphs( n_graphs = 5, n_nodes = 10, n_communities = 2, base_intra_prob = 0.6, base_inter_prob = 0.4, seed = 2 ) populations <- list(Control = control_graphs, Patient = patient_graphs) # Compute edge frequencies frequencies <- compute_edge_frequencies(populations) edge_counts <- frequencies$edge_counts N <- sapply(populations, length) # Compute p-values for edge differences edge_pvalues <- compute_edge_pvalues(edge_counts, N) # Rank edges based on p-values edge_df <- rank_edges(edge_pvalues) # View the top ranked edges head(edge_df)