Retrieve a data frame of gene sets and their member genes.
The available species and collections can be checked with msigdbr_species() and msigdbr_collections().
Examples
# get all human gene sets
# \donttest{
msigdbr(species = "Homo sapiens")
#> # A tibble: 4,029,754 × 15
#> gs_cat gs_subcat gs_name gene_symbol entrez_gene ensembl_gene
#> <chr> <chr> <chr> <chr> <int> <chr>
#> 1 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ABCC4 10257 ENSG00000125257
#> 2 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ABRAXAS2 23172 ENSG00000165660
#> 3 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ACTN4 81 ENSG00000130402
#> 4 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ACVR1 90 ENSG00000115170
#> 5 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ADAM9 8754 ENSG00000168615
#> 6 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ADAMTS5 11096 ENSG00000154736
#> 7 C3 MIR:MIR_LEGACY AAACCAC_MIR140 AMER2 219287 ENSG00000165566
#> 8 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ANK2 287 ENSG00000145362
#> 9 C3 MIR:MIR_LEGACY AAACCAC_MIR140 API5 8539 ENSG00000166181
#> 10 C3 MIR:MIR_LEGACY AAACCAC_MIR140 ATOSA 56204 ENSG00000047346
#> # ℹ 4,029,744 more rows
#> # ℹ 9 more variables: human_gene_symbol <chr>, human_entrez_gene <int>,
#> # human_ensembl_gene <chr>, gs_id <chr>, gs_pmid <chr>, gs_geoid <chr>,
#> # gs_exact_source <chr>, gs_url <chr>, gs_description <chr>
# }
# get mouse C2 (curated) CGP (chemical and genetic perturbations) gene sets
# \donttest{
msigdbr(species = "Mus musculus", category = "C2", subcategory = "CGP")
#> # A tibble: 378,935 × 18
#> gs_cat gs_subcat gs_name gene_symbol entrez_gene ensembl_gene
#> <chr> <chr> <chr> <chr> <int> <chr>
#> 1 C2 CGP ABBUD_LIF_SIGNALING_1_… Ahnak 66395 ENSMUSG0000…
#> 2 C2 CGP ABBUD_LIF_SIGNALING_1_… Alcam 11658 ENSMUSG0000…
#> 3 C2 CGP ABBUD_LIF_SIGNALING_1_… Ankrd40 71452 ENSMUSG0000…
#> 4 C2 CGP ABBUD_LIF_SIGNALING_1_… Arid1a 93760 ENSMUSG0000…
#> 5 C2 CGP ABBUD_LIF_SIGNALING_1_… Bckdhb 12040 ENSMUSG0000…
#> 6 C2 CGP ABBUD_LIF_SIGNALING_1_… AU021092 239691 ENSMUSG0000…
#> 7 C2 CGP ABBUD_LIF_SIGNALING_1_… Capn9 73647 ENSMUSG0000…
#> 8 C2 CGP ABBUD_LIF_SIGNALING_1_… Cd24a 12484 ENSMUSG0000…
#> 9 C2 CGP ABBUD_LIF_SIGNALING_1_… Cyfip1 20430 ENSMUSG0000…
#> 10 C2 CGP ABBUD_LIF_SIGNALING_1_… Dcaf11 28199 ENSMUSG0000…
#> # ℹ 378,925 more rows
#> # ℹ 12 more variables: human_gene_symbol <chr>, human_entrez_gene <int>,
#> # human_ensembl_gene <chr>, gs_id <chr>, gs_pmid <chr>, gs_geoid <chr>,
#> # gs_exact_source <chr>, gs_url <chr>, gs_description <chr>, taxon_id <int>,
#> # ortholog_sources <chr>, num_ortholog_sources <dbl>
# }