---
title: "Working with simple somatic mutations"
author: "Sean Davis"
date: "`r format(Sys.Date(), '%A, %B %d, %Y')`"
always_allow_html: yes
output:
  BiocStyle::html_document:
    df_print: paged
    toc_float: true
abstract: >

vignette: >
  %\VignetteIndexEntry{Somatic Mutation Data}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

# Background

# Workflow

```{r warning=FALSE,message=FALSE}
library(GenomicDataCommons)
library(tibble)
```

## Genes and gene details

```{r}
# Look up field names on the "genes" endpoint that match "symbol".
grep_fields("genes", "symbol")
```

```{r}
# Peek at the first few values available for the gene `symbol` field.
head(available_values("genes", "symbol"))
```

```{r}
# Fetch the gene record(s) whose symbol is TP53 as a tibble. The columns
# gene_chromosome, gene_start and gene_end are used below to build a
# coordinate-based query.
tp53 <- genes() |>
  GenomicDataCommons::filter(symbol == "TP53") |>
  results(size = 10000) |>
  as_tibble()
```

## ssms

```{r}
# Count simple somatic mutations lying strictly inside the TP53 gene
# coordinates. The chromosome value is prefixed with "chr" before comparing
# against the ssms `chromosome` field.
ssms() |>
  GenomicDataCommons::filter(
    chromosome == paste0("chr", tp53$gene_chromosome[1]) &
      start_position > tp53$gene_start[1] &
      end_position < tp53$gene_end[1]
  ) |>
  GenomicDataCommons::count()
```

```{r}
# Alternative query: count mutations by the gene symbol attached to their
# annotated transcript consequences rather than by genomic position.
ssms() |>
  GenomicDataCommons::filter(
    consequence.transcript.gene.symbol %in% c("TP53")
  ) |>
  GenomicDataCommons::count()
```

## convert to VRanges

```{r warning=FALSE,message=FALSE}
library(VariantAnnotation)

# Retrieve every TP53-annotated mutation record (not just the first page).
vars <- ssms() |>
  GenomicDataCommons::filter(
    consequence.transcript.gene.symbol %in% c("TP53")
  ) |>
  GenomicDataCommons::results_all() |>
  as_tibble()
```

```{r}
# Build the ranges from start_position..end_position so that deletions and
# multi-base substitutions keep their full extent instead of being truncated
# to a single base.
# NOTE(review): assumes `end_position` is among the fields returned by default
# for ssms -- confirm with default_fields("ssms").
vr <- VRanges(
  seqnames = vars$chromosome,
  ranges = IRanges(start = vars$start_position, end = vars$end_position),
  ref = vars$reference_allele,
  alt = vars$tumor_allele
)
```

```{r}
# Count per-case occurrences of TP53-annotated mutations.
ssm_occurrences() |>
  GenomicDataCommons::filter(
    ssm.consequence.transcript.gene.symbol %in% c("TP53")
  ) |>
  GenomicDataCommons::count()
```

```{r}
# Retrieve all of those occurrences, expanding the nested case, ssm and
# case.project records so their details come back with each occurrence.
var_samples <- ssm_occurrences() |>
  GenomicDataCommons::filter(
    ssm.consequence.transcript.gene.symbol %in% c("TP53")
  ) |>
  GenomicDataCommons::expand(c("case", "ssm", "case.project")) |>
  GenomicDataCommons::results_all() |>
  as_tibble()
```

```{r}
# Tabulate occurrences by the disease type recorded on the expanded case.
table(var_samples$case$disease_type)
```

## OncoPrint

```{r}
# Find a TCGA-SKCM masked somatic mutation MAF file, keep a single hit
# (size = 1), and pass its id to gdcdata(); the result is handed to
# read.maf() below.
fnames <- files() |>
  GenomicDataCommons::filter(
    cases.project.project_id == "TCGA-SKCM" &
      data_format == "maf" &
      data_type == "Masked Somatic Mutation" &
      analysis.workflow_type ==
        "Aliquot Ensemble Somatic Variant Merging and Masking"
  ) |>
  results(size = 1) |>
  ids() |>
  gdcdata()
```

```{r cache=TRUE}
library(maftools)

# Read the MAF file obtained above; the chunk is cached (cache=TRUE).
melanoma <- read.maf(maf = fnames)
```

```{r}
maftools::oncoplot(melanoma)
```