### Install ggcoverage from GitHub Source: https://showteeth.github.io/ggcoverage/index.html Installs the ggcoverage package directly from its GitHub repository. This is recommended for the most up-to-date version. ```R # install.packages("remotes") #In case you have not installed it. remotes::install_github("showteeth/ggcoverage") ``` -------------------------------- ### Example: Loading data and adding gene annotation Source: https://showteeth.github.io/ggcoverage/reference/geom_gene.html This example demonstrates loading track files and GTF data, then combining a basic coverage plot with gene annotations using geom_gene. ```R # library(ggcoverage) # library(utils) # library(rtracklayer) # meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage") # sample.meta <- utils::read.csv(meta.file) # track folder # track.folder <- system.file("extdata", "RNA-seq", package = "ggcoverage") # load bigwig file # track.df <- LoadTrackFile( # track.folder = track.folder, format = "bw", # meta.info = sample.meta # ) # gtf.file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage") # gtf.gr <- rtracklayer::import.gff(con = gtf.file, format = "gtf") # basic.coverage <- ggcoverage(data = track.df, color = "auto", range.position = "out") # basic.coverage + geom_gene(gtf.gr = gtf.gr) ``` -------------------------------- ### Install ggcoverage from CRAN and GitHub Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html Install the ggcoverage package from CRAN or the latest version from GitHub. It is recommended to install from GitHub for more timely updates. Ensure dependencies like GenomeMatrix are installed if needed. ```R # install via CRAN (v0.7.1) # old version, it's better to install via Github install.packages("ggcoverage") # install via Github (v1.2.0) # install.packages("remotes") #In case you have not installed it. 
# BiocManager::install("areyesq89/GenomeMatrix") # In case of possible dependency error remotes::install_github("showteeth/ggcoverage") ``` -------------------------------- ### Basic ggprotein Plot Example Source: https://showteeth.github.io/ggcoverage/reference/ggprotein.html Example of how to use the ggprotein function with essential arguments. Ensure the library is loaded and file paths are correctly specified. ```R # library(ggcoverage) # coverage.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.xlsx", package = "ggcoverage") # fasta.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.fasta", package = "ggcoverage") # protein.id = "sp|P02769|ALBU_BOVIN" # ggprotein(coverage.file = coverage.file, fasta.file = fasta.file, protein.id = protein.id) ``` -------------------------------- ### Example Usage of GetConsensusPeak Source: https://showteeth.github.io/ggcoverage/reference/GetConsensusPeak.html This example demonstrates how to use the GetConsensusPeak function with a specified peak file. Ensure the ggcoverage library is loaded and the peak file path is correctly set. ```R # library(ggcoverage) # peak.file <- system.file("extdata", "ChIP-seq", "consensus.peak", package = "ggcoverage") # peak.df <- GetConsensusPeak(peak.file = peak.file) ``` -------------------------------- ### Install ggcoverage from CRAN Source: https://showteeth.github.io/ggcoverage/index.html Installs the ggcoverage package from the CRAN repository. Use this if you need a stable release. ```R install.packages("ggcoverage") ``` -------------------------------- ### Load Required Libraries Source: https://showteeth.github.io/ggcoverage/index.html Loads the necessary R libraries for using ggcoverage and related functionalities. Ensure these are installed before loading. 
```R library("rtracklayer") library("ggcoverage") library("ggpattern") ``` -------------------------------- ### ggcoverage Example Usage Source: https://showteeth.github.io/ggcoverage/reference/ggcoverage.html Example of how to use the ggcoverage function with loaded track data. This demonstrates setting custom colors and adjusting the range position for the plot. ```R # library(ggcoverage) # library(utils) # library(rtracklayer) # meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage") # sample.meta <- utils::read.csv(meta.file) # track folder # track.folder <- system.file("extdata", "RNA-seq", package = "ggcoverage") # load bigwig file # track.df <- LoadTrackFile(track.folder = track.folder, format = "bw",region = "chr14:21,677,306-21,737,601", # extend = 2000, meta.info = sample.meta) # gtf.file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage") # gtf.gr <- rtracklayer::import.gff(con = gtf.file, format = "gtf") # ggcoverage(data = track.df, color = "auto", range.position = "out") ``` -------------------------------- ### Load ggcoverage and other libraries Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html Load the ggcoverage package along with other necessary libraries such as rtracklayer, graphics, and ggpattern after installation. ```R library("rtracklayer") library("graphics") library("ggcoverage") library("ggpattern") ``` -------------------------------- ### Full Example: Plotting Coverage with CNV Annotation Source: https://showteeth.github.io/ggcoverage/reference/geom_cnv.html Demonstrates how to load CNV and coverage data, then plot them together using ggcoverage, geom_gc, geom_cnv, and geom_ideogram. Requires BSgenome and specific data files. 
```R # library(ggcoverage) # library(utils) # library("BSgenome.Hsapiens.UCSC.hg19") # # prepare files # cnv.file <- system.file("extdata", "DNA-seq", "SRR054616_copynumber.txt", package = "ggcoverage") # track.file <- system.file("extdata", "DNA-seq", "SRR054616.bw", package = "ggcoverage") # # read CNV # cnv.df = read.table(file = cnv.file, sep = "\t", header = TRUE) # # load track # track.df = LoadTrackFile(track.file = track.file, format = "bw") # track.df$seqnames = paste0("chr", track.df$seqnames) # # plot # ggcoverage(data = track.df, color = "grey", region = "chr4:1-160000000", # mark.region = NULL, range.position = "out") + # geom_gc(bs.fa.seq=BSgenome.Hsapiens.UCSC.hg19) + # geom_cnv(cnv.df = cnv.df, bin.col = 3, cn.col = 4) + # geom_ideogram(genome = "hg19",plot.space = 0, highlight.centromere = TRUE) ``` -------------------------------- ### Basic geom_coverage Plot Example Source: https://showteeth.github.io/ggcoverage/reference/geom_coverage.html Demonstrates a basic usage of geom_coverage with loaded track data. Ensure track data is prepared using LoadTrackFile and ggplot2 is loaded. ```r # library(ggcoverage) # library(utils) # library(ggplot2) # meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage") # sample.meta <- utils::read.csv(meta.file) # track folder # track.folder <- system.file("extdata", "RNA-seq", package = "ggcoverage") # load bigwig file # track.df <- LoadTrackFile( # track.folder = track.folder, format = "bw", # meta.info = sample.meta # ) # ggplot() + # geom_coverage(data = track.df, color = "auto", mark.region = NULL) ``` -------------------------------- ### Example: Add links to a coverage plot Source: https://showteeth.github.io/ggcoverage/reference/geom_link.html Demonstrates how to use geom_link to add links to a coverage plot. Requires a data frame with genomic region information and a specified link file. 
```R library(ggcoverage) # create test dataframe (random) df <- data.frame( seqnames = "chr9", start = seq(from = 4000000, to = 5999000, by = 1000), end = seq(from = 4001000, to = 6000000, by = 1000), score = sample(1:100, 2000, replace = TRUE), Type = "Example", Group = "Example" ) # create plot ggcoverage( data = df, color = "grey", region = "chr9:4000000-6000000", mark.region = NULL, range.position = "out" ) + geom_link(link.file = link.file, file.type = "bedpe", show.rect = TRUE) #> Error in ggcoverage(data = df, color = "grey", region = "chr9:4000000-6000000", mark.region = NULL, range.position = "out"): #> 参数没有用(region = "chr9:4000000-6000000") ``` -------------------------------- ### Basic Protein Coverage Plot Example Source: https://showteeth.github.io/ggcoverage/reference/geom_protein.html Illustrates how to create a basic protein coverage plot using geom_peptide. Requires loading ggplot2 and ggcoverage libraries, specifying file paths for coverage and FASTA, and a unique protein ID. ```r # library(ggplot2) # library(ggcoverage) # coverage.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.xlsx", package = "ggcoverage") # fasta.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.fasta", package = "ggcoverage") # protein.id = "sp|P02769|ALBU_BOVIN" # ggplot() + # geom_peptide(coverage.file = coverage.file, fasta.file = fasta.file, protein.id = protein.id) ``` -------------------------------- ### Prepare Mark Region for Plotting Source: https://showteeth.github.io/ggcoverage/index.html Creates a data frame defining a specific region to be marked on the plot. Includes start, end coordinates, and a label for the region. 
```R mark.region=data.frame(start=c(76822533), end=c(76823743), label=c("Promoter")) # check data mark.region ``` -------------------------------- ### Extracting Plot Data with GetPlotData Source: https://showteeth.github.io/ggcoverage/reference/GetPlotData.html Example of how to use GetPlotData to extract data from a ggcoverage plot object. Ensure the plot object is created first. ```R # cov.plot = ggcoverage(data = track.df, color = "auto", region = "chr18:76822285-76900000", # range.position = "out", mark.region=mark.region, show.mark.label = TRUE) # plot.data = GetPlotData(plot = cov.plot, layer.num=1) ``` -------------------------------- ### Basic usage of geom_transcript with ggcoverage Source: https://showteeth.github.io/ggcoverage/reference/geom_transcript.html This example demonstrates how to integrate geom_transcript with a basic ggcoverage plot. It requires loading track data, importing GTF file into a Granges object, and then combining the coverage plot with the transcript annotation. ```R library(ggcoverage) library(utils) library(rtracklayer) meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage") sample.meta <- utils::read.csv(meta.file) # track folder track.folder <- system.file("extdata", "RNA-seq", package = "ggcoverage") # load bigwig file track.df <- LoadTrackFile( track.folder = track.folder, format = "bw", meta.info = sample.meta ) gtf.file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage") gtf.gr <- rtracklayer::import.gff(con = gtf.file, format = "gtf") basic.coverage <- ggcoverage(data = track.df, color = "auto", range.position = "out") basic.coverage + geom_transcript(gtf.gr = gtf.gr, label.vjust = 1.5) ``` -------------------------------- ### Example Usage of geom_tad2 with ggcoverage Source: https://showteeth.github.io/ggcoverage/reference/geom_tad2.html Demonstrates how to use geom_tad2 to overlay a Hi-C contact map onto a genomic coverage plot. 
Requires loading HiCDataHumanIMR90 and preparing dataframes for both coverage and the contact matrix. ```R library(ggcoverage) library(HiCDataHumanIMR90) data(Dixon2012_IMR90, package = "HiCDataHumanIMR90") mat <- as.matrix(hic_imr90_40@.Data[[1]]@intdata) #> #> 载入需要的程辑包:HiTC #> 载入需要的程辑包:IRanges #> 载入需要的程辑包:BiocGenerics #> #> #> 载入程辑包:'BiocGenerics' #> The following objects are masked from 'package:stats': #> #> IQR, mad, sd, var, xtabs #> The following objects are masked from 'package:base': #> #> anyDuplicated, append, as.data.frame, basename, cbind, colnames, #> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, #> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, #> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, #> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, #> union, unique, unsplit, which.max, which.min #> 载入需要的程辑包:S4Vectors #> 载入需要的程辑包:stats4 #> #> #> 载入程辑包:'S4Vectors' #> The following object is masked from 'package:base': #> #> expand.grid #> 载入需要的程辑包:GenomicRanges #> 载入需要的程辑包:GenomeInfoDb granges <- hic_imr90_40@.Data[[1]]@xgi # prepare coverage dataframe df <- data.frame( seqnames = "chr1", start = seq(from = 50000000, to = 59999000, by = 1000), end = seq(from = 50001000, to = 60000000, by = 1000), score = sample(1:100, 10000, replace = TRUE), Type = "Example", Group = "Example" ) # create plot ggcoverage( data = df, color = "grey", region = "chr1:50000000-56000000", mark.region = NULL, range.position = "out" ) + geom_tad2(matrix = log2(mat + 1), granges = granges, zlim = 5, color.palette = c("blue", "red")) #> Error in ggcoverage(data = df, color = "grey", region = "chr1:50000000-56000000", mark.region = NULL, range.position = "out"): 参数没有用(region = "chr1:50000000-56000000") ``` -------------------------------- ### Prepare Mark Region Data Frame Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html Creates a data frame to define specific genomic regions for marking on the 
coverage plot. Includes start, end, and label for each region. ```R # create mark region mark.region=data.frame(start=c(76822533), end=c(76823743), label=c("Promoter")) # check data mark.region ``` -------------------------------- ### Annotate Protein Coverage with Features Source: https://showteeth.github.io/ggcoverage/reference/geom_feature.html Example of using geom_feature to add annotations to a protein coverage plot. Requires pre-generated protein coverage plot and a feature dataframe. ```R # library(ggcoverage) # coverage.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.xlsx", package = "ggcoverage") # fasta.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.fasta", package = "ggcoverage") # protein.id = "sp|P02769|ALBU_BOVIN" # protein.coverage = ggprotein(coverage.file = coverage.file, fasta.file = fasta.file, protein.id = protein.id) # feature.df = data.frame(ProteinID = protein.id, start = c(1, 19, 25), end = c(18, 24, 607), # Type = c("Signal", "Propeptide", "Chain")) # protein.coverage + # geom_feature(feature.df = feature.df, feature.color = c("#4d81be","#173b5e","#6a521d")) ``` -------------------------------- ### Filter and Prepare Mark Region Data Source: https://showteeth.github.io/ggcoverage/articles/CustomizeThePlot.html Filter the loaded track data to include specific sample types and define a mark region with start, end, and label for annotation. ```R track.df = track.df %>% dplyr::filter(Type %in% c("MCF7_ER_1", "MCF7_input")) # create mark region mark.region=data.frame(start=c(76822533), end=c(76823743), label=c("Promoter")) # check data mark.region ``` -------------------------------- ### Get Consensus Peaks from ChIP-seq Data Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html Retrieves consensus peaks from ChIP-seq data using a peak file. This function requires the MSPC package and its installation path to be specified. 
```R # load peak file peak.file <- system.file("extdata", "ChIP-seq", "consensus.peak", package = "ggcoverage") # get consensus peak (do nothing when there is only one file) # notice: this step requires MSPC, specific the installation path with mspc.path peak.df <- GetConsensusPeak(peak.file = peak.file) ``` -------------------------------- ### Prepare sample metadata Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Prepares a data frame with sample metadata for use with ggcoverage. This is an R command. ```r # prepare metadata sample.meta = data.frame(SampleName=c('possorted_genome_bam'), Type = c("possorted_genome_bam"), Group = c("10x")) sample.meta ``` -------------------------------- ### List test data for sequential normalization Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Lists the files in the test directory for sequential normalization, showing BAM and BAI files. This is a shell command. ```bash # test the sequential normalization ls -lh ./test #> total 2.0G #> -rw-r--r--. 1 songyabing wanglab 3.9M May 26 16:44 SRR054616_rep3.bam.bai #> -rw-r--r--. 1 songyabing wanglab 3.9M May 26 16:44 SRR054616_rep2.bam.bai #> -rw-r--r--. 1 songyabing wanglab 3.9M May 26 16:44 SRR054616_rep1.bam.bai #> -rw-r--r--. 1 songyabing wanglab 646M May 26 16:44 SRR054616_rep3.bam #> -rw-r--r--. 1 songyabing wanglab 646M May 26 16:44 SRR054616_rep2.bam #> -rw-r--r--. 1 songyabing wanglab 646M May 26 16:44 SRR054616_rep1.bam ``` -------------------------------- ### Load ggcoverage library Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Loads the ggcoverage library. This is an R command. ```r library(ggcoverage) ``` -------------------------------- ### List test data for parallel normalization Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Lists the files in the test2 directory for parallel normalization, showing BAM and BAI files. This is a shell command. 
```bash # test the parallel normalization ls -lh ./test2 #> total 2.0G #> -rw-r--r--. 1 songyabing wanglab 3.9M May 26 16:44 SRR054616_rep3.bam.bai #> -rw-r--r--. 1 songyabing wanglab 3.9M May 26 16:44 SRR054616_rep2.bam.bai #> -rw-r--r--. 1 songyabing wanglab 3.9M May 26 16:44 SRR054616_rep1.bam.bai #> -rw-r--r--. 1 songyabing wanglab 646M May 26 16:44 SRR054616_rep3.bam #> -rw-r--r--. 1 songyabing wanglab 646M May 26 16:44 SRR054616_rep2.bam #> -rw-r--r--. 1 songyabing wanglab 646M May 26 16:44 SRR054616_rep1.bam ``` -------------------------------- ### FormatTrack Source: https://showteeth.github.io/ggcoverage/reference/index.html Prepares input data for creating coverage plots. ```APIDOC ## FormatTrack ### Description Prepare Input for Creating Coverage Plot. ### Function Signature FormatTrack() ``` -------------------------------- ### Prepare Mark Regions for Annotation Source: https://showteeth.github.io/ggcoverage/index.html Creates a data frame defining specific genomic regions to be marked or annotated on the coverage plot. Each region has a start, end, and a label. ```R # create mark region mark.region=data.frame(start=c(21678900,21732001,21737590), end=c(21679900,21732400,21737650), label=c("M1", "M2", "M3")) # check data mark.region ``` -------------------------------- ### Highlight Position with SNV Source: https://showteeth.github.io/ggcoverage/index.html Creates a genomic coverage plot highlighting specific positions with SNVs. Similar setup to star marks but uses 'highlight' mark type. ```R ggcoverage(data = track.df, color = "grey", range.position = "out", single.nuc=T, rect.color = "white") + geom_base(bam.file = bam.file, bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19, mark.type = "highlight") + geom_ideogram(genome = "hg19",plot.space = 0) ``` -------------------------------- ### Add Protein Feature Annotations Source: https://showteeth.github.io/ggcoverage/index.html Adds custom feature annotations to a protein coverage plot. 
Define features in a dataframe with start, end, and type, then use geom_feature to plot them. ```r # protein feature obtained from UniProt protein.feature.df = data.frame(ProteinID = "sp|P02769|ALBU_BOVIN", start = c(1, 19, 25), end = c(18, 24, 607), Type = c("Signal", "Propeptide", "Chain")) # add annotation protein.coverage + geom_feature(feature.df = protein.feature.df, feature.color = c("#4d81be","#173b5e","#6a521d")) ``` -------------------------------- ### Define Sample Metadata Source: https://showteeth.github.io/ggcoverage/articles/CustomizeThePlot.html Create a data frame for sample metadata, including SampleName, Type, and Group. This is used to organize and label data tracks. ```R # sample metadata sample.meta = data.frame(SampleName=c('Chr18_MCF7_ER_1','Chr18_MCF7_ER_2','Chr18_MCF7_ER_3','Chr18_MCF7_input'), Type = c("MCF7_ER_1","MCF7_ER_2","MCF7_ER_3","MCF7_input"), Group = c("IP", "IP", "IP", "Input")) sample.meta ``` -------------------------------- ### List BAM file size Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Displays the size of a large BAM file. This is a shell command. ```bash ls -lh possorted_genome_bam.bam #> -rw-r--r--. 1 songyabing wanglab 27G 8月 31 2021 possorted_genome_bam.bam ``` -------------------------------- ### ggcoverage Source: https://showteeth.github.io/ggcoverage/reference/index.html Creates a coverage plot. ```APIDOC ## ggcoverage ### Description Create Coverage Plot. ### Function Signature ggcoverage() ``` -------------------------------- ### Add GC Content Annotation to a Coverage Plot Source: https://showteeth.github.io/ggcoverage/reference/geom_gc.html Example of adding GC content annotation to an existing coverage plot using geom_gc. Requires BSgenome object for GC calculation. 
```R # library(ggcoverage) # library(utils) # library(rtracklayer) # library("BSgenome.Hsapiens.UCSC.hg19") # track folder # track.file <- system.file("extdata", "DNA-seq", "CNV_example.txt", package = "ggcoverage") # track.df <- utils::read.table(track.file, header = TRUE) # gtf.file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage") # gtf.gr <- rtracklayer::import.gff(con = gtf.file, format = "gtf") # basic.coverage <- ggcoverage( # data = track.df, color = NULL, mark.region = NULL, # region = "chr4:61750000-62,700,000", range.position = "out" # ) # basic.coverage + geom_gc(bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19) ``` -------------------------------- ### Load BAM File for Single-Nucleotide Analysis Source: https://showteeth.github.io/ggcoverage/index.html Loads a BAM file for single-nucleotide level analysis, specifying a region of interest. Requires sample metadata. ```R # prepare sample metadata sample.meta <- data.frame( SampleName = c("tumorA.chr4.selected"), Type = c("tumorA"), Group = c("tumorA") ) # load bam file bam.file = system.file("extdata", "DNA-seq", "tumorA.chr4.selected.bam", package = "ggcoverage") track.df <- LoadTrackFile( track.file = bam.file, meta.info = sample.meta, single.nuc=TRUE, single.nuc.region="chr4:62474235-62474295") head(track.df) ``` -------------------------------- ### Create Joint View Plot - Every Sample Source: https://showteeth.github.io/ggcoverage/index.html Generates a line plot for every sample, faceting and coloring by sample type. Ensure 'track.df', 'mark.region' are defined. ```R basic.coverage = ggcoverage(data = track.df, color = "auto", plot.type = "joint", facet.key = "Type", group.key = "Type", mark.region = mark.region, range.position = "out") basic.coverage ``` -------------------------------- ### geom_gc Function Source: https://showteeth.github.io/ggcoverage/reference/geom_gc.html The geom_gc function adds GC content annotation to a coverage plot. 
It allows customization of the genome FASTA file, BSgenome object, chromosome splitting, guide lines, colors, and plot spacing. ```APIDOC ## geom_gc ### Description Adds GC Content Annotation to Coverage Plot. ### Arguments - **fa.file** (character) - Genome fasta file. Default: NULL. - **bs.fa.seq** (BSgenome object) - BSgenome for species. Default: NULL. - **chr.split** (character) - Split between chromosome name and description in `fa.file`. Default: "[[:space:]]". - **guide.line** (numeric) - GC content guide line. Default: NULL (use mean GC content). - **line.color** (character) - GC line color. Default: "black". - **guide.line.color** (character) - The color of guide line. Default: "red". - **guide.line.type** (character) - The line type of guide line. Default: "dashed". - **plot.space** (numeric) - Top and bottom margin. Default: 0.1. - **plot.height** (numeric) - The relative height of GC content annotation to coverage plot. Default: 0.2. ### Value Plot. ### Examples ```R # library(ggcoverage) # library(utils) # library(rtracklayer) # library("BSgenome.Hsapiens.UCSC.hg19") # track folder # track.file <- system.file("extdata", "DNA-seq", "CNV_example.txt", package = "ggcoverage") # track.df <- utils::read.table(track.file, header = TRUE) # gtf.file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage") # gtf.gr <- rtracklayer::import.gff(con = gtf.file, format = "gtf") # basic.coverage <- ggcoverage( # data = track.df, color = NULL, mark.region = NULL, # region = "chr4:61750000-62,700,000", range.position = "out" # ) # basic.coverage + geom_gc(bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19) ``` ``` -------------------------------- ### Load Track File to Dataframe Source: https://showteeth.github.io/ggcoverage/reference/LoadTrackFile.html Demonstrates loading a bigwig file into a dataframe using LoadTrackFile. Requires specifying the track folder, format, region, and metadata. 
```R library(ggcoverage) library(utils) meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage") sample.meta <- utils::read.csv(meta.file) # track folder track.folder <- system.file("extdata", "RNA-seq", package = "ggcoverage") # load bigwig file track.df <- LoadTrackFile( track.folder = track.folder, format = "bw", region = "chr14:21,677,306-21,737,601", extend = 2000, meta.info = sample.meta ) ``` -------------------------------- ### Load and Inspect Coverage Data Source: https://showteeth.github.io/ggcoverage/index.html Loads and displays the first few rows of a protein coverage dataframe. Ensure the 'coverage.df' object is available in your environment. ```r head(coverage.df) ``` -------------------------------- ### LoadTrackFile Source: https://showteeth.github.io/ggcoverage/reference/index.html Loads track file data into a dataframe. ```APIDOC ## LoadTrackFile ### Description Load Track File to Dataframe. ### Function Signature LoadTrackFile() ``` -------------------------------- ### Add peak annotation to a coverage plot Source: https://showteeth.github.io/ggcoverage/reference/geom_peak.html This example demonstrates how to add peak annotations to an existing coverage plot using geom_peak. It requires loading track files, optionally importing GTF data for gene annotations, and defining a coverage plot before adding the peak layer. 
```R # library(ggcoverage) # library(rtracklayer) # sample.meta <- data.frame( # SampleName = c("Chr18_MCF7_ER_1", "Chr18_MCF7_ER_2", "Chr18_MCF7_ER_3", "Chr18_MCF7_input"), # Type = c("MCF7_ER_1", "MCF7_ER_2", "MCF7_ER_3", "MCF7_input"), # Group = c("IP", "IP", "IP", "Input") # ) # track folder # track.folder <- system.file("extdata", "ChIP-seq", package = "ggcoverage") # load bigwig file # track.df <- LoadTrackFile( # track.folder = track.folder, format = "bw", # meta.info = sample.meta # ) # gtf.file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage") # gtf.gr <- rtracklayer::import.gff(con = gtf.file, format = "gtf") # create mark region # mark.region <- data.frame(start = c(76822533), end = c(76823743), label = c("Promoter")) # basic.coverage <- ggcoverage( # data = track.df, color = "auto", region = "chr18:76822285-76900000", # mark.region = mark.region, show.mark.label = FALSE # ) # get consensus peak file # peak.file <- system.file("extdata", "ChIP-seq", "consensus.peak", package = "ggcoverage") # basic.coverage + geom_gene(gtf.gr = gtf.gr) + geom_peak(bed.file = peak.file) ``` -------------------------------- ### Load Genomic Tracks in Parallel Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Loads genomic tracks from BAM files using parallel processing with multiple cores. This demonstrates performance gains over sequential loading. 
```R sample.meta = data.frame(SampleName=c('SRR054616_rep1','SRR054616_rep2','SRR054616_rep3'), Type = c("SRR054616_rep1","SRR054616_rep2","SRR054616_rep3"), Group = c("rep1", "rep2", "rep3")) sample.meta track.folder = "./test2" # run with three cores system.time(track.df <- LoadTrackFile( track.folder = track.folder, format = "bam", norm.method = "RPKM", region = "14:21,677,306-21,737,601", bamcoverage.path = "~/anaconda3/bin/bamCoverage", extend = 2000, meta.info = sample.meta, n.cores = 3 )) ``` -------------------------------- ### Load Mass Spectrometry Coverage Data Source: https://showteeth.github.io/ggcoverage/index.html Loads protein coverage data exported from Proteome Discoverer using the openxlsx package. This prepares the data for quality assessment and analysis of protein coverage in mass spectrometry experiments. ```R library(openxlsx) # prepare coverage dataframe coverage.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.xlsx", package = "ggcoverage") coverage.df <- openxlsx::read.xlsx(coverage.file) ``` -------------------------------- ### Basic Coverage Plot for DNA-seq Data Source: https://showteeth.github.io/ggcoverage/index.html Creates a basic grey coverage plot for DNA-seq data. Ensure 'track.df' is loaded. ```R basic.coverage = ggcoverage(data = track.df,color = "grey", mark.region = NULL, range.position = "out") basic.coverage ``` -------------------------------- ### Display head of track data frame Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Displays the first few rows of the loaded track data frame. This is an R command. ```r head(track.df) ``` -------------------------------- ### Load Mass Spectrometry Protein Coverage Data Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html Load protein coverage data from an Excel file using the openxlsx package. The head() function is used to display the first few rows of the loaded data. 
```r library(openxlsx) # prepare coverage dataframe coverage.file <- system.file("extdata", "Proteomics", "MS_BSA_coverage.xlsx", package = "ggcoverage") coverage.df <- openxlsx::read.xlsx(coverage.file) # check the data head(coverage.df) ``` -------------------------------- ### ggprotein Source: https://showteeth.github.io/ggcoverage/reference/index.html Creates a mass spectrometry protein coverage plot. ```APIDOC ## ggprotein ### Description Create Mass Spectrometry Protein Coverage Plot. ### Function Signature ggprotein() ``` -------------------------------- ### Load BAM track file for a specific region Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Loads a BAM track file for a specified region, optimizing for memory and time by not loading the entire file. The 'norm.method = "None"' indicates no normalization is applied. This is an R command. ```r # prepare track folder track.folder = '~/projects/ggcoverage' # load the track # region length: 3631 system.time(track.df <- LoadTrackFile( track.folder = track.folder, format = "bam", norm.method = "None", region = "chr11:118339075-118342705", extend = 0, meta.info = sample.meta )) ``` -------------------------------- ### Add Base Annotation with Highlight Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html Creates a ggcoverage plot with single nucleotide resolution, adding base annotations with a 'highlight' style. Requires BAM file and genome data. ```R # highlight ggcoverage(data = track.df, color = "grey", range.position = "out", single.nuc=T, rect.color = "white") + geom_base(bam.file = bam.file, bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19, mark.type = "highlight") + geom_ideogram(genome = "hg19",plot.space = 0) ``` -------------------------------- ### Create basic coverage plot Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html Generates a basic coverage plot from a track data frame. The running time is noted as very small. 
This is an R command. ```r # create basic coverage plot # the running time is very small system.time(basic.coverage <- ggcoverage(data = track.df, color = "red", range.position = "out", show.mark.label = FALSE)) ``` -------------------------------- ### Load DNA-seq Data from BigWig File Source: https://showteeth.github.io/ggcoverage/index.html Loads DNA-seq coverage data from a BigWig file using 'LoadTrackFile'. Handles potential missing metadata and adds 'chr' prefix to sequence names. ```R # track file track.file <- system.file("extdata", "DNA-seq", "SRR054616.bw", package = "ggcoverage") # load track track.df = LoadTrackFile(track.file = track.file, format = "bw", region = "4:1-160000000") # add chr prefix track.df$seqnames = paste0("chr", track.df$seqnames) ``` -------------------------------- ### Load Libraries for ggcoverage Source: https://showteeth.github.io/ggcoverage/articles/CustomizeThePlot.html Load necessary libraries including ggplot2, patchwork, tidyverse, ggcoverage, and rtracklayer for data manipulation and plotting. ```R library(ggplot2) library(patchwork) library(tidyverse) library(ggcoverage) library(rtracklayer) ``` -------------------------------- ### Load BAM File for Single Nucleotide Analysis Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html Loads a BAM file for single nucleotide analysis, specifying a region and metadata. Displays the head of the loaded track data. 
```R
# prepare sample metadata
sample.meta <- data.frame(
  SampleName = c("tumorA.chr4.selected"),
  Type = c("tumorA"),
  Group = c("tumorA")
)
# load bam file
bam.file <- system.file(
  "extdata", "DNA-seq", "tumorA.chr4.selected.bam",
  package = "ggcoverage"
)
track.df <- LoadTrackFile(
  track.file = bam.file,
  meta.info = sample.meta,
  single.nuc = TRUE,
  single.nuc.region = "chr4:62474235-62474295"
)
head(track.df)
#>   seqnames    start      end score   Type  Group
#> 1     chr4 62474235 62474236     5 tumorA tumorA
#> 2     chr4 62474236 62474237     5 tumorA tumorA
#> 3     chr4 62474237 62474238     5 tumorA tumorA
#> 4     chr4 62474238 62474239     6 tumorA tumorA
#> 5     chr4 62474239 62474240     6 tumorA tumorA
#> 6     chr4 62474240 62474241     6 tumorA tumorA
```

--------------------------------

### Load BAM track file for a larger region

Source: https://showteeth.github.io/ggcoverage/articles/TimeAndMemory.html

Loads a BAM track file for a larger specified region, demonstrating the time taken. The 'norm.method = "None"' indicates no normalization is applied. This is an R command.

```r
# region length: 203631
system.time(LoadTrackFile(
  track.folder = track.folder, format = "bam",
  norm.method = "None",
  region = "chr11:118339075-118542705",
  extend = 0, meta.info = sample.meta
))
```

--------------------------------

### Generate Protein Coverage Plot

Source: https://showteeth.github.io/ggcoverage/index.html

Creates a basic protein coverage plot using ggprotein. Requires specifying the coverage file, FASTA file, protein ID, and range position setting.

```r
protein.coverage <- ggprotein(
  coverage.file = coverage.file,
  fasta.file = fasta.file,
  protein.id = "sp|P02769|ALBU_BOVIN",
  range.position = "out"
)
protein.coverage
```

--------------------------------

### theme_protein2()

Source: https://showteeth.github.io/ggcoverage/reference/theme_protein2.html

Applies a predefined theme suitable for protein visualizations using geom_protein.

```APIDOC
## theme_protein2()

### Description
Applies a theme to geom_protein plots.
### Usage
```R
theme_protein2()
```

### Value
Returns a list of layers that constitute the theme.
```

--------------------------------

### Load BigWig Tracks with Metadata

Source: https://showteeth.github.io/ggcoverage/articles/CustomizeThePlot.html

Load BigWig files from a specified folder, filtering by region and applying sample metadata. This function prepares coverage data for plotting.

```R
# track folder
track.folder <- system.file("extdata", "ChIP-seq", package = "ggcoverage")
# load bigwig file
track.df <- LoadTrackFile(
  track.folder = track.folder, format = "bw",
  region = "chr18:76822285-76900000",
  meta.info = sample.meta
)
# check data
head(track.df)
```

--------------------------------

### FormatTrack Function

Source: https://showteeth.github.io/ggcoverage/reference/FormatTrack.html

Prepares input data for creating coverage plots by specifying the region, gene information, and extension length.

```APIDOC
## FormatTrack

### Description
Prepares input for creating coverage plot.

### Arguments
- **data** (dataframe) - Track dataframe loaded by `LoadTrackFile`.
- **region** (string) - Region used to create coverage plot, eg: chr14:21,677,306-21,737,601 or chr14:21,677,306. Default: "chr14:21,677,306-21,737,601".
- **gtf.gr** (GRanges object) - GRanges object of GTF, created with `import.gff`. Default: NULL.
- **gene.name** (string) - The name of gene. Default: "HNRNPC".
- **gene.name.type** (string) - Gene name type (field of `gtf.gr`), chosen from gene_name and gene_id. Default: "gene_name".
- **extend** (integer) - Extend length of `region`. Default: 2000.

### Value
A dataframe.
```

--------------------------------

### theme_coverage2

Source: https://showteeth.github.io/ggcoverage/reference/index.html

Applies an alternative theme for geom_coverage.

```APIDOC
## theme_coverage2

### Description
Theme for geom_coverage.
### Function Signature
theme_coverage2()
```

--------------------------------

### Create Basic Coverage Plot

Source: https://showteeth.github.io/ggcoverage/index.html

Generates a basic coverage plot from a data frame. Use this for a quick overview of genomic coverage.

```R
basic.coverage <- ggcoverage(
  data = track.df, color = "grey",
  mark.region = NULL, range.position = "out"
)
basic.coverage
```

--------------------------------

### theme_coverage

Source: https://showteeth.github.io/ggcoverage/reference/index.html

Applies a theme for geom_coverage.

```APIDOC
## theme_coverage

### Description
Theme for geom_coverage.

### Function Signature
theme_coverage()
```

--------------------------------

### theme_tad2

Source: https://showteeth.github.io/ggcoverage/reference/index.html

Applies a theme for geom_tad2.

```APIDOC
## theme_tad2

### Description
Theme for geom_tad2.

### Function Signature
theme_tad2()
```

--------------------------------

### theme_aa

Source: https://showteeth.github.io/ggcoverage/reference/index.html

Applies a theme for geom_base with amino acid information.

```APIDOC
## theme_aa

### Description
Theme for geom_base with Amino Acid.

### Function Signature
theme_aa()
```

--------------------------------

### Load RNA-seq Metadata

Source: https://showteeth.github.io/ggcoverage/index.html

Loads the metadata file for RNA-seq samples. This file typically contains sample names, types, and groups.

```R
# load metadata
meta.file <- system.file(
  "extdata", "RNA-seq", "meta_info.csv",
  package = "ggcoverage"
)
sample.meta <- read.csv(meta.file)
sample.meta
```

--------------------------------

### Prepare Protein Set from FASTA

Source: https://showteeth.github.io/ggcoverage/index.html

Reads protein sequences from a FASTA file into an AAStringSet object using the Biostrings package. This is a prerequisite for generating coverage plots.
```r
library(Biostrings)
# the example FASTA ships in the package's "Proteomics" extdata folder,
# next to MS_BSA_coverage.xlsx
fasta.file <- system.file(
  "extdata", "Proteomics", "MS_BSA_coverage.fasta",
  package = "ggcoverage"
)
protein.set <- Biostrings::readAAStringSet(fasta.file)
protein.set
```

--------------------------------

### Load ChIP-seq Track Files

Source: https://showteeth.github.io/ggcoverage/index.html

Loads bigwig files for ChIP-seq analysis into a data frame. Specifies the track folder, format, and genomic region. Requires sample metadata.

```R
track.folder <- system.file("extdata", "ChIP-seq", package = "ggcoverage")
# load bigwig file
track.df <- LoadTrackFile(
  track.folder = track.folder, format = "bw",
  region = "chr18:76822285-76900000",
  meta.info = sample.meta
)
# check data
head(track.df)
```

--------------------------------

### Load Protein FASTA Sequence

Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html

Load protein sequences from a FASTA file using the Biostrings package. This prepares the protein set for further analysis.

```r
library(Biostrings)
# read the protein sequences (fasta.file must already point to the FASTA file)
protein.set <- Biostrings::readAAStringSet(fasta.file)
```

--------------------------------

### Create Metadata Data Frame

Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html

Creates a data frame to store metadata for ChIP-seq samples, including sample names, types, and groups. This is a prerequisite for loading track files.

```R
# create metadata
sample.meta <- data.frame(
  SampleName = c(
    'Chr18_MCF7_ER_1', 'Chr18_MCF7_ER_2',
    'Chr18_MCF7_ER_3', 'Chr18_MCF7_input'
  ),
  Type = c("MCF7_ER_1", "MCF7_ER_2", "MCF7_ER_3", "MCF7_input"),
  Group = c("IP", "IP", "IP", "Input")
)
sample.meta
```

--------------------------------

### Display Amino Acid Annotation Color Scheme

Source: https://showteeth.github.io/ggcoverage/articles/ggcoverage.html

Visualizes a default color scheme for amino acid annotations based on Residual colours. Uses base R graphics for image display.
```R
# default amino acid colour scheme; the four blank entries pad the
# vector to 25 values so it fills the 5 x 5 grid drawn below
aa.color <- c(
  "D" = "#FF0000", "S" = "#FF2400", "T" = "#E34234", "G" = "#FF8000",
  "P" = "#F28500", "C" = "#FFFF00", "A" = "#FDFF00", "V" = "#E3FF00",
  "I" = "#C0FF00", "L" = "#89318C", "M" = "#00FF00", "F" = "#50C878",
  "Y" = "#30D5C8", "W" = "#00FFFF", "H" = "#0F2CB3", "R" = "#0000FF",
  "K" = "#4b0082", "N" = "#800080", "Q" = "#FF00FF", "E" = "#8F00FF",
  "*" = "#FFC0CB", " " = "#FFFFFF", " " = "#FFFFFF", " " = "#FFFFFF",
  " " = "#FFFFFF"
)

# par() returns the previous values of the parameters it sets; keep them
# so the reset below works even when this snippet is run on its own
opar <- graphics::par(mar = c(1, 5, 1, 1))
graphics::image(
  1:5, 1:5, matrix(seq_along(aa.color), nrow = 5),
  col = rev(aa.color),
  xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n"
)
graphics::text(expand.grid(1:5, 1:5), names(rev(aa.color)))
graphics::mtext(
  text = "Amino acids",
  adj = 1, las = 1, side = 2
)
```

```R
# reset par default
graphics::par(opar)
```