## 4.测试

# Plot multiple sequence alignment using ggplot2 with multiple color schemes supported.

# Supports visualizing multiple sequence alignments of DNA and protein sequences using ggplot2. It supports a number of colour schemes, including Chemistry, Clustal, Shapely, Taylor and Zappo. Multiple sequence alignments can easily be combined with other ‘ggplot2’ plots, such as aligning a phylogenetic tree produced by ‘ggtree’ with a multiple sequence alignment.

### 4.1 Load sample data

# Three sample data sets are shipped with the ggmsa package. Note that ggmsa supports not only fasta files but other objects as well. available_msa() can be used to list the MSA objects currently available.


# files currently available:

#  .fasta

# XStringSet objects from 'Biostrings' package:

#  DNAStringSet RNAStringSet AAStringSet BStringSet DNAMultipleAlignment RNAMultipleAlignment AAMultipleAlignment

# bin objects from 'seqmagick' package:

#  DNAbin AAbin

# Locate the example alignment files inside the installed ggmsa package.
# system.file() returns "" for a file that cannot be found, so an empty
# string here means the package (or that file) is not available.

# Protein alignment used by the plotting examples.
protein_sequences <- system.file(
  "extdata", "sample.fasta",
  package = "ggmsa"
)

# miRNA alignment.
miRNA_sequences <- system.file(
  "extdata", "seedSample.fa",
  package = "ggmsa"
)

# Nucleotide alignment.
nt_sequences <- system.file(
  "extdata", "LeaderRepeat_All.fa",
  package = "ggmsa"
)


# [1] "C:/Users/lenovo/Documents/R/win-library/3.6/ggmsa"

# Visualizing Multiple Sequence Alignments #

### 4.2 The most simple code to use ggmsa:


#@ Simple plot: only the alignment and the start/end window are given; every
#@ other setting is left at its default.

ggmsa(protein_sequences, start = 265, end = 300)

#@ Adjust the parameters to draw a customised multiple sequence alignment plot.
#@ Logical arguments are spelled TRUE/FALSE rather than T/F, because T and F
#@ are ordinary variables that can be reassigned.

# Clustal colours, characters only (none_bg = TRUE), sequence names shown.
ggmsa(
  protein_sequences,
  start = 265,
  end = 300,
  font = "TimesNewRoman",
  color = "Clustal",
  char_width = 0.8,
  none_bg = TRUE,
  seq_name = TRUE
)

# Chemistry colours with the tile background kept (none_bg = FALSE).
ggmsa(
  protein_sequences,
  start = 265,
  end = 300,
  font = "TimesNewRoman",
  color = "Chemistry_AA",
  char_width = 0.8,
  none_bg = FALSE
)

# Colour Schemes #


# color schemes for nucleotide sequences currently available:

#  Chemistry_NT Shapely_NT Taylor_NT Zappo_NT

# color schemes for AA sequences currently available:

#  Clustal Chemistry_AA Shapely_AA Zappo_AA Taylor_AA

### 4.3 Clustal X Colour Scheme(Default)

#@ This is an emulation of the default colourscheme used for alignments in Clustal X, a graphical interface for the ClustalW multiple sequence alignment program. Each residue in the alignment is assigned a colour if the amino acid profile of the alignment at that position meets some minimum criteria specific for the residue type.

ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  color = "Clustal"
)

### 4.4 Color by Chemistry

#@ Amino acids are coloured by the chemistry of their side chains.

ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  color = "Chemistry_AA"
)

### 4.5 Color by Shapely

#@ Matches the RasMol amino acid and RasMol nucleotide colour schemes, which
#@ are in turn based on Robert Fletterick's "Shapely models".

ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  color = "Shapely_AA"
)

### 4.6 Color by Taylor

#@ Colour scheme taken from Taylor (Taylor 1997); also used in JalView
#@ (Waterhouse et al. 2009).

ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  color = "Taylor_AA"
)

### 4.7 Color by Zappo

#@ Colours residues by their physico-chemical properties; also used in
#@ JalView (Waterhouse et al. 2009).

ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  color = "Zappo_AA"
)

### 4.8 Font

#@ Several classic fonts for MSA are shipped in the package. In the same way, you can use available_fonts() to list the fonts currently available.


# font families currently available:

# helvetical mono TimesNewRoman DroidSansMono

# The same region is drawn once per font family so the results can be
# compared. Logical arguments use TRUE/FALSE instead of the reassignable
# T/F shorthands.

# helvetical
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = "helvetical",
  color = "Chemistry_AA"
)

# TimesNewRoman
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = "TimesNewRoman",
  color = "Chemistry_AA"
)

# DroidSansMono
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = "DroidSansMono",
  color = "Chemistry_AA"
)

#@ If you specify font = NULL, only the tiles will be plotted.

# Tiles only, sequence names hidden.
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = NULL,
  color = "Chemistry_AA",
  seq_name = FALSE
)

# Tiles only, sequence names shown.
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = NULL,
  color = "Chemistry_AA",
  seq_name = TRUE
)

### 4.9 Characters width

#@ Character width can be specified by char_width. The default is 0.9.

# Narrower characters: char_width = 0.5.
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  char_width = 0.5,
  color = "Chemistry_AA"
)

### 4.10 Background

#@ The background is controlled by none_bg. With none_bg = TRUE only the
#@ characters are plotted.

# theme_void() is added on top of the plot returned by ggmsa().
ggmsa(protein_sequences, start = 320, end = 360, none_bg = TRUE) +
  theme_void()

### 4.11 Position Highlighted

#@ Highlighted positions can be specified by posHighligthed (the argument name is spelled this way in the calls below). none_bg = FALSE applies when you specify highlighted positions via posHighligthed.

# Highlight non-contiguous positions.
# NOTE(review): "posHighligthed" looks misspelled but is the argument name this
# script passes to ggmsa(); confirm against the installed ggmsa version before
# renaming it.

ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  posHighligthed = c(185, 190)
)

ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  posHighligthed = c(180, 190, 200)
)

# Highlight a contiguous range of positions (180 through 200).

ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  posHighligthed = 180:200
)

### 4.12 Sequence names

#@ seq_name defaults to ‘NULL’, which means the sequence names are displayed when font = NULL but are not displayed when a font is given (‘font = char’). If seq_name = TRUE the sequence names will be displayed whenever you need them.

# seq_name = TRUE: sequence names are requested explicitly.
ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  seq_name = TRUE
)

#@ If seq_name = FALSE the sequence names are not displayed in any case.

# font = NULL draws tiles only; seq_name = FALSE hides the names as well.
ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  font = NULL,
  color = "Chemistry_AA",
  seq_name = FALSE
)

## 5.结束

# RUNRPTEST("./ggmsa", rpackage = "ggmsa",install_method = "website", rpackage_repository = "cran")


#@ Two references, for readers who are interested:

# Taylor, W R. 1997. “Residual Colours: A Proposal for Aminochromography.” Protein Eng 10 (7): 743–46.

# Waterhouse, A. M., J. B. Procter, D. M. Martin, M Clamp, and G. J. Barton. 2009. “Jalview Version 2–a Multiple Sequence Alignment Editor and Analysis Workbench.” Bioinformatics 25 (9): 1189.


