next up previous
Next: [1] objective Up: CHNOSZ examples Previous: more.aa

[3] read.expr

## Don't show: data(thermo)
thermo$obigt: 1809 aqueous, 3368 total species
## End(Don't show) ## overall oxidation state of proteins exclusively localized ## to cytoplasm of S. cerevisiae with/without abundance weighting y <- yeastgfp("cytoplasm") aa <- more.aa(y$protein, "Sce")
more.aa: YJL017W was not matched
aaavg <- aasum(aa, average=TRUE)
aasum: dropped 1 proteins with NA composition and/or abundance
ZC(protein.formula(aaavg))
[1] -0.1293854
# the average composition weighted by abundance waaavg <- aasum(aa, abundance=y$abundance, average=TRUE)
aasum: dropped 75 proteins with NA composition and/or abundance
ZC(protein.formula(waaavg))
[1] -0.1488026
## read.expr using one of the provided data files, ## from Ishihama et al., 2008 file <- system.file("extdata/abundance/ISR+08.csv.xz", package="CHNOSZ") # read all protein names and abundances in ID and emPAI columns # (emPAI - exponentially modified protein abundance index) expr <- read.expr(file, "ID", "emPAI") # scatter plot of average oxidation state and emPAI aa <- more.aa(expr$protein, "Eco")
more.aa: EFTU KPY1 ACO2 BLAT UP05 CATA PPCK K6P1 PGMU HLPA NUCD KPY2 OSTA RBB1 YLEA GNTY RMA1 PTGB UP14 YBIL YFIA YBHE YRBC TBPA LEU2 YIIU PTNA YFGB 6PG9 MULI SR54 YCCW YGGX GLR2 YDHD YFHQ YFGA YHBZ RFAE ACO1 F16P DP3B YLIG DP3A YCFF K6P2 KSGA YCBY TKRA PROC YDAO YFHF UP04 TRMU ADH3 RBD1 YRBD GIDA CN16 SYK3 TRC5 YADR YGGB GLR3 VACJ YHBC YTFG PTAA YFHP YRDC RS22 YQCD T2E1 YFHJ QOR HAM1 GIDB YRBB YECO RMA3 THI2 RRMJ ENGC DHAB CS32 YFCY PTTB YIEM CEA1 YGFE RMA2 DFP RFBC YCGC YCGT YDEW YHBG YEHZ YDCW YIJP TRI6 TRFA CEIA KEE1 YCDW PAB4 T341 AGAY YCGS were not matched
pf <- protein.formula(aa) zc <- ZC(pf) # note we specify ylim here that excludes some high-emPAI values plot(zc, expr$abundance, xlab=expr.property("ZC"), ylim=c(0, 90), ylab="emPAI", main="Proteins in E. coli cytosol\nAbundance vs oxidation state of carbon") legend("topleft", pch=1, legend="Ishihama et al., 2008")

Image readexpr1

 

# what if we just want kinases? # "description" is the name of the column where we search for "kinase" expr.kinase <- read.expr(file, "ID", "emPAI", list(description="kinase")) ## read.expr using a different data file, ## from Anderson and Anderson, 2003 file <- system.file("extdata/abundance/AA03.csv", package="CHNOSZ") # look for proteins described as "Complement" read.expr(file, "name", "log10(pg/ml)", list(description="Complement"))
$protein [1] "CO3" "CO4A" "CFAB" "CO9" "C1QA" "CO8B" "CO5" "CO6" "CO7" "CFAI" [11] "CO2" "CO3.C3a" "CO5.C5a" $abundance [1] 9.27 8.72 8.60 8.56 8.35 8.24 8.18 8.03 7.93 7.80 7.34 5.55 4.05
## speciation diagram for ER.to.Golgi proteins (COPII coat ## proteins) as a function of logfO2, after Dick, 2009 # add old parameters for [Met] sidechain to database add.obigt()
add.obigt: using default file: /home/jedick/R/x86_64-slackware-linux-gnu-library/3.3/CHNOSZ/extdata/thermo/OBIGT-2.csv add.obigt: read 305 rows; made 84 replacements, 221 additions, units = cal add.obigt: use data(thermo) to restore default database
y <- yeastgfp("ER.to.Golgi") # don't use those with NA abundance ina <- is.na(y$abundance) # get the amino acid compositions of the proteins aa <- more.aa(y$protein[!ina], "Sce") # add proteins to thermo$protein ip <- add.protein(aa)
add.protein: added 5 new protein(s) to thermo$protein
# use logarithms of activities of proteins such # that total activity of residues is unity pl <- protein.length(ip) logact <- unitize(rep(1, length(ip)), pl) # load the proteins basis("CHNOS+")
C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 69 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 68 -4 aq H2S 0 2 0 0 1 0 70 -7 aq O2 0 0 0 2 0 0 3095 -80 gas H+ 0 1 0 0 0 1 3 -7 aq
a <- affinity(O2=c(-80, -73), iprotein=ip, loga.protein=logact)
energy.args: temperature is 25 C energy.args: pressure is Psat energy.args: variable 1 is log_f(O2) at 128 values from -80 to -73 subcrt: 27 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet)
# make a speciation diagram e <- equilibrate(a, normalize=TRUE)
balance: coefficients are protein length equilibrate: balancing coefficients are 1273 929 297 876 2195 equilibrate: logarithm of total protein length is 0 equilibrate: using 'normalize' for molar formulas
diagram(e, ylim=c(-4.9, -2.9)) # where we are closest to experimental log activity logfO2 <- rep(-78, length(ip)) abline(v=logfO2[1], lty=3) # scale experimental abundances such that # total activity of residues is unity logact.expt <- unitize(log10(y$abundance[!ina]), pl) # plot experimental log activity points(logfO2, logact.expt, pch=16) text(logfO2+0.5, logact.expt, y$protein[!ina]) # add title title(main=paste("ER.to.Golgi; points - relative abundances", "from YeastGFP. Figure after Dick, 2009",sep="\n"))

Image readexpr2

 

# restore default thermodynamic database data(thermo)
thermo$obigt: 1809 aqueous, 3368 total species
############################# ## examples using stress() ## ############################# ## predominance fields for overall protein compositions ## induced and repressed in an/aerobic carbon limitation ## (experiments of Tai et al., 2005) # the activities of ammonium and sulfate # are similar to the non-growth-limiting concentrations # used by Boer et al., 2003 basis(c("glucose", "H2O", "NH4+", "hydrogen", "SO4-2", "H+"), c(-1, 0, -1.3, 999, -1.4, -7))
C H N O S Z ispecies logact state C6H12O6 6 12 0 6 0 0 1757 -1.0 aq H2O 0 2 0 1 0 0 1 0.0 liq NH4+ 0 4 1 0 0 1 18 -1.3 aq H2 0 2 0 0 0 0 3088 999.0 gas SO4-2 0 0 0 4 1 -2 24 -1.4 aq H+ 0 1 0 0 0 1 3 -7.0 aq
# the names of the experiments in thermo$stress expt <- c("Clim.aerobic.down", "Clim.aerobic.up", "Clim.anaerobic.down", "Clim.anaerobic.up") # here we use abundance to indicate that the protein # compositions should be summed together in equal amounts for(i in 1:length(expt)) { p <- stress(expt[i], "Sce") aa <- more.aa(p$protein, "Sce") aa <- aasum(aa, average=TRUE, protein=expt[i]) add.protein(aa) }
stress: found 48 proteins for Sce in Clim.aerobic.down from TBD+05 add.protein: added 1 new protein(s) to thermo$protein stress: found 145 proteins for Sce in Clim.aerobic.up from TBD+05 add.protein: added 1 new protein(s) to thermo$protein stress: found 166 proteins for Sce in Clim.anaerobic.down from TBD+05 add.protein: added 1 new protein(s) to thermo$protein stress: found 35 proteins for Sce in Clim.anaerobic.up from TBD+05 add.protein: added 1 new protein(s) to thermo$protein
species(expt, "Sce")
C6H12O6 H2O NH4+ H2 SO4-2 H+ ispecies logact state name 1 384.4827 -1692.895 606.875 254.4785 16.729 -573.417 3369 -3 aq Clim.aerobic.down_Sce 2 366.5690 -1597.538 590.469 218.6205 14.931 -560.607 3301 -3 aq Clim.aerobic.up_Sce 3 408.3423 -1771.524 659.199 241.9095 16.988 -625.223 3371 -3 aq Clim.anaerobic.down_Sce 4 233.6048 -1029.086 378.486 154.5135 10.600 -357.286 3372 -3 aq Clim.anaerobic.up_Sce
a <- affinity(C6H12O6=c(-30, 0), H2=c(-20, 0))
energy.args: temperature is 25 C energy.args: pressure is Psat energy.args: variable 1 is log_a(C6H12O6) at 128 values from -30 to 0 energy.args: variable 2 is log_f(H2) at 128 values from -20 to 0 subcrt: 10 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet)
d <- diagram(a, normalize=TRUE, fill=NULL)
balance: coefficients are protein length diagram: plotting A/2.303RT from affinity(), divided by balancing coefficients diagram: using 'normalize' in calculation of predominant species
title(main=paste("Relative stabilities of proteins observed in\n", "an/aerobic carbon limitation in yeast")) # the equilibrium distribution favors the proteins upregulated # by carbon limitation at low chemical potentials of C6H12O6 ... stopifnot(c(d$predominant[1,1], d$predominant[1,128])==grep("up", expt)) # ... and favors proteins downregulated by aerobic conditions # at high hydrogen fugacities stopifnot(c(d$predominant[128, 128], d$predominant[128, 1])==grep("down", expt))

Image readexpr3

 


next up previous
Next: [1] objective Up: CHNOSZ examples Previous: more.aa