next up previous
Next: ionize Up: CHNOSZ examples Previous: buffer

protein

proten>   ## Don't show:
proten> data(thermo)
thermo: loaded 1997 aqueous, 3089 total species to thermo$obigt
thermo: loaded 5264 proteins to thermo$ECO
thermo: loaded 6717 proteins to thermo$SGD
thermo: loaded 4155 localizations and 3570 abundances to thermo$yeastgfp

proten> ## End Don't show
proten>   ### Interaction with the 'protein' function
proten>
proten>   ## Thermodynamic properties of proteins
proten>   # get the composition of a protein
proten>   protein("BPT1_BOVIN")
  protein organism source  abbrv chains Ala Cys Asp Glu Phe Gly His Ile Lys Leu Met Asn Pro Gln Arg
4    BPT1    BOVIN BBA+03 P00974      1   6   6   2   2   4   6   0   2   4   2   1   3   4   1   6
  Ser Thr Val Trp Tyr
4   1   3   1   0   4

proten>   # retrieve the row number of a protein in thermo$protein
proten>   iprotein <- protein("LYSC","CHICK")

proten>   # calculate properties and parameters of aqueous protein
proten>   protein(iprotein)
protein: found LYSC_CHICK (C613H959N193O185S10, 129 residues)
        name abbrv             formula state source1 source2 date        G         H       S
1 LYSC_CHICK    NA C613H959N193O185S10    aq  BBA+03      NA   NA -4206050 -10369700 4175.86
        Cp        V      a1     a2     a3     a4     c1     c2 omega Z
1 6415.553 10420.89 2512.58 345.88 450.87 -409.5 7768.7 -701.5 -7.94 0

proten>   # of crystalline protein
proten>   protein(iprotein,"cr")
protein: found LYSC_CHICK (C613H959N193O185S10, 129 residues)
        name abbrv             formula state source1 source2 date  G         H       S     Cp
1 LYSC_CHICK    NA C613H959N193O185S10    cr  BBA+03      NA   NA NA -10196090 4553.83 4445.7
        V     a        b c  d  e  f lambda  T
1 10626.3 344.7 13707.34 0 NA NA NA      0 NA

proten>   # a call to info() causes the protein properties to
proten>   # be appended to thermo$obigt
proten>   info("LYSC_CHICK")
protein: found LYSC_CHICK (C613H959N193O185S10, 129 residues)
info: 3090 refers to LYSC_CHICK, C613H959N193O185S10 aq (BBA+03).

proten>   # thermodynamic properties can be calculated with subcrt()
proten>   subcrt("LYSC_CHICK")
subcrt: 1 species at 15 values of T and P (wet)
$species
           name             formula state ispecies
3090 LYSC_CHICK C613H959N193O185S10    aq     3090

$out
$out$LYSC_CHICK
        T          P       rho     logK        G         H         S        V        Cp
1    0.01   1.000000 0.9998289 3286.749 -4108100 -10510350  3684.406 10049.21  4409.319
2   25.00   1.000000 0.9970614 3083.063 -4206050 -10369700  4175.860 10420.95  6415.518
3   50.00   1.000000 0.9880295 2919.759 -4317263 -10199867  4722.506 10600.23  7073.981
4   75.00   1.000000 0.9748643 2788.465 -4442106 -10018826  5261.992 10708.15  7376.581
5  100.00   1.013220 0.9583926 2682.511 -4580176  -9832092  5779.892 10782.93  7548.444
6  125.00   2.320144 0.9390726 2596.618 -4730554  -9641537  6273.262 10840.94  7665.198
7  150.00   4.757169 0.9170577 2526.937 -4892670  -9448098  6742.892 10891.15  7760.881
8  175.00   8.918049 0.8923427 2470.400 -5065799  -9251881  7190.897 10940.05  7856.945
9  200.00  15.536499 0.8647434 2424.600 -5249238  -9052464  7620.111 10994.29  7974.628
10 225.00  25.478603 0.8338733 2387.629 -5442322  -8848798  8034.140 11063.40  8146.453
11 250.00  39.736493 0.7990719 2357.970 -5644451  -8638774  8438.039 11165.02  8439.891
12 275.00  59.431251 0.7592362 2334.423 -5855127  -8417956  8840.380 11338.89  9030.924
13 300.00  85.837843 0.7124075 2316.077 -6074053  -8175407  9259.928 11694.88 10513.444
14 325.00 120.457572 0.6545772 2302.355 -6301438  -7875355  9754.686 12644.61 15882.325
15 350.00 165.211289 0.5746875 2293.455 -6539432  -7308661 10654.664 17027.35 60917.017



proten>   ### Table of properties of some proteins
proten>   basis("CHNOS+")
    C H N O S Z ispecies logact state
CO2 1 0 0 2 0 0       69     -3    aq
H2O 0 2 0 1 0 0        1      0   liq
NH3 0 3 1 0 0 0       68     -4    aq
H2S 0 2 0 0 1 0       70     -7    aq
O2  0 0 0 2 0 0     2852    -80   gas
H+  0 1 0 0 0 1        3     -7    aq

proten>   species(c("LYSC_CHICK","CYC_BOVIN","MYG_HORSE","RNAS1_BOVIN"))
protein: found CYC_BOVIN (C517H825N143O150S4, 104 residues)
protein: found MYG_HORSE (C769H1212N210O218S2, 153 residues)
protein: found RNAS1_BOVIN (C575H909N171O193S12, 124 residues)

proten>   protein.info()
affinity: temperature is 25 C
energy.args: pressure is Psat
affinity: loading ionizable protein groups
subcrt: 27 species at 298.15 K and 1 bar (wet)
affinity: temperature is 25 C
energy.args: pressure is Psat
subcrt: 27 species at 298.15 K and 1 bar (wet)
info: 3090 refers to LYSC_CHICK, C613H959N193O185S10 aq (BBA+03).
info: 3091 refers to CYC_BOVIN, C517H825N143O150S4 aq (BBA+03).
info: 3092 refers to MYG_HORSE, C769H1212N210O218S2 aq (BBA+03).
info: 3093 refers to RNAS1_BOVIN, C575H909N171O193S12 aq (BBA+03).
protein.info: converting things ...
      protein length             formula        G    Z      G.Z     ZC
1  LYSC_CHICK    129 C613H959N193O185S10 -4206.05 7.75 -4419.19  0.016
2   CYC_BOVIN    104  C517H825N143O150S4 -3653.51 8.20 -3865.46 -0.170
3   MYG_HORSE    153 C769H1212N210O218S2 -5179.17 1.15 -5362.90 -0.185
4 RNAS1_BOVIN    124 C575H909N171O193S12 -4957.15 4.04 -5103.66  0.024

proten>   # the following gives the per-residue composition (i.e. formation
proten>   # reaction coefficients) for the ionized proteins
proten>   residue.info()
affinity: temperature is 25 C
energy.args: pressure is Psat
affinity: loading ionizable protein groups
subcrt: 27 species at 298.15 K and 1 bar (wet)
       CO2      H2O      NH3        H2S        O2         H+        name
1 4.751938 1.395349 1.496124 0.07751938 -4.732558 0.06005732  LYSC_CHICK
2 4.971154 1.865385 1.375000 0.03846154 -5.182692 0.07884153   CYC_BOVIN
3 5.026144 1.888889 1.372549 0.01307190 -5.258170 0.00752129   MYG_HORSE
4 4.637097 1.500000 1.379032 0.09677419 -4.608871 0.03257789 RNAS1_BOVIN

proten>   ## Protein Data from Online Sources
proten>   ## Not run:
proten> ##D     ## marked dontrun because it requires internet
proten> ##D     # this asks to search Swiss-Prot
proten> ##D     info("PRND_HUMAN")
proten> ##D     # an online search can also be started from the
proten> ##D     # "subcrt" function
proten> ##D     subcrt("SPRN_HUMAN")
proten> ##D
proten> ## End(Not run)  ## end dontrun
proten>
proten>   ## Inputting protein compositions
proten>   # make a new protein
proten>   protein("GGSGG","PROTEIN_TEST")
protein: added PROTEIN_TEST (length=5).
     protein organism source abbrv chains Ala Cys Asp Glu Phe Gly His Ile Lys Leu Met Asn Pro Gln
1100 PROTEIN     TEST   <NA>  <NA>      1   0   0   0   0   0   4   0   0   0   0   0   0   0   0
     Arg Ser Thr Val Trp Tyr
1100   0   1   0   0   0   0

proten>   # a sequence can be pasted into the command line:
proten>   # type this
proten>   protein("
proten+   # then paste the sequence
proten+   # and end the command by typing
proten+   ","PROTEIN_NEW")
protein: added PROTEIN_NEW (length=41).
     protein organism source abbrv chains Ala Cys Asp Glu Phe Gly His Ile Lys Leu Met Asn Pro Gln
1101 PROTEIN      NEW   <NA>  <NA>      1   3   2   3   8   0   1   3   1   0   0   2   6   2   1
     Arg Ser Thr Val Trp Tyr
1101   0   2   5   0   0   2

proten>   # or use whatever name you want (with an underscore).
proten>
proten>   ## Standard molal entropy of a protein reaction
proten>   basis("CHNOS")
    C H N O S ispecies logact state
CO2 1 0 0 2 0       69     -3    aq
H2O 0 2 0 1 0        1      0   liq
NH3 0 3 1 0 0       68     -4    aq
H2S 0 2 0 0 1       70     -7    aq
O2  0 0 0 2 0     2852    -80   gas

proten>   # here we provide the reaction coefficients of the
proten>   # proteins (per protein backbone); the 'subcrt' function calculates
proten>   # the coefficients of the basis species in the reaction
proten>   t <- subcrt(c("CSG_METTL","CSG_METJA"),c(-1/530,1/530),
proten+     T=seq(0,350,length.out=50))
protein: found CSG_METTL (C2484H4000N638O844S7, 530 residues)
protein: found CSG_METJA (C2555H4032N640O865S14, 530 residues)
subcrt: 2 species at 50 values of T and P (wet)
subcrt: reaction is not balanced; it is missing this composition:
          C           H            N           O           S
 -0.1339623 -0.06037736 -0.003773585 -0.03962264 -0.01320755
subcrt: adding missing composition from basis definition and restarting...
subcrt: 7 species at 50 values of T and P (wet)

proten>   thermo.plot.new(xlim=range(t$out$T),ylim=range(t$out$S),
proten+     xlab=axis.label("T"),ylab=axis.label("DS0r"))

proten>   lines(t$out$T,t$out$S)

proten>   # do it at high pressure as well
proten>   t <- subcrt(c("CSG_METTL","CSG_METJA"),c(-1/530,1/530),
proten+     T=seq(0,350,length.out=50),P=3000)
subcrt: 2 species at 50 values of T and P (wet)
subcrt: reaction is not balanced; it is missing this composition:
          C           H            N           O           S
 -0.1339623 -0.06037736 -0.003773585 -0.03962264 -0.01320755
subcrt: adding missing composition from basis definition and restarting...
subcrt: 7 species at 50 values of T and P (wet)

proten>   lines(t$out$T,t$out$S,lty=2)

proten>   # label the plot
proten>   title(main=paste("Standard molal entropy\n",
proten+     "P = Psat (solid), P = 3000 bar (dashed)"))

proten>   t$reaction$coeff <- round(t$reaction$coeff,3)

proten>   d <- describe(t$reaction,
proten+     use.name=c(TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE))

proten>   text(170,-3,c2s(s2c(d,sep="=",move.sep=TRUE),sep="\n"),cex=0.8)

\begin{figure}\par
\includegraphics{pictures/protein1}
\par
\par
 
\end{figure}

proten>   ### Metastability calculations
proten>
proten>   ## subcellular homologs of yeast glutaredoxin
proten>   ## as a function of logfO2 - logaH2O, after Dick, 2009
proten>   basis("CHNOS+")
    C H N O S Z ispecies logact state
CO2 1 0 0 2 0 0       69     -3    aq
H2O 0 2 0 1 0 0        1      0   liq
NH3 0 3 1 0 0 0       68     -4    aq
H2S 0 2 0 0 1 0       70     -7    aq
O2  0 0 0 2 0 0     2852    -80   gas
H+  0 1 0 0 0 1        3     -7    aq

proten>   protein <- c("GLRX1","GLRX2","GLRX3","GLRX4","GLRX5")

proten>   loc <- c("(C)","(M)","(N)","(N)","(M)")

proten>   species(protein,"YEAST")
protein: found GLRX1_YEAST (C549H886N146O170S4, 110 residues)
protein: found GLRX2_YEAST (C715H1161N181O213S5, 143 residues)
protein: found GLRX3_YEAST (C1444H2195N371O463S10, 285 residues)
protein: found GLRX4_YEAST (C1226H1910N316O389S6, 244 residues)
protein: found GLRX5_YEAST (C762H1200N196O227S6, 150 residues)

proten>   t <- affinity(H2O=c(-10,0),O2=c(-85,-60))
affinity: temperature is 25 C
energy.args: pressure is Psat
energy.args: variable 1 is H2O at 128 increments from -10 to 0
energy.args: variable 2 is O2 at 128 increments from -85 to -60
affinity: loading ionizable protein groups
subcrt: 28 species at 298.15 K and 1 bar (wet)

proten>   diagram(t,names=paste(protein,loc))
diagram: immobile component is protein backbone group
diagram: conservation coefficients are 110 143 285 244 150
diagram: using residue equivalents

proten>   title(main=paste("Yeast glutaredoxins (black) and residues (blue)\n",
proten+     describe(thermo$basis[-c(2,5),])))

proten>   # note the difference when we set as.residue=TRUE to
proten>   # plot stability fields for the residue equivalents of the
proten>   # proteins instead of the proteins themselves ...
proten>   # a stability field for the residue equivalent of one of the larger proteins appears
proten>   diagram(t,names=paste(protein,loc),as.residue=TRUE,
proten+     add=TRUE,col="blue")
diagram: immobile component is protein backbone group
diagram: conservation coefficients are 110 143 285 244 150
diagram: using residue equivalents

\begin{figure}\par
\includegraphics{pictures/protein2}
\par
\par
 
\end{figure}

proten>   ## surface-layer proteins from Methanococcus and others:
proten>   ## a speciation diagram for surface layer proteins
proten>   ## as a function of oxygen fugacity after Dick, 2008
proten>   # make our protein list
proten>   organisms <- c("METSC","METJA","METFE","HALJP","METVO",
proten+     "METBU","ACEKI","BACST","BACLI","AERSA")

proten>   proteins <- c(rep("CSG",6),rep("SLAP",4))

proten>   proteins <- paste(proteins,organisms,sep="_")

proten>   # set some graphical parameters
proten>   lwd <- c(rep(3,6),rep(1,4))

proten>   lty <- c(1:6,1:4)

proten>   # load the basis species and proteins
proten>   basis("CHNOS+")
    C H N O S Z ispecies logact state
CO2 1 0 0 2 0 0       69     -3    aq
H2O 0 2 0 1 0 0        1      0   liq
NH3 0 3 1 0 0 0       68     -4    aq
H2S 0 2 0 0 1 0       70     -7    aq
O2  0 0 0 2 0 0     2852    -80   gas
H+  0 1 0 0 0 1        3     -7    aq

proten>   species(proteins)
protein: found CSG_METSC (C2812H4405N747O872S16, 571 residues)
protein: found CSG_METFE (C2815H4411N747O872S14, 571 residues)
protein: found CSG_HALJP (C3669H5647N971O1488, 828 residues)
protein: found CSG_METVO (C2575H4097N645O884S11, 553 residues)
protein: found CSG_METBU (C1362H2111N355O442S4, 278 residues)
protein: found SLAP_ACEKI (C3584H5648N926O1138S4, 736 residues)
protein: found SLAP_BACST (C5676H9113N1489O1863S3, 1198 residues)
protein: found SLAP_BACLI (C3977H6396N1068O1286S2, 844 residues)
protein: found SLAP_AERSA (C2250H3580N618O716S2, 481 residues)

proten>   # calculate affinities
proten>   a <- affinity(O2=c(-100,-65))
affinity: temperature is 25 C
energy.args: pressure is Psat
energy.args: variable 1 is O2 at 128 increments from -100 to -65
affinity: loading ionizable protein groups
subcrt: 33 species at 298.15 K and 1 bar (wet)

proten>   # make diagram
proten>   d <- diagram(a,ylim=c(-5,-1),legend.x=NULL,lwd=lwd,
proten+     ylab=as.expression(quote(log~italic(a[j]))),yline=1.7)
diagram: immobile component is protein backbone group
diagram: conservation coefficients are 571 530 571 828 553 278 736 1198 844 481
diagram: using residue equivalents
diagram: log total activity of PBB (from species) is 0.8188854

proten>   # label diagram
proten>   text(-80,-1.9,"METJA")

proten>   text(-74.5,-1.9,"METVO")

proten>   text(-69,-1.9,"HALJP")

proten>   text(-78,-2.85,"METBU",cex=0.8,srt=-22)

proten>   text(-79,-3.15,"ACEKI",cex=0.8,srt=-25)

proten>   text(-81,-3.3,"METSC",cex=0.8,srt=-25)

proten>   text(-87,-3.1,"METFE",cex=0.8,srt=-17)

proten>   text(-79,-4.3,"BACST",cex=0.8)

proten>   text(-85.5,-4.7,"AERSA",cex=0.8,srt=38)

proten>   text(-87,-4.25,"BACLI",cex=0.8,srt=30)

proten>   # add water line
proten>   abline(v=-83.1,lty=2)

proten>   title(main=paste("Surface-layer proteins",
proten+     "After Dick, 2008",sep="\n"))

\begin{figure}\par
\includegraphics{pictures/protein3}
\par
\par
 
\end{figure}

proten>   ## relative metastabilities of bovine proteins,
proten>   ## including prion, as a function of temperature
proten>   ## along a glutathione redox buffer ...
proten>   ## prion protein is relatively favored at high temperature
proten>   mod.buffer("GSH-GSSG",c("GSH","GSSG"),logact=c(-3,-7))
mod.buffer: changed state and/or logact of GSH GSSG in GSH-GSSG buffer.

proten>   basis(c("CO2","H2O","NH4+","SO4-2","H2","H+"),
proten+     c(-1,0,-4,-4,"GSH-GSSG",-7))
      C H N O S  Z ispecies   logact state
CO2   1 0 0 2 0  0       69       -1    aq
H2O   0 2 0 1 0  0        1        0   liq
NH4+  0 4 1 0 0  1       18       -4    aq
SO4-2 0 0 0 4 1 -2       24       -4    aq
H2    0 2 0 0 0  0       66 GSH-GSSG    aq
H+    0 1 0 0 0  1        3       -7    aq

proten>   basis("CO2","gas")
      C H N O S  Z ispecies   logact state
CO2   1 0 0 2 0  0     2844       -1   gas
H2O   0 2 0 1 0  0        1        0   liq
NH4+  0 4 1 0 0  1       18       -4    aq
SO4-2 0 0 0 4 1 -2       24       -4    aq
H2    0 2 0 0 0  0       66 GSH-GSSG    aq
H+    0 1 0 0 0  1        3       -7    aq

proten>   species(c("CYC","RNAS1","BPT1","ALBU","INS","PRIO"),"BOVIN")
protein: found BPT1_BOVIN (C284H438N84O79S7, 58 residues)
protein: found ALBU_BOVIN (C2934H4615N781O897S39, 583 residues)
protein: found INS_BOVIN (C254H381N65O74S6, 51 residues)
protein: found PRIO_BOVIN (C1026H1512N318O305S9, 217 residues)

proten>   a <- affinity(T=c(0,200))
energy.args: pressure is Psat
energy.args: variable 1 is T at 128 increments from 273.15 to 473.15
affinity: loading buffer species
affinity: loading ionizable protein groups
subcrt: 31 species at 128 values of T and P (wet)
buffer: ( GSH-GSSG ) for activity of H2 (active), CO2 (conserved).

proten>   diagram(a,as.residue=TRUE,ylim=c(-2,0.5))
diagram: immobile component is protein backbone group
diagram: conservation coefficients are 104 124 58 583 51 217
diagram: using residue equivalents
diagram: log total activity of PBB (from species) is 0.05576046

proten>   title(main="Bovine proteins")

\begin{figure}\par
\includegraphics{pictures/protein4}
\par
\par
 
\end{figure}


next up previous
Next: ionize Up: CHNOSZ examples Previous: buffer