next up previous
Next: hkf Up: CHNOSZ examples Previous: info

[1] protein.info

## Don't show:
data(thermo)
thermo$obigt: 1809 aqueous, 3368 total species
## End(Don't show)
## example for chicken lysozyme C
# index in thermo$protein
ip <- iprotein("LYSC_CHICK")
# amino acid composition
ip2aa(ip)
protein organism ref abbrv chains Ala Cys Asp Glu Phe Gly His Ile Lys Leu Met Asn Pro Gln Arg Ser Thr 6 LYSC CHICK BBA+03 P00698 1 12 8 7 2 3 12 1 6 6 8 2 14 2 3 11 10 7 Val Trp Tyr 6 6 6 3
# length and chemical formula
protein.length(ip)
[1] 129
# chemical formula (element counts) of the protein selected by ip
protein.formula(ip)
C H N O S LYSC_CHICK 613 959 193 185 10
# formula, Gibbs energy, average oxidation state of carbon
protein.info(ip)
aa2eos: found LYSC_CHICK (C613H959N193O185S10, 129 residues) subcrt: 1 species at 298.15 K and 1 bar (wet) protein length formula G Z G.Z ZC 1 LYSC_CHICK 129 C613H959N193O185S10 -4119738 NA NA 0.01631321
# as above, now with charge and Gibbs energy of ionized protein at pH 7
basis("CHNOS+")
C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 69 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 68 -4 aq H2S 0 2 0 0 1 0 70 -7 aq O2 0 0 0 2 0 0 3095 -80 gas H+ 0 1 0 0 0 1 3 -7 aq
# same summary again; with H+ among the basis species the
# ionized-protein columns (Z, G.Z) are filled in instead of NA
protein.info(ip)
subcrt: 1 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet) protein length formula G Z G.Z ZC 1 LYSC_CHICK 129 C613H966.747394557784N193O185S10+7.74739455778439 -4119738 7.747395 -4332873 0.01631321
# group additivity for thermodynamic properties and HKF equation-of-state
# parameters of non-ionized protein
aa2eos(ip2aa(ip))
aa2eos: found LYSC_CHICK (C613H959N193O185S10, 129 residues) name abbrv formula state ref1 ref2 date G H S Cp V 1 LYSC_CHICK NA C613H959N193O185S10 aq BBA+03 NA NA -4119738 -10283083 4175.86 6415.553 10420.89 a1.a a2.b a3.c a4.d c1.e c2.f omega.lambda z.T 1 2512.58 345.88 450.87 -409.5 7768.7 -701.5 -7.94 0
# calculation of standard thermodynamic properties
# (subcrt uses the species name, not ip)
subcrt("LYSC_CHICK")
subcrt: 1 species at 15 values of T and P (wet) $species name formula state ispecies 3369 LYSC_CHICK C613H959N193O185S10 aq 3369 $out $out$LYSC_CHICK T P rho logK G H S V Cp 1 0.01 1.000000 0.9998289 3217.694 -4021787 -10423733 3684.406 10049.21 4409.319 2 25.00 1.000000 0.9970614 3019.795 -4119738 -10283083 4175.860 10420.95 6415.518 3 50.00 1.000000 0.9880295 2861.386 -4230950 -10113250 4722.506 10600.23 7073.981 4 75.00 1.000000 0.9748643 2734.284 -4355794 -9932209 5261.992 10708.15 7376.581 5 100.00 1.013220 0.9583926 2631.960 -4493864 -9745475 5779.892 10782.93 7548.444 6 125.00 2.320144 0.9390726 2549.241 -4644242 -9554920 6273.262 10840.94 7665.198 7 150.00 4.757169 0.9170577 2482.359 -4806358 -9361481 6742.892 10891.15 7760.881 8 175.00 8.918049 0.8923427 2428.309 -4979487 -9165264 7190.897 10940.05 7856.945 9 200.00 15.536499 0.8647434 2384.733 -5162925 -8965847 7620.111 10994.29 7974.628 10 225.00 25.478603 0.8338733 2349.762 -5356010 -8762181 8034.140 11063.40 8146.453 11 250.00 39.736493 0.7990719 2321.913 -5558139 -8552157 8438.039 11165.02 8439.891 12 275.00 59.431251 0.7592362 2300.011 -5768815 -8331338 8840.380 11338.89 9030.924 13 300.00 85.837843 0.7124075 2283.166 -5987602 -8088790 9259.928 11694.88 10513.444 14 325.00 120.457572 0.6545772 2270.819 -6215126 -7788738 9754.686 12644.61 15882.325 15 350.00 165.211289 0.5746875 2263.184 -6453120 -7222044 10584.664 17027.35 60917.017
# affinity calculation, protein identified by ip
affinity(iprotein=ip)
energy.args: temperature is 25 C energy.args: pressure is Psat subcrt: 27 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet) $sout $sout$CO2 logK G 1 67.61986 -92250 $sout$water logK G 1 41.55247 -56687.71 $sout$NH3 logK G 1 4.678781 -6383 $sout$H2S logK G 1 4.891353 -6673 $sout$oxygen logK G 1 0 0 $sout$`H+` logK G 1 0 0 $sout$H2O_RESIDUE logK G 1 46.50121 -63439 $sout$Ala_RESIDUE logK G 1 18.59784 -25372 $sout$Cys_RESIDUE logK G 1 12.38122 -16891 $sout$Asp_RESIDUE logK G 1 79.94977 -109071 $sout$Glu_RESIDUE logK G 1 80.34706 -109613 $sout$Phe_RESIDUE logK G 1 -10.26796 14008 $sout$Gly_RESIDUE logK G 1 20.16575 -27511 $sout$His_RESIDUE logK G 1 -11.00976 15020 $sout$Ile_RESIDUE logK G 1 13.59727 -18550 $sout$Lys_RESIDUE logK G 1 11.56611 -15779 $sout$Leu_RESIDUE logK G 1 15.21795 -20761 $sout$Met_RESIDUE logK G 1 9.913842 -13524.9 $sout$Asn_RESIDUE logK G 1 45.4838 -62051 $sout$Pro_RESIDUE logK G 1 7.419493 -10122 $sout$Gln_RESIDUE logK G 1 46.06214 -62840 $sout$Arg_RESIDUE logK G 1 -7.467872 10188 $sout$Ser_RESIDUE logK G 1 44.33957 -60490 $sout$Thr_RESIDUE logK G 1 41.33571 -56392 $sout$Val_RESIDUE logK G 1 16.04698 -21892 $sout$Trp_RESIDUE logK G 1 -26.8405 36617 $sout$Tyr_RESIDUE logK G 1 20.78807 -28360 $property [1] "A" $basis C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 69 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 68 -4 aq H2S 0 2 0 0 1 0 70 -7 aq O2 0 0 0 2 0 0 3095 -80 gas H+ 0 1 0 0 0 1 3 -7 aq $species CO2 H2O NH3 H2S O2 H+ ispecies logact state name 1 613 180 193 10 -610.5 0 -6 -3 aq LYSC_CHICK $T [1] 298.15 $P [1] "Psat" $vars character(0) $vals $vals[[1]] [1] NA $values $values$`-6` [1] -598.5468
# affinity calculation, protein loaded as a species
species("LYSC_CHICK")
CO2 H2O NH3 H2S O2 H+ ispecies logact state name 1 613 180 193 10 -610.5 0 3369 -3 aq LYSC_CHICK
# called without arguments: the result's $species table lists
# the protein that was loaded with species()
affinity()
energy.args: temperature is 25 C energy.args: pressure is Psat subcrt: 7 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet) $sout $sout$CO2 logK G 1 67.61986 -92250 $sout$water logK G 1 41.55247 -56687.71 $sout$NH3 logK G 1 4.678781 -6383 $sout$H2S logK G 1 4.891353 -6673 $sout$oxygen logK G 1 0 0 $sout$`H+` logK G 1 0 0 $sout$LYSC_CHICK logK G 1 3019.795 -4119738 $property [1] "A" $basis C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 69 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 68 -4 aq H2S 0 2 0 0 1 0 70 -7 aq O2 0 0 0 2 0 0 3095 -80 gas H+ 0 1 0 0 0 1 3 -7 aq $species CO2 H2O NH3 H2S O2 H+ ispecies logact state name 1 613 180 193 10 -610.5 0 3369 -3 aq LYSC_CHICK $T [1] 298.15 $P [1] "Psat" $vars character(0) $vals $vals[[1]] [1] NA $values $values$`3369` [1] -598.5468
# NB: subcrt() only shows the properties of the non-ionized
# protein, but affinity() uses the properties of the ionized
# protein if the basis species have H+

## these are all the same
protein.formula("P53_PIG")
C H N O S P53_PIG 1873 2946 536 582 18
# same protein, this time referenced by its index in thermo$protein
protein.formula(iprotein("P53_PIG"))
C H N O S P53_PIG 1873 2946 536 582 18
# same protein, this time passed as its amino acid composition
protein.formula(ip2aa(iprotein("P53_PIG")))
C H N O S P53_PIG 1873 2946 536 582 18
## steps in calculation of chemical activities of two proteins
## in metastable equilibrium, after Dick and Shock, 2011
protein <- iprotein(c("CSG_METVO", "CSG_METJA"))
# clear out amino acid residues loaded by the example above
# ( in affinity(iprotein=ip) )
data(thermo)
thermo$obigt: 1809 aqueous, 3368 total species
# load supplemental database to use "old" [Met] sidechain group
add.obigt()
add.obigt: using default file: /home/jedick/R/x86_64-slackware-linux-gnu-library/3.3/CHNOSZ/extdata/thermo/OBIGT-2.csv add.obigt: read 305 rows; made 84 replacements, 221 additions, units = cal add.obigt: use data(thermo) to restore default database
# set up the basis species to those used in DS11
basis("CHNOS+")
C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 69 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 68 -4 aq H2S 0 2 0 0 1 0 70 -7 aq O2 0 0 0 2 0 0 3095 -80 gas H+ 0 1 0 0 0 1 3 -7 aq
# note this yields logaH2 = -4.657486
swap.basis("O2", "H2")
C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 69 -3.000000 aq H2O 0 2 0 1 0 0 1 0.000000 liq NH3 0 3 1 0 0 0 68 -4.000000 aq H2S 0 2 0 0 1 0 70 -7.000000 aq H2 0 2 0 0 0 0 66 -4.657486 aq H+ 0 1 0 0 0 1 3 -7.000000 aq
# demonstrate the steps of the equilibrium calculation
protein.equil(protein, loga.protein=-3)
protein.equil: temperature from argument is 25 degrees C protein.equil: pH from thermo$basis is 7 protein.equil: [Met] is from reference DLH06 protein.equil [1]: first protein is CSG_METVO with length 553 protein.equil [1]: reaction to form nonionized protein from basis species has G0(cal/mol) of -47579819.1780865 protein.equil [1]: ionization reaction of protein has G0(cal/mol) of -95829.2021553493 protein.equil [1]: per residue, reaction to form ionized protein from basis species has G0/RT of -145.510751814671 protein.equil [1]: per residue, logQstar is 63.0052264992363 protein.equil [1]: per residue, Astar/RT = -G0/RT - 2.303logQstar is 0.435856496816172 check it! per residue, Astar/RT calculated using affinity() is 0.435856496816138 protein.equil [all]: lengths of all proteins are 553 530 protein.equil [all]: Astar/RT of all residue equivalents are 0.435856496816172 1.36498841398796 protein.equil [all]: sum of exp(Astar/RT) of all residue equivalents is 5.46196456637761 protein.equil [all]: equilibrium degrees of formation (alphas) of residue equivalents are 0.283100862875426 0.716899137124574 check it! alphas of residue equivalents from equilibrate() are 0.283100862875413 0.716899137124587 protein.equil [all]: for activity of proteins equal to 10^-3, total activity of residues is 10^0.0346284566253204 protein.equil [all]: log10 equilibrium activities of residue equivalents are -0.513430350302529 -0.109913485714633 protein.equil [all]: log10 equilibrium activities of proteins are -3.25615548160723 -2.83418928631542 check it! log10 eq'm activities of proteins from equilibrate() are -3.25615548160725 -2.83418928631541
## we can also look at the affinities
# (Reaction 7, Dick and Shock, 2011)
# A/2.303RT for protein at unit activity (A-star for the protein)
a <- affinity(iprotein=protein[1], loga.protein=0)
energy.args: temperature is 25 C energy.args: pressure is Psat subcrt: 27 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet)
Astar.protein <- a$values[[1]]
# divide affinity by protein length (A-star for the residue)
pl <- protein.length(protein[1])
Astar.residue <- Astar.protein/pl  # 0.1893, Eq. 11
# A/2.303RT per residue corresponding to protein activity of 10^-3
loga.residue <- log10(pl*10^-3)
Aref.residue <- Astar.residue - loga.residue  # 0.446, after Eq. 16
# A-star of the residue in natural log units (A/RT)
log(10) * Astar.residue  # 0.4359, after Eq. 23
[1] 0.4358565
## using protein.formula: average oxidation state of
## carbon of proteins from different organisms (Dick, 2014)
# get amino acid compositions of microbial proteins
# generated from the RefSeq database
file <- system.file("extdata/refseq/protein_refseq.csv.xz", package="CHNOSZ")
ip <- add.protein(read.aa(file))
add.protein: added 779 new protein(s) to thermo$protein
# only use those organisms with a certain
# number of sequenced bases
ip <- ip[as.numeric(thermo$protein$abbrv[ip]) > 50000]
pf <- protein.formula(thermo$protein[ip, ])
zc <- ZC(pf)
# the organism names we search for
# "" matches all organisms
terms <- c("Natr", "Halo", "Rhodo", "Acido", "Methylo",
  "Chloro", "Nitro", "Desulfo", "Geo", "Methano",
  "Thermo", "Pyro", "Sulfo", "Buchner", "")
tps <- thermo$protein$ref[ip]
# one x-axis position per search term; derived from the vector so the
# plot limits, loop and axis stay in step if terms are added or removed
nterms <- length(terms)
# NOTE(review): `mar` is passed to plot() exactly as in the original;
# margins are normally set through par(), so confirm this has an effect
plot(0, 0, xlim=c(1, nterms), ylim=c(-0.3, -0.05), pch="",
  ylab=expression(italic(Z)[C]), xlab="", xaxt="n", mar=c(6, 3, 1, 1))
for(i in seq_along(terms)) {
  # proteins whose `ref` entry matches this search term
  it <- grep(terms[i], tps)
  zct <- zc[it]
  # jitter horizontally so overlapping points remain visible
  points(jitter(rep(i, length(zct))), zct, pch=20)
}
# relabel the catch-all "" term with the total number of proteins plotted
terms[nterms] <- paste("all", length(ip))
axis(1, seq_len(nterms), terms, las=2)
title(main=paste("Average oxidation state of carbon in proteins",
  "by taxID in NCBI RefSeq (after Dick, 2014)", sep="\n"))

Image proteininfo1

 


next up previous
Next: hkf Up: CHNOSZ examples Previous: info