Jason E. Aten, Tova F. Fuller and Steve Horvath
# Set the working directory.
# NOTE(review): machine-specific absolute paths; adjust to your own checkout.
setwd("/Users/TovaFuller/Documents/HorvathLab2007/Causality")
library(impute)
# Source network functions
source("/Users/TovaFuller/Documents/HorvathLab2007/Jason Code/neo.txt")
# OUTLINE OF THE FOLLOWING ANALYSIS
# 1. Find markers that are consistent with Insig1 being upstream of Fdft1 and Dhcr7.
# 2. Build a minimal model for the genetic control of Insig1.
# 3. Screen for novel genes downstream of Insig1 by repeating the Single Marker
# Analysis this time holding the markers fixed.
# For brevity of loading / downloading this data is saved as an .rdat file; please read it in
# below. As a csv file this data would take ~20 minutes to read in versus an image file
# which will load much more quickly.
# load() returns the names of the objects it restored; they are kept in `a`.
a <- load("liver.snps.23388genes.clinical.bxh.male.and.female.rdat")
# a contains two variables: a
# [1] "str.me" "liver.bxh.male.female"
#Let's look at the dimensions of the liver data: dim(liver.bxh.male.female)
#[1] 265 24714 str(str.me)
#List of 4
# $ snpcols : int [1:1278] 1 2 3 4 5 6 7 8 9 10 ...
# $ genecols : int [1:23388] 1279 1280 1281 1282 1283 1284 1285
#1286 1287 1288 ...
# $ sex : num 24667
# $ clinical.cols: int [1:47] 24668 24669 24670 24671 24672 24673 24674
#24675 24676 24677 ...
# We identify the indices of several genes from the expression data that we are interested
# in.
# pmatch() does partial matching on the column names and returns NA when the
# prefix is missing or ambiguous, so fail here rather than further down.
which.Insig1 <- pmatch("Insig1", colnames(liver.bxh.male.female))
which.Fdft1 <- pmatch("Fdft1.MMT00082285", colnames(liver.bxh.male.female))
which.Dhcr7 <- pmatch("Dhcr7", colnames(liver.bxh.male.female))
stopifnot(!anyNA(c(which.Insig1, which.Fdft1, which.Dhcr7)))
# datCombined contains the data we will analyze
125
# Keep only the rows whose sex code equals 2.
# NOTE(review): the variable name implies 2 == female; confirm against the data dictionary.
female <- liver.bxh.male.female$sex == 2
datCombined <- liver.bxh.male.female[female, ]
# STEP 1: Single Marker Analysis: Leafing
# We will now execute a single marker analysis on the data with respect to Fdft1.
# The date() calls bracket the run so its duration is visible on the console.
date()
sma <- single.marker.analysis(
  datCombined,
  snpcols = str.me$snpcols,
  genecols = which.Insig1,
  traitcols = c(which.Fdft1, which.Dhcr7)
)
date()
write.csv(sma, "sma.Insig1.to.Fdft1.Dhcr7.AllSNP.BXHfemale.csv")
# Open the csv file denoted above, which should be in your working directory. Sort
# ascending by column M (mlogp.M.AtoB), and notice that the "best" marker by this
# metric is:
# M:rs3666257.chr15.bp32830679
# Explanation of SMA results
# Select for those rows wherein the mlogp.M.AtoB is < 0.05. This gives us a probability
# of 90% or greater for the A to B model. We sort by leo.nb.AtoB descending, and select
# for those rows with a leo.nb.AtoB of 0.5 or greater – corresponding to a likelihood
# ratio > 3 compared to the next best model.
# Notice that the BLV scores are all positive indicating that we are not dealing with
# confounded association. Notice also that the RMSEA.AtoB (column AC) is 0 while the
# other RMSEA columns >0.
# There are two major loci which appear to be controlling Insig1 in this cross:
# chromosome 8 between 68-127 Mb and chromosome 16 between 14-95 Mb. Also
# there are two minor loci on chromosome 12 (39-100 Mb) and chromosome 17 (98-112
# Mb). There is potential for additional influence from chromosome 1 at 112 Mb and
# chromosome 17 at 11 Mb.
# We pick the best markers from our major loci on Chr 16 & 8. The small box below is a
# link to the full admissibility analysis spreadsheet. See excerpt below the box. model
1953 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to
675 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to
673 M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to
1951 M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to
2372 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to
1094 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to
# Model                                                                           leo.nb.AtoB  mlogp.M.AtoB
# M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Dhcr7.MMT00053523     1.52         0.0429
# M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Fdft1.MMT00082285     1.83         0.0691
# M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Fdft1.MMT00082285     2.13         0.0381
# M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Dhcr7.MMT00053523     2.03         0.213
# M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Dhcr7.MMT00053523     0.928        0.00508
# M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Fdft1.MMT00082285     0.898        0.158
# M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Fdft1.MMT00082285     1.12         0.0371
# M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Dhcr7.MMT00053523     0.113        0.669
225
# Establish admissible probability and likelihood ratio values by looking at positive
# controls
The four pairs of values for leo.nb.AtoB and mlogp.M.AtoB establish admissible probability and likelihood ratio ranges for our novel gene scan. Notice that while Fdft1 does not always have an mlogp.M.AtoB score of < .05, there is at least one marker
(rs3695481.chr16.bp68778145) where this does hold. Similarly for Dhcr7, there are two markers for which the model probability is > .9, giving an mlogp.M.AtoB of < .05.
# STEP 2: Novel gene scan: Rooting
# We are now going to do single marker analysis using the 4 snps shown above.
cn <- colnames(datCombined)
# match() needs exact names: a stray blank inside any marker name yields NA.
sn1 <- match(
  c(
    "rs3677807.chr8.bp108220543",
    "rs3705920.chr8.bp104872434",
    "rs3705921.chr16.bp49437647",
    "rs3695481.chr16.bp68778145"
  ),
  cn
)
stopifnot(!anyNA(sn1))
# To speed the analysis, we ensure an absolute correlation of at least 0.5 after inspecting
# permissible ranges for path coefficients.
# cx has one row per selected SNP and one column per gene.
cx <- cor(
  datCombined[, sn1],
  datCombined[, str.me$genecols],
  use = "pairwise.complete.obs"
)
# For every SNP row, collect the genes with |correlation| >= 0.5 ...
w <- lapply(seq_len(nrow(cx)), function(i) which(abs(cx[i, ]) >= 0.5))
# ... and pool them into one de-duplicated index vector into str.me$genecols.
uni <- unique(unlist(w))
# old note/aside:
# To execute a single marker analysis on the data with respect to Fdft1, for instance, we
# would do:
date()
smaManyGenes <- single.marker.analysis(
  datCombined,
  snpcols = str.me$snpcols[sn1],
  genecols = which.Insig1,
  traitcols = str.me$genecols[uni]
)
date()
write.csv(smaManyGenes, "sma.Chr16.Chr8.Insig1.ManyGenes.BXHfemale.csv")
# Filter 1: sort by mlogp.M.AtoB and keep values below 0.05, i.e. comparable to the positive controls
# Filter 2: sort by leo.nb.AtoB
# We also want genes that have high correlation with Insig1 itself; our
# positive controls have high Insig1 correlation (0.686 for Fdft1, and 0.811
# for Dhcr7), however their correlation with the SNP on Chr16 is
# rather less: (0.181 and 0.155, respectively).
# cx2 is a 1 x genes matrix of correlations with Insig1.
cx2 <- cor(
  datCombined[, which.Insig1],
  datCombined[, str.me$genecols],
  use = "pairwise.complete.obs"
)
# Named index vector (positions within str.me$genecols) of genes with
# |correlation with Insig1| >= 0.5; Insig1 itself is included at this point.
insig.cor.genes <- which(abs(cx2)[1, ] >= 0.5)
> cx2[1,insig.cor.genes]
Sqle.MMT00000743 X1110015E18Rik.MMT00003670
0.6304737 -0.5471255
325
B430110G05Rik.MMT00004109 BC055107.MMT00004227
0.5301329 0.5273706
Aqp8.MMT00004671 Rdh11.MMT00006408
0.6143781 0.7356240
Bach.MMT00006920 Adpn.MMT00007700
0.5415720 0.7334322
Insig1.MMT00008789 Dlat.MMT00009011
1.0000000 0.6012156
Acas2.MMT00009506 Khk.MMT00011470
0.6762154 0.5025776
D10Ucla1.MMT00011909 BC036563.MMT00012566
0.5146658 0.6778225
Fads2.MMT00012675 MMT00012966
0.6281648 0.6482541
Fdps.MMT00013152 Gpam.MMT00014573
0.5322529 0.7342233
X6330403K07Rik.MMT00016374 Ube2h.MMT00017413
0.5861063 0.5612604
C130074G19Rik.MMT00017549 Fasn.MMT00018029
-0.5300350 0.7775792
C330023D02Rik.MMT00022087 Slc23a1.MMT00022094
0.5242107 0.5025347
Acly.MMT00022101 Scd1.MMT00022878
0.7301801 0.6377463
MGC18837.MMT00022933 Mmab.MMT00023138
0.5256325 0.6611007
Stard4.MMT00024116 Il11ra1.MMT00024788
0.6077838 0.5500532
AV047578.MMT00024954 Fgf10.MMT00025607
0.5968973 0.6467630
Nsdhl.MMT00025608 Dia1.MMT00026453
0.5256367 0.7170547
Idi1.MMT00026786 Mx1.MMT00026973
0.5638015 0.6249441
X3110038L01Rik.MMT00028138 Nol8.MMT00028279
0.5926882 -0.5041313
Tm7sf2.MMT00029655 Slc41a3.MMT00029681
0.6271914 -0.5815686
Chrne.MMT00030398 Pmvk.MMT00030469
0.5359870 0.8231968
Mug1.MMT00031178 X6030440G05Rik.MMT00031386
-0.5244967 0.5639264
Sc5d.MMT00032020 X4833426H19Rik.MMT00032331
0.7232924 0.7297170
MMT00033522 Wdr5.MMT00035187
0.5286671 0.5898010
Elovl6.MMT00035545 X9130427A09Rik.MMT00035964
0.5801317 0.5331331
Paox.MMT00036605 X0610030G03Rik.MMT00036912
0.7116067 0.6882970
Qdpr.MMT00038017 Pgd.MMT00038022
0.5881283 0.5088466
Elovl5.MMT00039557 Grb7.MMT00040021
0.5304403 0.5174425
X1810014L12Rik.MMT00040123 Allc.MMT00041316
0.6219885 0.5245968
Lss.MMT00046250 Adipor2.MMT00046649
0.7657943 0.6310839
Mod1.MMT00046920 Fads1.MMT00046994
0.5997629 0.7408953
Zdhhc6.MMT00047560 Acac.MMT00048501
0.5414055 0.7362951
Aacs.MMT00050881 Paox.MMT00052924
425
0.6096575 0.7688725
Thrsp.MMT00053283 Dhcr7.MMT00053523
0.5974466 0.8110627
X5730427C23Rik.MMT00054020 X1700007H16Rik.MMT00054102
0.5625832 0.5589084
Slc25a1.MMT00054225 Mvd.MMT00054501
0.6637185 0.7689078
X0610007P14Rik.MMT00054614 Pcyt2.MMT00057183
0.7746789 0.5699673
Dntt.MMT00057265 MMT00057407
0.5073485 -0.5038681
Map17.MMT00058422 Cyp2r1.MMT00058486
0.6249717 0.5594660
X1810044O22Rik.MMT00058679 Hoxb1.MMT00058757
0.6529061 0.5634646
Sc4mol.MMT00059097 MMT00060232
0.7113636 0.6089287
Eaf2.MMT00060307 Echdc1.MMT00061424
0.5587733 0.6080371
MMT00063300 Supt3h.MMT00063933
0.5027611 0.5981178
Qdpr.MMT00063965 Fdps.MMT00063985
0.5800807 0.5627501
Hmgcr.MMT00066449 Acacb.MMT00067953
0.6891436 0.5720003
Pcsk9.MMT00068222 Dbi.MMT00068995
0.8515240 0.6268549
X1700025B16Rik.MMT00069557 Acat2.MMT00069683
0.5074497 0.6815003
Gng1.MMT00069753 Gcat.MMT00069984
0.5647934 0.5064543
C730027P07Rik.MMT00070046 Gpsn2.MMT00071050
0.5177622 0.6836609
Cyp51.MMT00071535 MMT00074126
0.6513783 0.5435561
Fdps.MMT00074306 X9630046K23Rik.MMT00075802
0.5579677 -0.5019937
X1810044O22Rik.MMT00075896 Gale.MMT00077659
0.6939889 0.6045933
Pfkfb1.MMT00078546 Fdft1.MMT00080352
0.5139119 0.6860285
Ebp.MMT00080421 Oprs1.MMT00081203
0.5159082 0.6833021
Tsrc1.MMT00081767 Cs.MMT00082255
0.5028149 0.5136033
Fdft1.MMT00082285
0.6723033
>
# 110 genes if we exclude Insig1’s correlation with itself.
# Do any of these genes have good SMA results?
# Position of Insig1 within its own correlation hits (kept for reference).
insig1.in.own.results <- which("Insig1.MMT00008789" == names(insig.cor.genes))
# Drop Insig1 with a logical mask: negative indexing with an empty which()
# result would silently drop every gene instead of none.
insig.highcor.genes <-
  insig.cor.genes[names(insig.cor.genes) != "Insig1.MMT00008789"]
date()
smaHighInsig1cor <- single.marker.analysis(
  datCombined,
  snpcols = str.me$snpcols[sn1],
  genecols = which.Insig1,
  traitcols = str.me$genecols[insig.highcor.genes]
)
date()
write.csv(smaHighInsig1cor, "sma.Chr16.Chr8.high.Insig1.cor.genes.BXHfemale.csv")
# Here is that data, as an excel spreadsheet.
# We run several filters on these SMA results
# First we require LEO.NB.AtoB of 0.75 or greater.
# Second we require mlogp.M.AtoB of 0.05 or less.
# The following table results. model: SNP to gene to gene
M:rs3705921.chr16.bp49437647 A:Insig1 B:Acac.MMT00048501
M:rs3695481.chr16.bp68778145
M:rs3705920.chr8.bp104872434
M:rs3705921.chr16.bp49437647
M:rs3705921.chr16.bp49437647
M:rs3677807.chr8.bp108220543
M:rs3705921.chr16.bp49437647
M:rs3705920.chr8.bp104872434
M:rs3705920.chr8.bp104872434
M:rs3677807.chr8.bp108220543
M:rs3677807.chr8.bp108220543
M:rs3705920.chr8.bp104872434
M:rs3705921.chr16.bp49437647
M:rs3677807.chr8.bp108220543
M:rs3705921.chr16.bp49437647
M:rs3695481.chr16.bp68778145
M:rs3677807.chr8.bp108220543
M:rs3705920.chr8.bp104872434
M:rs3695481.chr16.bp68778145
M:rs3695481.chr16.bp68778145
M:rs3677807.chr8.bp108220543
M:rs3677807.chr8.bp108220543
M:rs3705921.chr16.bp49437647
M:rs3705921.chr16.bp49437647
M:rs3705920.chr8.bp104872434
M:rs3677807.chr8.bp108220543
M:rs3695481.chr16.bp68778145
M:rs3705920.chr8.bp104872434
M:rs3695481.chr16.bp68778145
M:rs3705921.chr16.bp49437647
M:rs3705921.chr16.bp49437647
M:rs3677807.chr8.bp108220543
M:rs3695481.chr16.bp68778145
M:rs3705921.chr16.bp49437647
M:rs3695481.chr16.bp68778145
M:rs3705921.chr16.bp49437647
M:rs3705921.chr16.bp49437647
M:rs3705921.chr16.bp49437647
M:rs3677807.chr8.bp108220543
A:Insig1 B:Acas2.MMT00009506
A:Insig1 B:Adipor2.MMT00046649
A:Insig1 B:Adipor2.MMT00046649
A:Insig1 B:Aqp8.MMT00004671
A:Insig1 B:B430110G05Rik.MMT00004109
A:Insig1 B:BC036563.MMT00012566
A:Insig1 B:C130074G19Rik.MMT00017549
A:Insig1 B:Cs.MMT00082255
A:Insig1 B:Cs.MMT00082255
A:Insig1 B:Dbi.MMT00068995
A:Insig1 B:Dbi.MMT00068995
A:Insig1 B:Dhcr7.MMT00053523
A:Insig1 B:Dhcr7.MMT00053523
A:Insig1 B:Dia1.MMT00026453
A:Insig1 B:Dlat.MMT00009011
A:Insig1 B:Dlat.MMT00009011
A:Insig1 B:Dlat.MMT00009011
A:Insig1 B:Eaf2.MMT00060307
A:Insig1 B:Elovl6.MMT00035545
A:Insig1 B:Fads1.MMT00046994
A:Insig1 B:Fads2.MMT00012675
A:Insig1 B:Fasn.MMT00018029
A:Insig1 B:Fdft1.MMT00080352
A:Insig1 B:Fdft1.MMT00080352
A:Insig1 B:Fdft1.MMT00080352
A:Insig1 B:Fdft1.MMT00082285
A:Insig1 B:Fdft1.MMT00082285
A:Insig1 B:Fdps.MMT00063985
A:Insig1 B:Gale.MMT00077659
A:Insig1 B:Map17.MMT00058422
A:Insig1 B:MMT00012966
A:Insig1 B:MMT00060232
A:Insig1 B:Mod1.MMT00046920
A:Insig1 B:Mx1.MMT00026973
A:Insig1 B:Pgd.MMT00038022
A:Insig1 B:Qdpr.MMT00038017
A:Insig1 B:Qdpr.MMT00063965
A:Insig1 B:Rdh11.MMT00006408
625
0.984
1.15
2.03
1.65
1.12
2.13
0.871
0.85
0.879
0.908
1.47
1.79
0.825
0.797
1.85
1.44 leo.nb.AtoB mlogp.M.AtoB PathAB PPathAB BLV.AtoB
1.14 0.0015 0.736 0 0.846
1.1
1.85
0.00277
0.0289
0.676
0.62
0
0
0.689
0.893
1.03
0.886
1.21
1.13
0.0495
0.0283
0.0263
0.0123
0.62
0.614
0.526
0.678
0
0
6.71E-13
0
0.649
0.509
0.567
0.719
1.31
1.29
0.958
1.56
1.7
0.928
1.52
1.09
0.0277
0.0144
0.0399
0.00782
0.04
0.00508
0.0429
0.0377
-0.53
0.497
0.497
0.627
0.627
0.811
0.811
0.716
3.80E-13
2.96E-11
2.96E-11
0
0
0
0
0
0.561
0.542
0.417
0.793
0.802
1.08
1.37
0.865
1.49
0.957
0.921
0.866
0.772
0.92
0.9
1.88
0.0045
0.0104
0.046
0.000894
0.0173
0.033
0.0362
0.0138
0.00406
0.0292
0.0441
0.0371
0.0381
0.0385
0.0329
0.0397
0.0447
0.0196
0.00438
0.0382
0.0411
0.0127
0.011
0.0317
0.601
0.601
0.601
0.778
0.686
0.686
0.686
0.672
0.672
0.625
0.648
0.609
0.6
0.625
0.509
0.588
0.736
0
0
0
0.559 4.88E-15
0.568 1.11E-15
0.741
0.628
0
0
0
0
0
0
0
0
0.563 2.66E-15
0.605 0
0
0
0
0
0
6.52E-12
0
0.58 2.22E-16
0
0.765
0.571
0.52
0.507
0.425
0.526
0.507
1.07
1
0.724
1.02
0.88
0.742
1.09
0.511
0.483
0.511
0.516
0.727
0.862
0.445
0.435
1.09
0.722
M:rs3677807.chr8.bp108220543
M:rs3705921.chr16.bp49437647
M:rs3705921.chr16.bp49437647
M:rs3705920.chr8.bp104872434
M:rs3705920.chr8.bp104872434
M:rs3705921.chr16.bp49437647
M:rs3695481.chr16.bp68778145
M:rs3677807.chr8.bp108220543
M:rs3677807.chr8.bp108220543
M:rs3677807.chr8.bp108220543
M:rs3705921.chr16.bp49437647
M:rs3677807.chr8.bp108220543
M:rs3705920.chr8.bp104872434
M:rs3705920.chr8.bp104872434
# Link to full spreadsheet:
A:Insig1 B:Sc4mol.MMT00059097
A:Insig1 B:Sc4mol.MMT00059097
A:Insig1 B:Scd1.MMT00022878
A:Insig1 B:Scd1.MMT00022878
A:Insig1 B:Slc23a1.MMT00022094
A:Insig1 B:Slc25a1.MMT00054225
A:Insig1 B:Slc25a1.MMT00054225
A:Insig1 B:Slc41a3.MMT00029681
A:Insig1 B:Stard4.MMT00024116
A:Insig1 B:X0610030G03Rik.MMT00036912
A:Insig1 B:X6030440G05Rik.MMT00031386
A:Insig1 B:X6030440G05Rik.MMT00031386
A:Insig1 B:X6030440G05Rik.MMT00031386
A:Insig1 B:Zdhhc6.MMT00047560
1.54
1.85
0.879
1.21
1.6
1.5
1.86
1.16
1.03
1.94
1.26
1.1
0.947
1.41
0.0137
0.0171
0.0111
0.0304
0.00993
0.012
0.042
0.034
0.045
0.0343
0.0272
0.0308
0.0333
0.0332
0.711
0.711
0.638
0.638
0.503 1.44E-11
0.664 0
0.664 0
-0.577 2.22E-16
0
0
0
0
0.608
0.688
0.564 2.22E-15
0.564 2.22E-15
0
0
0.564 2.22E-15
0.541 7.33E-14
0.799
1.03
0.502
0.57
0.735
0.675
1.02
0.747
0.624
0.953
0.522
0.685
0.58
0.7
725
model gene
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Acac.MMT00048501
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Acas2.MMT00009506
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Adipor2.MMT00046649
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Adipor2.MMT00046649
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Aqp8.MMT00004671
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:B430110G05Rik.MMT00004109
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:BC036563.MMT00012566
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:C130074G19Rik.MMT00017549
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Cs.MMT00082255
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Cs.MMT00082255
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Dbi.MMT00068995
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Dbi.MMT00068995
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Dhcr7.MMT00053523
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Dhcr7.MMT00053523
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Dia1.MMT00026453
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Dlat.MMT00009011
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Dlat.MMT00009011
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Dlat.MMT00009011
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Eaf2.MMT00060307
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Elovl6.MMT00035545
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Fads1.MMT00046994
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Fads2.MMT00012675
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Fasn.MMT00018029
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Fdft1.MMT00080352
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Fdft1.MMT00080352
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Fdft1.MMT00080352
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Fdft1.MMT00082285
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Fdft1.MMT00082285
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Fdps.MMT00063985
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Gale.MMT00077659
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Map17.MMT00058422
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:MMT00012966
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:MMT00060232
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Mod1.MMT00046920
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Mx1.MMT00026973
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Pgd.MMT00038022
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Qdpr.MMT00038017
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Qdpr.MMT00063965
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Rdh11.MMT00006408
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Sc4mol.MMT00059097
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Sc4mol.MMT00059097
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Scd1.MMT00022878
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Scd1.MMT00022878
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Slc23a1.MMT00022094
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Slc25a1.MMT00054225
M:rs3695481.chr16.bp68778145 to A:Insig1.MMT00008789 to B:Slc25a1.MMT00054225
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Slc41a3.MMT00029681
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Stard4.MMT00024116
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:X0610030G03Rik.MMT00036912
M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:X6030440G05Rik.MMT00031386
M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:X6030440G05Rik.MMT00031386
M:rs3705920.chr8.bp104872434 to A:Insig1.MMT00008789 to B:Zdhhc6.MMT00047560
1.88
1.86
1.16
1.03
1.94
1.26
1.1
0.947
0.85
0.879
1.49
0.957
0.921
0.866
0.772
0.92
0.9
1.41
1.54
1.85
0.879
1.21
1.6
1.5
leo.nb.AtoB mlogp.M.AtoB PathAB PPathAB BLV.AtoB
1.14
1.1
1.85
0.0015
0.00277
0.0289
0.736
0.676
0.62
0
0
0
0.846
0.689
0.893
1.03
0.886
1.21
1.13
0.0495
0.0283
0.0263
0.0123
0.62
0.614
0.678
0
0
0.526 6.71E-13
0
0.649
0.509
0.567
0.719
1.31
1.29
0.958
1.56
1.7
0.928
1.52
0.0277
0.0429
-0.53 3.80E-13
0.0144
0.497 2.96E-11
0.0399
0.497 2.96E-11
0.00782
0.627
0.04
0.627
0.00508
0.811
0.811
0
0
0
0
0.561
0.542
0.417
0.793
0.802
1.08
1.37
0.984
1.15
2.03
1.65
1.12
2.13
0.871
1.09
0.908
1.47
1.79
0.825
0.797
1.85
1.44
0.0377
0.0045
0.0104
0.046
0.0138
0.00406
0.0292
0.716
0.601
0.601
0.601
0.778
0.686
0
0
0
0
0.000894
0.559 4.88E-15
0.0173
0.568 1.11E-15
0.033
0.741
0.0362
0.628
0.686
0.0441
0.686
0.0371
0.672
0.0381
0.672
0
0
0
0
0
0
0
0
0.0385
0.563 2.66E-15
1
0.724
1.02
0.88
0.742
1.09
0.511
0.865
0.516
0.727
0.862
0.445
0.435
1.09
0.722
0.0329
0.605
0.0397
0.625
0.0447
0.648
0.0196
0.609
0.00438
0.0382
0.625
0.0317
0.736
0.0137
0.711
0.0171
0.711
0.0111
0.638
0.012
0.664
0.042
0.664
0.034
0.045
0.608
0
0
0
0
0
0
0.0411
0.509 6.52E-12
0.0127
0.588
0.011
0
0
0
0
0.0304
0.638
0
0.00993
0.503 1.44E-11
0.0343
0.6
0.58 2.22E-16
-0.577 2.22E-16
0.688
0
0
0
0
0
0.0272
0.564 2.22E-15
0.0308
0.564 2.22E-15
0.0333
0.564 2.22E-15
0.0332
0.541 7.33E-14
1.07
1.02
0.747
0.624
0.953
0.522
0.685
0.58
0.483
0.511
0.765
0.571
0.52
0.507
0.425
0.526
0.507
0.7
0.799
1.03
0.502
0.57
0.735
0.675
# Now we run two follow-on analyses on this set of 37 genes (35 after the two discards noted below).
Acac.MMT00048501
Acas2.MMT00009506
Adipor2.MMT00046649
Aqp8.MMT00004671
B430110G05Rik.MMT00004109
BC036563.MMT00012566
C130074G19Rik.MMT00017549
Cs.MMT00082255
Dbi.MMT00068995
Dhcr7.MMT00053523
Dia1.MMT00026453
Dlat.MMT00009011
Eaf2.MMT00060307
Elovl6.MMT00035545
Fads1.MMT00046994
Fads2.MMT00012675
Fasn.MMT00018029
Fdft1.MMT00082285
Fdps.MMT00063985
Gale.MMT00077659
Map17.MMT00058422
# MMT00012966 – discarded because it does not map to a gene in the UCSC 2004 Mouse assembly
# MMT00060232 – discarded because it maps to three locations in the UCSC 2004 Mouse genome
Mod1.MMT00046920
Mx1.MMT00026973
Pgd.MMT00038022
Qdpr.MMT00038017
Rdh11.MMT00006408
Sc4mol.MMT00059097
Scd1.MMT00022878
Slc23a1.MMT00022094
Slc25a1.MMT00054225
Slc41a3.MMT00029681
Stard4.MMT00024116
X0610030G03Rik.MMT00036912
X6030440G05Rik.MMT00031386
Zdhhc6.MMT00047560
# Next a neo() analysis to rule in good candidates and rule out non-viable ones
# using multimarker models.
#
# The neo analysis will be the first composite or multi-marker analysis that we’ve
# done on these genes. We are encouraged to see many of the known responders to Insig1
# present in this list: Acac, Fdft1, Mod1, Dhcr7. Others like Hmgcr had a mlogp.M.AtoB
# of 0.0974, and so didn’t make the cut even though they are also downstream of Insig1.
# This indicates that we may be a little conservative here; however, if any of
# these genes are novel, we will have a great deal of confidence in predicting these
925
# novel genes as really downstream of Insig1.
# Column names (in datCombined) of the 35 candidate genes kept after filtering.
# Each name must match a column exactly, so no literal may contain blanks or
# line breaks.
downstream.of.insig1 <- c(
  "Acac.MMT00048501", "Acas2.MMT00009506", "Adipor2.MMT00046649",
  "Aqp8.MMT00004671", "B430110G05Rik.MMT00004109", "BC036563.MMT00012566",
  "C130074G19Rik.MMT00017549", "Cs.MMT00082255", "Dbi.MMT00068995",
  "Dhcr7.MMT00053523", "Dia1.MMT00026453", "Dlat.MMT00009011",
  "Eaf2.MMT00060307", "Elovl6.MMT00035545", "Fads1.MMT00046994",
  "Fads2.MMT00012675", "Fasn.MMT00018029", "Fdft1.MMT00082285",
  "Fdps.MMT00063985", "Gale.MMT00077659", "Map17.MMT00058422",
  "Mod1.MMT00046920", "Mx1.MMT00026973", "Pgd.MMT00038022",
  "Qdpr.MMT00038017", "Rdh11.MMT00006408", "Sc4mol.MMT00059097",
  "Scd1.MMT00022878", "Slc23a1.MMT00022094", "Slc25a1.MMT00054225",
  "Slc41a3.MMT00029681", "Stard4.MMT00024116", "X0610030G03Rik.MMT00036912",
  "X6030440G05Rik.MMT00031386", "Zdhhc6.MMT00047560"
)
# So first a NEO multimarker analysis.
# Column layout of datC: all SNP columns first, then Insig1, then one column
# per candidate gene.
traits <- cbind(datCombined[, which.Insig1], datCombined[, downstream.of.insig1])
colnames(traits)[1] <- colnames(datCombined)[which.Insig1]
datC <- data.frame(cbind(datCombined[, str.me$snpcols], traits))
n.snp.cols <- length(str.me$snpcols)
pm <- neo.get.param()
# A = Insig1 (first column after the SNPs); B = every remaining trait column.
# Derived from datC so it stays correct whatever the candidate count is.
pm$A <- n.snp.cols + 1
pm$B <- (n.snp.cols + 2):ncol(datC)
# impute medians of missing trait data...
pm$rough.and.ready.NA.imputation <- TRUE
pm$quiet <- FALSE
pm$run.title <- "insig1.downstream"
x <- neo(datC, pm = pm, snpcols = seq_len(n.snp.cols))
save(x, file = "neo.analysis.insig1.downstreamers.rdat")
# Grafting of controls for Insig1 downstream candidates
z <- neo.graft.A.B(x)
# Then, on the candidates that remain, we’ll run a robustness analysis.
# Robustness Analysis of Insig1 downstream candidates
# datC holds the SNP columns, then Insig1, then one column per candidate gene;
# each iteration pairs Insig1 with one candidate.
ra <- list()
insig1.col <- length(str.me$snpcols) + 1
# Loop over the candidate columns actually present rather than a fixed count.
n.candidates <- ncol(datC) - insig1.col
datSNP <- datC[, str.me$snpcols]  # same for every candidate, so built once
for (k in seq_len(n.candidates)) {
  datExpr <- datC[, c(insig1.col, insig1.col + k)]
  ra[[k]] <- NEOrobustnessForwardAndGreedy(datSNP, datExpr,
    TopForwardAndGreedySNPs = 1:20)
  print(paste("done with robustness on ", colnames(datC)[insig1.col + k]))
  # save in the middle, in case of terminal session auto logout.
  save(ra, file = "ra.robustness.on.insig1.downstreamers.rdat")
}
# older notes on Robustness analysis of the uni genes….
# Robustness Analysis
1025
# We run robustness analysis out to 20 SNPs on the novel genes that we have identified.
# We use the NEOrobustnessGreedy function to determine LEO and ZEO scores for
# SNPs identified by greedy selection.
# NOTE: this older section overwrites traits, datC, pm and x from the
# downstream-candidate analysis.
traits <- cbind(datCombined[, which.Insig1], datCombined[, str.me$genecols[uni]])
colnames(traits)[1] <- colnames(datCombined)[which.Insig1]
#r1=list()
#r1$greedy=NEOrobustnessGreedy(datCombined[,str.me$snpcols],traits,TopGreedySNPs=1:20,no.log.quiet=T)
datC <- data.frame(cbind(datCombined[, str.me$snpcols], traits))
# quickly check: if we do a full NEO, do we get any of these genes?
n.snp.cols <- length(str.me$snpcols)
pm <- neo.get.param()
# A = Insig1 (first column after the SNPs); B = all `uni` gene columns.
pm$A <- n.snp.cols + 1
pm$B <- (n.snp.cols + 2):ncol(datC)
# impute medians of missing trait data...
pm$rough.and.ready.NA.imputation <- TRUE
pm$quiet <- TRUE
pm$run.title <- "robustness.analysis.for.chr16.chr8.insig1.roots"
x <- neo(datC, pm = pm, snpcols = seq_len(n.snp.cols))
# We use the NEOrobustnessForward function to determine LEO and ZEO scores for
# SNPs identified by forward regression selection.
# NOTE(review): rFsp27PCblue200.50 and restSNP are not defined in the visible
# part of this script, and the first write.csv reads a $greedy element that is
# not assigned here -- presumably left over from another analysis; confirm
# before running.
rFsp27PCblue200.50$forward <- NEOrobustnessForward(datSNP[, restSNP], traits,
  TopForwardSNPs = 1:50)
write.csv(rFsp27PCblue200.50$greedy, "tables/8-2/rTurq.R2.min.greedy.csv")
write.csv(rFsp27PCblue200.50$forward, "tables/8-2/rFsp27PCblue200.50.forward.csv")
#
# The following function allows the user to specify the number of SNPs wanted for the
# starting set in robustness analysis. We aim to start with SNPs that are highly correlated
# to either "trait" we are analyzing (individual expressions, principal component, or
# clinical trait). This function will rank SNPs by their correlation to either of these traits,
# and identify a consensus ranking taking the parallel minimum from these two ranks.
# Then, X (a number specified by the user) number of SNPs are identified that have the
# best consensus ranking.
if (exists("SNPselection.min")) rm("SNPselection.min")
# SNPselection.min(): choose a starting set of SNPs for robustness analysis.
#
# Each SNP is ranked twice by absolute pairwise-complete correlation, once
# against `exp` and once against `PC` (rank 1 = strongest). The consensus rank
# is the parallel minimum of the two, and the `numSNPs` SNPs with the best
# consensus rank are selected.
#
# Arguments:
#   SNPdata  data frame or matrix with one column per SNP; columns are
#            converted with as.numeric(as.character(.)), so factor-coded
#            genotypes are accepted.
#   exp      numeric trait vector (the name shadows base::exp inside this
#            function only; kept for interface compatibility).
#   PC       second numeric trait vector.
#   numSNPs  number of SNPs to select.
#
# Value: logical vector with one entry per SNP column, TRUE for selected SNPs.
#   If numSNPs is not a whole number in 0..ncol(SNPdata), an all-NA vector is
#   returned (the sentinel the original code used when no cut-off was found).
SNPselection.min <- function(SNPdata, exp, PC, numSNPs) {
  n.snps <- ncol(SNPdata)

  # |cor| of every SNP column with one trait.
  abs.cor.with <- function(trait) {
    out <- rep(NA_real_, n.snps)
    for (j in seq_len(n.snps)) {
      genotype <- as.numeric(as.character(SNPdata[, j]))
      out[j] <- abs(cor(genotype, trait, use = "pairwise.complete.obs"))
    }
    out
  }

  # Rank of each element, 1 = largest. order() gives the sorting permutation
  # (which element comes first), not the rank of each element, so it has to be
  # inverted before ranks of the same SNP can be compared across traits.
  rank.desc <- function(v) {
    r <- integer(length(v))
    r[order(v, decreasing = TRUE)] <- seq_along(v)
    r
  }

  rankCorX <- rank.desc(abs.cor.with(exp))
  rankCorPC <- rank.desc(abs.cor.with(PC))

  if (!(length(numSNPs) == 1 && numSNPs %in% seq.int(0, n.snps))) {
    return(rep(NA, n.snps))
  }

  # Two SNPs can share a consensus rank (one via exp, one via PC). Adding 0.5
  # to the exp ranks breaks every such tie in favour of the PC rank and makes
  # all consensus values distinct, so exactly numSNPs SNPs can be cut off.
  rankCombined <- pmin(rankCorX + 0.5, rankCorPC)
  rank(rankCombined, ties.method = "first") <= numSNPs
}
# Helper function: factor to numeric
# anac(x): convert x to numeric via character, i.e.
# as.numeric(as.character(x)), so that a factor's labels (not its internal
# integer codes) are parsed as numbers.
# Original author's note: affected by options(digits = 4); use
# getOption("digits") to check.
# NOTE: in the extracted text this definition was fused onto the comment line
# and therefore never ran.
if(exists("anac") ) rm(anac);
anac=function(x) as.numeric(as.character(x))
# helper function for robustness.plot
# get.ylim(the.data): y-axis limits covering all three LEO score series.
#   the.data  object convertible by data.frame() that has columns LEO.NB.FOR,
#             LEO.NB.ALL and SIMPLE.MAX.MAX (numeric, or factor/character
#             codes of numbers).
# Returns c(min, max) over the three columns, ignoring NA.
# NOTE: in the extracted text the function header was fused onto the comment
# line, leaving the body as stray top-level statements.
if(exists("get.ylim") ) rm(get.ylim);
get.ylim = function(the.data) {
  x = data.frame(the.data)
  all.scores = c(anac(x$LEO.NB.FOR), anac(x$LEO.NB.ALL), anac(x$SIMPLE.MAX.MAX))
  range(all.scores, na.rm=TRUE)
}
# window opening helper function for robustness.plot
# cross.platform.windows(): open a window for plotting, on windows, mac, or unix.
# Takes no arguments; returns the value of the device-opening call
# (windows(), quartz() or X11()).
#
# Changes relative to the previous version:
#   * the definition was fused onto a comment line in the extracted text;
#   * the macOS test now accepts any .Platform$pkgType that starts with
#     "mac.binary" (e.g. "mac.binary.big-sur-arm64"), not only the exact
#     string "mac.binary";
#   * the non-base helper has.name() is replaced by a plain NULL check.
if (exists("cross.platform.windows")) { rm(cross.platform.windows) }
cross.platform.windows=function() {
  if (.Platform$OS.type == "windows") {
    return(windows());
  }
  pkg.type = .Platform$pkgType   # NULL if the component is absent
  if (!is.null(pkg.type) && substr(pkg.type, 1, 10) == "mac.binary") {
    return(quartz());
  }
  return(X11());
}
# robustness.plot(): plot the Robustness studies.
#
# Draws three score series against the number of SNPs (x = 1, 2, ...):
#   LEO.NB.FOR     line + red circles
#   SIMPLE.MAX.MAX line + blue diamonds
#   LEO.NB.ALL     line + yellow squares
# plus dashed horizontal lines at threshold1 and threshold2.
#
# Arguments:
#   the.data       object convertible by data.frame() with columns LEO.NB.FOR,
#                  LEO.NB.ALL and SIMPLE.MAX.MAX.
#   do.postscript  if TRUE, plot into the Postscript file `file.name`.
#   file.name      Postscript output file (used only when do.postscript=TRUE).
#   ylim           y-axis limits; the upper limit is raised to at least
#                  1.5 + max(threshold1, threshold2).
#   main, xlab, ylab  plot title and axis labels.
#   threshold1     height of the first dashed line (LEO.NB.ALL threshold).
#   threshold2     height of the second dashed line (LEO.NB.FOR threshold).
#   new.window     if TRUE (and not do.postscript), open a new plot window.
# Value: invisibly, the value returned by plot().
#
# Changes relative to the previous version:
#   * the definition was fused onto a comment line and had a stray page number
#     and a broken `b g=` argument in the extracted text;
#   * the legend is placed with locator() only on an interactive device; on a
#     file device (e.g. do.postscript=TRUE), or if no point is picked, it goes
#     to the top right instead of failing;
#   * the Postscript device is closed via on.exit(), so it is also closed when
#     plotting stops with an error.
if(exists("robustness.plot") ) rm(robustness.plot);
robustness.plot = function(the.data, do.postscript=FALSE, file.name = "postscriptfile.ps", ylim=get.ylim(the.data), main="LEO.NB Robustness Study", xlab="Number of SNPs", ylab="LEO.NB", threshold1 = 0.3, threshold2 = 0.75, new.window=FALSE) {
  if (do.postscript) {
    postscript(file=file.name, horizontal=FALSE) # optional output to Postscript file
    on.exit(dev.off(), add=TRUE)
  } else {
    if (new.window) cross.platform.windows()
  }

  x = data.frame(the.data)
  b.to.a.greedysnp.leo.nb.for = anac(x$LEO.NB.FOR)
  b.to.a.greedysnp.leo.nb.all = anac(x$LEO.NB.ALL)
  b.to.a.greedysnp.leo.nb.max = anac(x$SIMPLE.MAX.MAX)

  ylim[2] = max(ylim[2], 1.5+max(threshold1,threshold2)) # make space for the dashed threshold lines

  device.num = plot(seq_along(b.to.a.greedysnp.leo.nb.for), b.to.a.greedysnp.leo.nb.for,
                    type="l", ylab=ylab, xlab=xlab, ylim=ylim, cex.lab=1.5,
                    main=main, cex.main=1.5)
  points(seq_along(b.to.a.greedysnp.leo.nb.for), b.to.a.greedysnp.leo.nb.for, pch=21, bg="red", cex=1.5)

  lines(seq_along(b.to.a.greedysnp.leo.nb.max), b.to.a.greedysnp.leo.nb.max)
  points(seq_along(b.to.a.greedysnp.leo.nb.max), b.to.a.greedysnp.leo.nb.max, pch=23, cex=1.5, bg="blue") # diamond

  lines(seq_along(b.to.a.greedysnp.leo.nb.all), b.to.a.greedysnp.leo.nb.all)
  points(seq_along(b.to.a.greedysnp.leo.nb.all), b.to.a.greedysnp.leo.nb.all, pch=22, cex=1.5, bg="yellow") # square

  abline(h=threshold1, lty=2) # LEO.NB.ALL threshold
  abline(h=threshold2, lty=2) # LEO.NB.FOR threshold

  # Ask for a legend position only where a mouse click is possible.
  a = NULL
  if (!do.postscript && dev.interactive()) {
    print("pick legend location now and press left button once. Then press right button and choose stop.")
    a = locator()
  }
  legend.text = c("LEO.NB.FOR", "LEO.NB.ALL", "MAX.vs.MAX")
  if (is.null(a)) {
    legend("topright", legend=legend.text, cex=1.2, pch=c(21,22,23), pt.bg=c("red","yellow","blue"))
  } else {
    # use the first picked point
    legend(a$x[1], a$y[1], legend=legend.text, cex=1.2, pch=c(21,22,23), pt.bg=c("red","yellow","blue"))
  }

  invisible(device.num)
}
# ZEO version of the above robustness analysis
# robustness.plot.zeo(): plot the ZEO Robustness studies.
#
# Draws three score series against the number of SNPs (x = 1, 2, ...):
#   zeo.for  line + red circles
#   zeo.max  line + blue diamonds
#   zeo.all  line + yellow squares
# plus a dashed horizontal line at threshold1.
#
# Arguments:
#   the.data       object convertible by data.frame() with columns zeo.for,
#                  zeo.all and zeo.max.
#   do.postscript  if TRUE, plot into the Postscript file `file.name`.
#   file.name      Postscript output file (used only when do.postscript=TRUE).
#   ylim           y-axis limits. When not supplied, the range of the plotted
#                  zeo.* scores is used. The upper limit is raised to at
#                  least 1.5 + threshold1.
#   main, xlab, ylab  plot title and axis labels.
#   threshold1     height of the dashed ZEO threshold line.
#   new.window     if TRUE (and not do.postscript), open a new plot window.
# Value: invisibly, the value returned by plot().
#
# Changes relative to the previous version:
#   * the definition was fused onto a comment line and had a stray page number
#     in the body in the extracted text;
#   * when ylim is not supplied, limits come from the zeo.* columns that are
#     actually plotted; the signature default get.ylim(the.data) reads the
#     LEO.* columns and is kept only for interface compatibility;
#   * the legend is placed with locator() only on an interactive device; on a
#     file device (e.g. do.postscript=TRUE), or if no point is picked, it goes
#     to the top right instead of failing;
#   * the Postscript device is closed via on.exit(), so it is also closed when
#     plotting stops with an error.
if(exists("robustness.plot.zeo") ) rm(robustness.plot.zeo);
robustness.plot.zeo = function(the.data, do.postscript=FALSE, file.name = "postscriptfile.ps", ylim=get.ylim(the.data), main="ZEO Robustness Study", xlab="Number of SNPs", ylab="ZEO score", threshold1 = 1, new.window=FALSE) {
  if (do.postscript) {
    postscript(file=file.name, horizontal=FALSE) # optional output to Postscript file
    on.exit(dev.off(), add=TRUE)
  } else {
    if (new.window) cross.platform.windows()
  }

  x = data.frame(the.data)
  score.for = anac(x$zeo.for)
  score.all = anac(x$zeo.all)
  score.max = anac(x$zeo.max)

  # Default limits: range of what is plotted (the lazy default is never
  # evaluated in this case).
  if (missing(ylim)) {
    ylim = range(c(score.for, score.all, score.max), na.rm=TRUE)
  }
  ylim[2] = max(ylim[2], 1.5+threshold1) # make space for the dashed threshold lines

  device.num = plot(seq_along(score.for), score.for, type="l", ylab=ylab, xlab=xlab,
                    ylim=ylim, cex.lab=1.5, main=main, cex.main=1.5)
  points(seq_along(score.for), score.for, pch=21, bg="red", cex=1.5)

  lines(seq_along(score.max), score.max)
  points(seq_along(score.max), score.max, pch=23, cex=1.5, bg="blue") # diamond

  lines(seq_along(score.all), score.all)
  points(seq_along(score.all), score.all, pch=22, cex=1.5, bg="yellow") # square

  abline(h=threshold1, lty=2) # ZEO threshold

  # Ask for a legend position only where a mouse click is possible.
  a = NULL
  if (!do.postscript && dev.interactive()) {
    print("pick legend location now and press left button once. Then press right button and choose stop.")
    a = locator()
  }
  legend.text = c("ZEO.FOR", "ZEO.ALL", "ZEO.MAX")
  if (is.null(a)) {
    legend("topright", legend=legend.text, cex=1.2, pch=c(21,22,23), pt.bg=c("red","yellow","blue"))
  } else {
    # use the first picked point
    legend(a$x[1], a$y[1], legend=legend.text, cex=1.2, pch=c(21,22,23), pt.bg=c("red","yellow","blue"))
  }

  invisible(device.num)
}
1425
# Load the BxH data and locate the genes of interest.
# NOTE: statements were fused onto single lines, setwd() used typographic
# quotes, and the use="p" literal was split by a line wrap in the extracted
# text.
setwd("c:/good")
a=load("bxh/liver.snps.23388genes.clinical.bxh.male.and.female.rdat")
which.Insig1=pmatch("Insig1",colnames(liver.bxh.male.female))
which.Fdft1=pmatch("Fdft1.MMT00082285",colnames(liver.bxh.male.female))
which.Dhcr7=pmatch("Dhcr7",colnames(liver.bxh.male.female))
# datCombined contains the data we will analyze
male=liver.bxh.male.female$sex==1
datCombined=liver.bxh.male.female[male,]
# correlation of Insig1 with every gene expression column (pairwise complete)
cx2=cor(datCombined[,which.Insig1],datCombined[,str.me$genecols],use="p")
# genes with |correlation| >= 0.5; indices are positions within str.me$genecols
insig.cor.genes=which(abs(cx2)[1,]>=0.5)
cor.insig.cor.genes = cx2[insig.cor.genes]
names(cor.insig.cor.genes) = names(insig.cor.genes)
> insig.cor.genes
Srebf2.MMT00000464 Sqle.MMT00000743
139 209
Tcf1.MMT00001510 Saa2.MMT00002021
412 548
Atp6v0b.MMT00003351 Irak1bp1.MMT00003492
965 1009
Rdh11.MMT00006408 Adpn.MMT00007700
1894 2283
D13Wsu177e.MMT00007756 Pstpip2.MMT00008523
2303 2571
Insig1.MMT00008789 Rdbp.MMT00009489
2677 2901
Acas2.MMT00009506 Carkl.MMT00011790
2906 3603
D10Ucla1.MMT00011909 BC036563.MMT00012566
3632 3835
Fads2.MMT00012675 Leng1.MMT00012778
3862 3884
MMT00012966 Gpam.MMT00014573
3939 4391
Mt2.MMT00015218 Hsd17b7.MMT00016623
4609 5028
Podn.MMT00016783 Fasn.MMT00018029
5089 5458
Prdm15.MMT00018504 Flot2.MMT00018937
5592 5698
Acly.MMT00022101 AV006891.MMT00025196
6569 7398
Saa1.MMT00025601 Nsdhl.MMT00025608
1525
7499 7503
Dia1.MMT00026453 Idi1.MMT00026786
7730 7831
X3110038L01Rik.MMT00028138 Tm7sf2.MMT00029655
8218 8641
Harp.MMT00029705 Pmvk.MMT00030469
8651 8880
X6030440G05Rik.MMT00031386 Sc5d.MMT00032020
9110 9282
X4833426H19Rik.MMT00032331 X2310005P05Rik.MMT00032577
9351 9416
Mt1.MMT00032920 Mvk.MMT00035384
9506 10165
X9130427A09Rik.MMT00035964 X0610030G03Rik.MMT00036912
10332 10593
Grb7.MMT00040021 X1810014L12Rik.MMT00040123
11378 11399
Padi2.MMT00042855 Clu.MMT00044454
12129 12575
Etv4.MMT00045272 X2610507B11Rik.MMT00045911
12800 12969
Lss.MMT00046250 Saa3.MMT00046768
13092 13243
Fads1.MMT00046994 Acac.MMT00048501
13324 13798
MMT00048763 Aacs.MMT00050881
13892 14434
Paox.MMT00052924 Thrsp.MMT00053283
15010 15101
Dhcr7.MMT00053523 X5730427C23Rik.MMT00054020
15166 15304
Slc25a1.MMT00054225 Mvd.MMT00054501
15372 15456
X0610007P14Rik.MMT00054614 Idi1.MMT00055476
15494 15753
Pcyt2.MMT00057183 Dntt.MMT00057265
16151 16176
MMT00057407 Sc4mol.MMT00059097
16221 16683
Supt3h.MMT00063933 MMT00065821
17971 18466
Hmgcr.MMT00066449 Pcsk9.MMT00068222
18645 19096
Camta2.MMT00069293 Acat2.MMT00069683
19379 19500
Gng1.MMT00069753 Slc16a11.MMT00070912
19523 19836
Cyp51.MMT00071535 BC024093.MMT00072598
20022 20342
Hmgcs1.MMT00075013 BC013481.MMT00077125
20996 21633
Fdft1.MMT00080352 X2700082D03Rik.MMT00080450
22673 22700
Oprs1.MMT00081203 Pigf.MMT00082281
22897 23219
Fdft1.MMT00082285
1625
23220
> sort(cor.insig.cor.genes,decreasing=T)
Insig1.MMT00008789 Pcsk9.MMT00068222
1.0000000 0.7930873
Mvd.MMT00054501 Sc5d.MMT00032020
0.7691968 0.7613225
Dhcr7.MMT00053523 Acly.MMT00022101
0.7397716 0.7394993
Pmvk.MMT00030469 Sc4mol.MMT00059097
0.7163582 0.7094914
Adpn.MMT00007700 X4833426H19Rik.MMT00032331
0.7094542 0.7023919
Hmgcr.MMT00066449 Lss.MMT00046250
0.7007980 0.6997807
BC036563.MMT00012566 Gpam.MMT00014573
0.6875101 0.6798278
Fasn.MMT00018029 X0610007P14Rik.MMT00054614
0.6737612 0.6675071
Acat2.MMT00069683 Dia1.MMT00026453
0.6667161 0.6619738
Fdft1.MMT00080352 Supt3h.MMT00063933
0.6604907 0.6520207
Srebf2.MMT00000464 Rdh11.MMT00006408
0.6486456 0.6430461
Fdft1.MMT00082285 Tm7sf2.MMT00029655
0.6423378 0.6351654
Acac.MMT00048501 Hsd17b7.MMT00016623
0.6260731 0.6259919
Rdbp.MMT00009489 Fads1.MMT00046994
0.6234484 0.6223085
Paox.MMT00052924 Thrsp.MMT00053283
0.6141881 0.6085806
Camta2.MMT00069293 Cyp51.MMT00071535
0.6069609 0.6061542
Oprs1.MMT00081203 Fads2.MMT00012675
0.6030635 0.5897720
X3110038L01Rik.MMT00028138 Mvk.MMT00035384
0.5772267 0.5685569
Gng1.MMT00069753 Sqle.MMT00000743
0.5627438 0.5617388
Aacs.MMT00050881 Idi1.MMT00026786
0.5565744 0.5542443
Slc25a1.MMT00054225 X2310005P05Rik.MMT00032577
0.5541542 0.5489471
Hmgcs1.MMT00075013 BC013481.MMT00077125
0.5470072 0.5453437
AV006891.MMT00025196 X9130427A09Rik.MMT00035964
0.5438655 0.5429591
X5730427C23Rik.MMT00054020 Carkl.MMT00011790
0.5373447 0.5371647
X6030440G05Rik.MMT00031386 Acas2.MMT00009506
0.5343585 0.5343236
Dntt.MMT00057265 Podn.MMT00016783
0.5326553 0.5320033
X0610030G03Rik.MMT00036912 MMT00012966
1725
0.5319817 0.5313946
Pcyt2.MMT00057183 X2700082D03Rik.MMT00080450
0.5306254 0.5284124
Leng1.MMT00012778 Tcf1.MMT00001510
0.5253542 0.5188046
Slc16a11.MMT00070912 Pstpip2.MMT00008523
0.5159696 0.5156726
Idi1.MMT00055476 Harp.MMT00029705
0.5144712 0.5136224
Nsdhl.MMT00025608 D10Ucla1.MMT00011909
0.5099147 0.5093927
Padi2.MMT00042855 X1810014L12Rik.MMT00040123
0.5086602 0.5069298
Grb7.MMT00040021 Prdm15.MMT00018504
0.5015804 -0.5005894
Saa2.MMT00002021 BC024093.MMT00072598
-0.5007631 -0.5018638
Atp6v0b.MMT00003351 MMT00048763
-0.5048252 -0.5063006
MMT00057407 Flot2.MMT00018937
-0.5086263 -0.5202037
D13Wsu177e.MMT00007756 X2610507B11Rik.MMT00045911
-0.5211441 -0.5282687
MMT00065821 Saa1.MMT00025601
-0.5294949 -0.5304144
Etv4.MMT00045272 Saa3.MMT00046768
-0.5490883 -0.5555498
Mt2.MMT00015218 Irak1bp1.MMT00003492
-0.5619260 -0.5634995
Mt1.MMT00032920 Clu.MMT00044454
-0.5707783 -0.5896827
Pigf.MMT00082281
-0.6010850
# Genes found downstream of Insig1 in the female analysis (35 names).
# NOTE: several names were split by line wraps in the extracted text
# (e.g. "C130074G19Rik.MMT0" / "0017549", "Rd h11..."); they are rejoined here.
downstream.of.insig1.in.female = c(
  "Acac.MMT00048501", "Acas2.MMT00009506", "Adipor2.MMT00046649",
  "Aqp8.MMT00004671", "B430110G05Rik.MMT00004109", "BC036563.MMT00012566",
  "C130074G19Rik.MMT00017549", "Cs.MMT00082255", "Dbi.MMT00068995",
  "Dhcr7.MMT00053523", "Dia1.MMT00026453", "Dlat.MMT00009011",
  "Eaf2.MMT00060307", "Elovl6.MMT00035545", "Fads1.MMT00046994",
  "Fads2.MMT00012675", "Fasn.MMT00018029", "Fdft1.MMT00082285",
  "Fdps.MMT00063985", "Gale.MMT00077659", "Map17.MMT00058422",
  "Mod1.MMT00046920", "Mx1.MMT00026973", "Pgd.MMT00038022",
  "Qdpr.MMT00038017", "Rdh11.MMT00006408", "Sc4mol.MMT00059097",
  "Scd1.MMT00022878", "Slc23a1.MMT00022094", "Slc25a1.MMT00054225",
  "Slc41a3.MMT00029681", "Stard4.MMT00024116", "X0610030G03Rik.MMT00036912",
  "X6030440G05Rik.MMT00031386", "Zdhhc6.MMT00047560")
# The subset of those genes regarded as novel, picked by position in the vector above.
novel.and.downstream.of.insig1.in.female = downstream.of.insig1.in.female[c(33,13,26,5,29,30,20,25,34)]
novel.and.downstream.of.insig1.in.female
[1] "X0610030G03Rik.MMT00036912" "Eaf2.MMT00060307"
[3] "Rdh11.MMT00006408" "B430110G05Rik.MMT00004109"
[5] "Slc23a1.MMT00022094" "Slc25a1.MMT00054225"
[7] "Gale.MMT00077659" "Qdpr.MMT00038017"
[9] "X6030440G05Rik.MMT00031386"
1825
# Assemble the analysis data set: all SNP columns followed by Insig1 and the
# novel downstream genes.
# NOTE: an identifier was split by a line wrap and the statements were fused
# onto one line in the extracted text.
traits=cbind(datCombined[,which.Insig1],datCombined[,novel.and.downstream.of.insig1.in.female])
colnames(traits)[1]= colnames(datCombined)[which.Insig1]
datC = data.frame(cbind(datCombined[,str.me$snpcols],traits))
# column positions of the novel genes within datC
w.novel = match(novel.and.downstream.of.insig1.in.female,colnames(datC))
w.novel
[1] 1280 1281 1282 1283 1284 1285 1286 1287 1288
> cor(datC[,c(1279,w.novel)])
Insig1.MMT00008789
Insig1.MMT00008789 1.0000000
X0610030G03Rik.MMT00036912 0.5319817
Eaf2.MMT00060307 0.4118977
Rdh11.MMT00006408 0.6430461
B430110G05Rik.MMT00004109 0.4545422
Slc23a1.MMT00022094 0.4017729
Slc25a1.MMT00054225 0.5541542
Gale.MMT00077659 0.4566726
Qdpr.MMT00038017 0.3966096
X6030440G05Rik.MMT00031386 0.5343585
# So these genes are all strongly correlated with Insig1 in the Male liver
# Apoe-null BxH cross as well.
# Single marker analysis with the two SNPs held fixed as markers, Insig1
# (column 1279) as the gene and the novel genes as traits.
# NOTE: the prose above was uncommented, and the SNP names used typographic
# quotes and a broken `colnames`, in the extracted text.
m2=match(c("rs3705921.chr16.bp49437647","rs3677807.chr8.bp108220543"),colnames(datC))
sma=single.marker.analysis(datC, snpcols=m2, genecols=1279, traitcols=w.novel)
write.csv(sma,"bxh/sma.Insig1.to.Novel.check.BXH.apoenull.male.csv")
# For convenience, embedded here as well:
1925
model
11 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Slc25a1.MMT00054225
13 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Gale.MMT00077659
1 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:X0610030G03Rik.MMT00036912
5 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Rdh11.MMT00006408
16 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Qdpr.MMT00038017
17 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:X6030440G05Rik.MMT00031386
2 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:X0610030G03Rik.MMT00036912
8 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:B430110G05Rik.MMT00004109
7 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:B430110G05Rik.MMT00004109
4 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Eaf2.MMT00060307
12 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Slc25a1.MMT00054225
18 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:X6030440G05Rik.MMT00031386
3 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Eaf2.MMT00060307
10 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Slc23a1.MMT00022094
9 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Slc23a1.MMT00022094
15 M:rs3705921.chr16.bp49437647 to A:Insig1.MMT00008789 to B:Qdpr.MMT00038017
6 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Rdh11.MMT00006408
14 M:rs3677807.chr8.bp108220543 to A:Insig1.MMT00008789 to B:Gale.MMT00077659
# perhaps in the Males the SNPs controlling Insig1 are slightly different, so
# we should re-do our search there.
# NOTE: the statements below were fused onto the comment line, and
# `str.me$snpcols` was split by a line wrap, in the extracted text.
snp.cor.insig1.males=cor(datCombined[,which.Insig1],datCombined[,str.me$snpcols],use="p")
sort(abs(snp.cor.insig1.males),decreasing=TRUE)
max(snp.cor.insig1.males)
[1] 0.2999653
> which.max(snp.cor.insig1.males)
[1] 213
> which.min(snp.cor.insig1.males)
[1] 380
> min(snp.cor.insig1.males)
[1] -0.2380754
> cn[213]
[1] "rs3703298.chr2.bp179772115"
> cn[380]
[1] "rs3696083.chr4.bp123411360"
>
# Okay, so let's back up and start with the Insig1 eQTL analysis on the
# Male BxH Apoe-null data.
# NOTE: the second comment line was uncommented, setwd() used typographic
# quotes, and the output file name contained a space introduced by a line wrap
# ("ins ig1...") in the extracted text.
setwd("c:/good/bxh")
male.insig1.for.rqtl = make.rqtl.file(x=datC,snpcols=1:1278,traitcols=1279:1288,file.name="insig1.male.bxh.apoenull.read.cross.file.for.rqtl.csv", has.cm.not.bp=FALSE)
cgp.male.insig1=calc.genoprob(male.insig1.for.rqtl)
# The following text trailed this line in the source; it appears to be column
# headers of the embedded single-marker-analysis table, whose values follow:
# leo.nb.AtoB leo.nb.BtoA leo.i
leo.o
0.401
-0.635 10.9
0.401 good.o leo.ab.over.ba mlogp.M.AtoB mlogp.M.BtoA
0.635
0.0207
0.655
0.342
0.32
0.17
0.0557
0.0243
-0.57 7.09
-0.699
10
0.342
0.32
-0.17 15.3
0.543
-0.0903
5.5
-0.0243 9.55
0.0557
0.483
0.57
0.699
0.17
0.0903
0.0243
0.0919
0.0219
0.218
0.0314
0.378
0.662
0.721
0.389
0.122
0.402
-0.025
-0.0318
-0.136
-0.158
-0.163
-0.277
-0.326
-0.377
-0.404
-0.555
-0.618
-2.21
-0.132 10.1
0.136 6.39
0.517 4.25
-0.467 15.4
-0.864 5.57
-0.025
-0.143 7.23 -0.0318
0.444
0.1 5.73
0.0568
-0.168 10.9
-0.163
0.141 9.95
0.0565
-1.01 5.75
-0.326
0.145 5.27
0.0506
-1.01 5.36
-0.404
0.44
-0.393
-0.637
0.107
0.111
-0.136
-0.158
0.00551
-0.277
0.687
-0.377
0.603
-0.555
-0.151
-1.35
0.053
0.0496
0.548
0.209
0.305
0.287
0.351
0.383
0.557
0.918
0.769
2.37
0.16
0.16
0.411
0.0505
0.311
0.00968
1.04
0.00595
1.16
0.363
0.618
1.01
2025
# Single-QTL genome scan for the first phenotype column, then plot the LOD
# curve.
# NOTE: the two statements were fused and the ylab text was split by a line
# wrap in the extracted text.
s1=scanone(cgp.male.insig1,pheno.col=1)
plot(s1,gap=20,ylab="LOD score for Insig1 in Male BxH Apoe-Null Liver",cex.lab=1.5)
# Looks like chromosome 16 has the strongest signal in the Males.
# And chromosome 8 has nothing.
> s1
2125
chr pos lod rs3683945 1 1.6027865 2.757667e-01 rs3674785 1 2.4364550 1.771050e-01
... rs3705921 16 24.7188235 7.423818e-01 #2ndary peak in females rs3690619 16 24.8749000 4.857139e-01 rs4135604 16 25.0503995 4.346606e-01 rs3719157 16 25.7116485 5.353817e-01 rs3688027 16 26.5947015 5.353776e-01 rs3702995 16 27.2475115 3.727884e-01 rs3687272 16 27.5468285 2.904028e-01 rs3682166 16 28.9351060 7.081835e-01 rs3717690 16 30.1068985 7.699761e-01 rs3693435 16 31.4743060 7.699907e-01 rs3687551 16 32.1279200 8.924578e-01 rs3675347 16 32.2268255 8.577956e-01 rs3693190 16 33.0221350 1.010477e+00 rs3673897 16 33.5111160 1.448505e+00 rs3690033 16 33.5112585 1.448540e+00 rs3695481 16 34.3890725 1.390609e+00 rs3668777 16 35.6327545 1.609417e+00 rs3672557 16 35.8920155 1.642046e+00 rs3721202 16 36.9481785 1.493165e+00 rs3702474 16 37.6993220 1.493507e+00 rs4135423 16 37.7722340 1.611809e+00 rs3704648 16 38.0112490 1.611758e+00 rs3680665 16 42.9007740 1.763913e+00 rs3693968 16 43.2751485 1.530289e+00 rs3724196 16 43.6041775 1.841426e+00 rs3709512 16 43.8972555 3.520914e+00 # Major peak in Males rs3674782 16 44.3391510 2.944370e+00 rs3655083 16 46.2244330 2.508065e+00 rs3690561 16 46.5362490 2.508074e+00 rs3664190 16 47.4346280 2.930611e+00 rs3664755 16 47.4346705 2.930598e+00 rs3148818 16 47.7593295 2.703044e+00 rs4221288 16 49.2446425 8.698641e-01 rs3722843 16 49.8093630 8.699028e-01 rs4137196 17 1.9791745 5.055418e-01
...
# some of the traits will need re-conversion from factor to numeric
# using as.numeric(as.character()) or anac()...
# NOTE: the loop header was fused onto the comment line in the extracted
# text, so the loop never ran.
for (i in seq_len(ncol(cgp.male.insig1$pheno))) {
  cgp.male.insig1$pheno[,i] = as.numeric(as.character(cgp.male.insig1$pheno[,i]))
}
# because notice that without this conversion, some gene
# expression phenotypes are read in as factors (Uhg!)
# NOTE: the str() call was fused onto the comment line in the extracted text.
str(cgp.male.insig1)
...
2225
$ pheno:'data.frame': 129 obs. of 10 variables:
..$ Insig1.MMT00008789 : num [1:129] 0.1456 0.0892 0.0923
0.1098 0.1029 ...
..$ X0610030G03Rik.MMT00036912: Factor w/ 129 levels "-
0.000113179536128882",..: 99 78 21 66 94 7 69 26 56 109 ...
..$ Eaf2.MMT00060307 : num [1:129] -0.0538 -0.1524 -0.0871
0.0418 -0.0176 ...
..$ Rdh11.MMT00006408 : num [1:129] 0.00721 -0.22661 -
0.09600 0.07175 -0.00359 ...
..$ B430110G05Rik.MMT00004109 : num [1:129] 0.0851 -0.0417 0.0743
0.0297 0.0891 ...
..$ Slc23a1.MMT00022094 : num [1:129] 0.0608 0.0361 0.2427
0.0946 0.1747 ...
..$ Slc25a1.MMT00054225 : Factor w/ 129 levels "-
0.00024392620252911",..: 114 46 96 8 122 120 49 112 44 119 ...
..$ Gale.MMT00077659 : num [1:129] 0.1226 0.0913 0.0922
0.0357 0.0955 ...
..$ Qdpr.MMT00038017 : num [1:129] -0.00905 0.01112 -
0.02883 -0.05107 -0.11140 ...
..$ X6030440G05Rik.MMT00031386: Factor w/ 129 levels "-
0.00077500008046627",..: 53 40 115 117 112 89 42 11 108 95 ...
# (the line above is the tail of the str() output shown before it)
# Zoom in on chromosome 16.
# NOTE: the statements below were fused onto the output line and the ylab text
# was split by a line wrap in the extracted text.
chr16=1069:1128   # chromosome-16 SNP columns of datC, per the cor() listing shown later
s2=scanone(cgp.male.insig1,pheno.col=1)
# NOTE(review): s2 repeats the s1 scan and the plot below still uses s1.
plot(s1,chr=16,gap=20,ylab="LOD score for Insig1 in Male BxH Apoe-Null Liver",cex.lab=1.5)
# the cM is really approximated here by base position.
2325
# what is wrong here that the correlation itself is so small?
# NOTE: the statements below were fused onto the comment/output lines, and
# `colnames` was split by a line wrap, in the extracted text.
cor(datC[,pmatch("Insig1",colnames(datC))],datC[,pmatch("rs3709512",colnames(datC))],use="p")
# [1] 0.003599376
peak16=pmatch("rs3709512",colnames(datC))
w.insig1= pmatch("Insig1",colnames(datC))
> peak16
[1] 1120
> w.insig1
[1] 1279
> cor(use="p",datC[,chr16],datC[,w.insig1])
2425
[,1] rs3715939.chr16.bp5457456 -0.038911746 rs3722676.chr16.bp6816272 -0.051002411 rs3685246.chr16.bp10132714 -0.046828855 rs4151923.chr16.bp13376351 -0.021823599 rs3673183.chr16.bp14051913 -0.018261217 rs3658139.chr16.bp13856562 -0.021823599 rs3714738.chr16.bp14260417 -0.025887768 rs4165301.chr16.bp22437843 -0.046585753 rs4165379.chr16.bp23356807 -0.046585753 rs4165446.chr16.bp23967238 -0.046585753 rs3682563.chr16.bp27259772 -0.082164767 rs3669767.chr16.bp27403239 -0.082164767 rs4167177.chr16.bp28681533 -0.071728768 rs3704997.chr16.bp31405611 -0.128830675 rs3676973.chr16.bp32137649 -0.129695227 rs3658872.chr16.bp31410563 -0.129695227 rs3682565.chr16.bp33952435 -0.110899608 rs3713911.chr16.bp35293698 -0.110899608 rs4151928.chr16.bp38153087 -0.114690617 rs3688256.chr16.bp38962112 -0.110617844 rs3663871.chr16.bp41598339 -0.123745463 rs3713966.chr16.bp44278213 -0.123745463 rs3723465.chr16.bp45164267 -0.123745463 rs3695839.chr16.bp47600856 -0.133752594 rs3682852.chr16.bp47603941 -0.132559509 rs3705921.chr16.bp49437647 -0.132559509 rs3690619.chr16.bp49749800 -0.132559509 rs4135604.chr16.bp50100799 -0.118264228 rs3719157.chr16.bp51423297 -0.075859317 rs3688027.chr16.bp53189403 -0.071910714 rs3687272.chr16.bp55093657 -0.079566710 rs3702995.chr16.bp54495023 -0.079566710 rs3682166.chr16.bp57870212 -0.054244094 rs3023244.chr.N.bp.N -0.025821140 rs3717690.chr16.bp60213797 -0.025821140 rs3693435.chr16.bp62948612 0.012501515 rs3675347.chr16.bp64453651 0.008875473 rs3687551.chr16.bp64255840 0.008875473 rs3693190.chr16.bp66044270 0.006193595 rs3690033.chr16.bp67022517 -0.011188824 rs3673897.chr16.bp67022232 -0.011188824 rs3672557.chr16.bp71784031 -0.048591256 rs3668777.chr16.bp71265509 -0.059632270 rs3695481.chr16.bp68778145 -0.033162878 rs3721202.chr16.bp73896357 -0.036980430 rs3702474.chr16.bp75398644 -0.035287275 rs4135423.chr16.bp75544468 -0.044996391 rs3704648.chr16.bp76022498 -0.044996391 rs3680665.chr16.bp85801548 -0.034647008 rs3693968.chr16.bp86550297 -0.034647008 
rs3724196.chr16.bp87208355 -0.025519703 rs3709512.chr16.bp87794511 0.003599376 rs3674782.chr16.bp88678302 -0.010223085 rs3655083.chr16.bp92448866 -0.007213660 rs3690561.chr16.bp93072498 0.016074347
2525
rs3664755.chr16.bp94869341 0.019900717 rs3664190.chr16.bp94869256 0.019900717 rs3148818.chr16.bp95518659 0.021340642 rs4221288.chr16.bp98489285 0.057309941 rs3722843.chr16.bp99618726 0.050001814
# Full NEO run on the male data: A = Insig1 column, B = the novel genes.
# NOTE: the statements were fused onto the prompt/comment lines, and the run
# title and file name used typographic quotes, in the extracted text.
pm=neo.get.param()
pm$A = 1279
pm$B = 1280:1288
# impute medians of missing trait data...
pm$rough.and.ready.NA.imputation = TRUE
pm$quiet=FALSE
pm$run.title="insig1.downstream.male"
x=neo(datC, pm=pm, snpcols=1:1278)
save(x,file="bxh/male.neo.analysis.insig1.downstreamers.rdat")
date()
# scatter plot of the chr16 peak SNP against Insig1
plot(datC[,pmatch("Insig1",colnames(datC))],datC[,pmatch("rs3709512",colnames(datC))])
2625