I have an sample filename file:
chr7 149601 MERGED_DEL_2_39754 T . 141.35 . AC=0;AF=0.00;AN=2;DP=37;MQ=37.00;MQ0=0;1000gALT=<DEL>;AF1000g=0.09;AFR_AF=0.01;AMR_AF=0.03;ASN_AF=0.27;EUR_AF=0.04;TS=HPGOM;TSseq=T,T,G,T,T;CAnc=T;GAnc=T;OAnc=T;mSC=0.000;GRP=-2.16;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:37:99:0,111,1458:0,0:0,0:0,0:18,18:0
chr7 149616 rs190051229 C . 108.65 . AC=0;AF=0.00;AN=2;DP=35;MQ=37.00;MQ0=0;1000gALT=T;AF1000g=0.00;ASN_AF=0.01;CpG;TS=HPGOM;TSseq=C,C,C,C,C;CAnc=C;GAnc=C;OAnc=C;mSC=0.000;GRP=-2.15;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:35:78.65:0,79,1305:0,0:17,17:0,0:0,0:0
chr7 149628 rs3814456 A . 129.31 . AC=0;AF=0.00;AN=2;DP=37;MQ=37.00;MQ0=0;1000gALT=G;AF1000g=0.14;AFR_AF=0.16;AMR_AF=0.07;ASN_AF=0.27;EUR_AF=0.06;TS=HPGOM;TSseq=A,A,A,A,A;CAnc=A;GAnc=A;OAnc=A;mSC=0.000;GRP=-2.23;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:37:99:0,99,1290:14,22:0,0:0,0:0,0:0
chr7 149634 rs146001818 G T 1375.63 . AC=2;AF=1.00;AN=2;BaseQRankSum=0.456;DP=39;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=0.9997;MQ=37.00;MQ0=0;MQRankSum=1.641;QD=35.27;ReadPosRankSum=1.459;1000gALT=T;AF1000g=0.01;AFR_AF=0.01;AMR_AF=0.01;EUR_AF=0.03;TS=HPGOM;TSseq=G,G,G,G,G;CAnc=G;GAnc=G;OAnc=G;mSC=0.001;GRP=0.0686;Map20=1;ANN=T|upstream_gene_variant|MODIFIER|LOC100507642|LOC100507642|transcript|NR_108064.1|Noncoding||n.-1G>T|||||84|,T|upstream_gene_variant|MODIFIER|LOC100507642|LOC100507642|transcript|NR_108065.1|Noncoding||n.-1G>T|||||84|,T|intergenic_region|MODIFIER|LOC100507642|LOC100507642|intergenic_region|LOC100507642||||||||| GT:DP:GQ:PL:A:C:G:T:IR 1/1:39:74.36:1409,74,0:0,0:0,0:0,1:15,22:0
chr7 149645 rs112562180 C . 165.42 . AC=0;AF=0.00;AN=2;DP=46;MQ=37.00;MQ0=0;1000gALT=A;AF1000g=0.02;AFR_AF=0.03;AMR_AF=0.03;EUR_AF=0.02;TS=HPGOM;TSseq=C,C,C,C,C;CAnc=C;GAnc=C;OAnc=C;mSC=0.000;GRP=-1.93;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:46:99:0,135,1758:0,0:22,22:0,0:1,0:0
chr7 149659 rs79606188 T . 195.53 . AC=0;AF=0.00;AN=2;DP=55;MQ=37.00;MQ0=0;1000gALT=A;AF1000g=0.02;AFR_AF=0.07;AMR_AF=0.01;TS=HPGOM;TSseq=T,T,T,T,G;CAnc=T;GAnc=T;OAnc=T;mSC=0.005;GRP=0.0203;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:55:99:0,166,2189:0,0:0,0:0,0:26,28:0
chr7 149724 rs193238495 C . 216.56 . AC=0;AF=0.00;AN=2;DP=62;MQ=37.18;MQ0=0;1000gALT=T;AF1000g=0.00;AFR_AF=0.01;CpG;TS=HPGOM;TSseq=C,C,C,C,C;CAnc=C;GAnc=C;OAnc=C;mSC=0.000;GRP=-0.139;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:62:99:0,187,2385:0,0:37,24:0,0:0,0:0
chr7 149765 rs3814455 C . 198.52 . AC=0;AF=0.00;AN=2;DP=56;MQ=37.73;MQ0=0;1000gALT=T;AF1000g=0.54;AFR_AF=0.35;AMR_AF=0.60;ASN_AF=0.31;EUR_AF=0.79;TS=HPGOM;TSseq=C,C,C,C,C;CAnc=C;GAnc=C;OAnc=C;mSC=0.000;GRP=-0.494;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:56:99:0,169,2174:0,0:22,32:0,0:0,0:0
chr7 149785 rs185668085 C . 192.52 . AC=0;AF=0.00;AN=2;DP=54;MQ=37.76;MQ0=0;1000gALT=G;AF1000g=0.01;ASN_AF=0.04;TS=HPGOM;TSseq=C,C,C,C,C;CAnc=C;GAnc=C;OAnc=C;mSC=0.002;GRP=-0.216;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:54:99:0,163,2135:0,0:19,33:0,0:0,0:0
chr7 149825 rs189449059 C . 156.38 . AC=0;AF=0.00;AN=2;DP=42;MQ=37.71;MQ0=0;1000gALT=T;AF1000g=0.00;TS=HPGOM;TSseq=C,C,C,C,-;CAnc=C;GAnc=C;OAnc=C;mSC=0.000;GRP=0.693;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:42:99:0,126,1609:0,0:17,24:0,0:0,0:0
chr7 149863 . G A 407.49 . AC=1;AF=0.50;AN=2;BaseQRankSum=-1.315;DP=37;Dels=0.00;FS=1.341;HRun=1;HaplotypeScore=1.9995;MQ=37.00;MQ0=0;MQRankSum=-0.201;QD=11.01;ReadPosRankSum=1.469;TS=HPGOM;TSseq=G,G,G,G,G;CAnc=G;GAnc=G;OAnc=G;mSC=0.000;GRP=-1.5;Map20=1;ANN=A|non_coding_exon_variant|MODIFIER|LOC100507642|LOC100507642|transcript|NR_108064.1|Noncoding|1/3|n.146G>A||||||,A|non_coding_exon_variant|MODIFIER|LOC100507642|LOC100507642|transcript|NR_108065.1|Noncoding|1/2|n.146G>A|||||| GT:DP:GQ:PL:A:C:G:T:IR 0/1:37:99:437,0,759:6,8:0,0:13,9:0,0:0
chr7 149880 rs115127983 C . 108.24 . AC=0;AF=0.00;AN=2;DP=26;MQ=37.00;MQ0=0;1000gALT=G;AF1000g=0.04;AFR_AF=0.15;AMR_AF=0.02;CpG;TS=HPGOM;TSseq=C,C,C,C,C;CAnc=C;GAnc=C;OAnc=C;mSC=0.003;GRP=-1.24;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:26:78.25:0,78,1029:0,0:15,10:0,0:0,0:0
chr7 150067 rs181041230 G . 138.34 . AC=0;AF=0.00;AN=2;DP=36;MQ=38.37;MQ0=0;1000gALT=A;AF1000g=0.00;AFR_AF=0.01;TS=HPGOM;TSseq=G,G,G,G,G;CAnc=G;GAnc=G;OAnc=G;mSC=0.005;GRP=0.119;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:36:99:0,108,1425:0,0:0,0:16,19:0,0:0
chr7 150253 rs28397846 A . 159.4 . AC=0;AF=0.00;AN=2;DP=43;MQ=39.05;MQ0=0;1000gALT=G;AF1000g=0.03;AFR_AF=0.14;AMR_AF=0.02;TS=HPGOM;TSseq=A,A,A,-,G;CAnc=A;GAnc=A;OAnc=A;mSC=0.000;GRP=-2.18;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:43:99:0,129,1687:24,19:0,0:0,0:0,0:0
chr7 150280 rs139905037 A . 159.4 . AC=0;AF=0.00;AN=2;DP=45;MQ=38.96;MQ0=0;1000gALT=G;AF1000g=0.00;ASN_AF=0.01;TS=HPGOM;TSseq=A,A,A,-,-;CAnc=A;GAnc=A;OAnc=A;mSC=0.000;GRP=-0.168;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:45:99:0,129,1682:19,25:0,1:0,0:0,0:0
chr7 150353 rs75914010 A . 162.42 . AC=0;AF=0.00;AN=2;DP=45;MQ=37.00;MQ0=0;1000gALT=T;AF1000g=0.02;AFR_AF=0.03;AMR_AF=0.02;EUR_AF=0.02;TS=HPGOM;TSseq=A,A,A,A,A;CAnc=A;GAnc=A;OAnc=A;mSC=0.000;GRP=-0.647;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:45:99:0,132,1739:21,24:0,0:0,0:0,0:0
chr7 150356 rs185358707 C . 113.39 . AC=0;AF=0.00;AN=2;DP=43;MQ=37.00;MQ0=0;1000gALT=T;AF1000g=0.00;CpG;TS=HPGOM;TSseq=C,C,C,C,C;CAnc=C;GAnc=C;OAnc=C;mSC=0.000;GRP=-1.59;Map20=1 GT:DP:GQ:PL:A:C:G:T:IR 0/0:43:83.39:0,83,1538:0,1:19,21:0,0:2,0:0
My goal is to save all lines that are within a specified range. This is what I have so far.
#!/usr/bin/env python
import sys
file=open('filename')
sys.stdout=open('mega1.txt', 'w')
for line in file:
fields = line.strip().split()
chrm = fields[0]
pos = int(fields[1])
id1 = fields[2]
if id1 in range(149601, 1149601):
print line
I am not sure why it continues to run.
For this samplefile name, all the lines will be saved into the new mega1.txt file as the values in column2 all fit the range.
Aucun commentaire:
Enregistrer un commentaire