|
发表于
2019-12-6 16:22:26
|
显示全部楼层
本帖最后由 Riven 于 2019-12-6 16:25 编辑
- test2.
- #!/usr/bin/env python
- # -*- coding=utf-8 -*-
- import os
- import gzip
- import re
- import argparse
- from Bio import SeqIO
# Command-line interface: the reference FASTA and the output path have
# defaults, while the chromosome and the base pattern must be supplied.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-r', '--ref',
    default="/PUBLIC/database/HUMAN/genome/Human/human_g1k_v37_decoy.fasta",
    help="the reference file path, default:human b37, *.fasta",
)
parser.add_argument(
    '-c', '--chr',
    required=True,
    help="specify the chromosome to search",
)
parser.add_argument(
    '-b', '--bases',
    required=True,
    help="the group of bases to search",
)
parser.add_argument(
    '-o', '--outfile',
    default="search_out.xls",
    help="the output file path",
)

# The parsed namespace is converted to a plain dict and echoed to stdout.
args = vars(parser.parse_args())
print(args)

# Surrounding whitespace is trimmed from every option; the two file paths
# are additionally made absolute.
ref = os.path.abspath(args['ref'].strip())
# NOTE: this name shadows the built-in chr(); it is kept because the
# script's entry point refers to it.
chr = args['chr'].strip()
base_group = args['bases'].strip()
out = os.path.abspath(args['outfile'].strip())
def get_base_group_info(ref, chrom, residue, file):
    """Write the position of every match of ``residue`` on one chromosome.

    The FASTA input is read record by record and reading stops at the first
    record whose ID equals ``chrom``, so only that single sequence is kept
    in memory instead of a dictionary holding every record of the file.

    Each match produces one output line of the form ``chrom:start-end``,
    where ``start`` is the 1-based position of the first matched base and
    ``end`` is the 1-based position of the last matched base.

    Args:
        ref: FASTA file (path or handle) handed to ``SeqIO.parse``.
        chrom: ID of the record to search.
        residue: Pattern to search for. It is used as a regular expression,
            so a plain string of bases matches literally. Matching is
            case-sensitive and matches do not overlap.
        file: Path of the output file; any existing content is overwritten.

    Raises:
        KeyError: If ``ref`` contains no record whose ID is ``chrom``.
        re.error: If ``residue`` is not a valid regular expression.
    """
    # Compile first so an invalid pattern fails before the (potentially very
    # large) reference is read.
    pattern = re.compile(residue)

    chr_seq = None
    for record in SeqIO.parse(ref, "fasta"):
        if record.id == chrom:
            chr_seq = str(record.seq)
            break
    if chr_seq is None:
        raise KeyError(chrom)

    # The output file is opened only once the sequence is available, so a
    # failed lookup does not leave an empty result file behind.
    with open(file, 'w') as handle:
        for match in pattern.finditer(chr_seq):
            start, end = match.span()
            # span() is 0-based and half-open; adding one to the start gives
            # a 1-based inclusive interval while the end needs no change.
            handle.write("{}:{}-{}\n".format(chrom, start + 1, end))
# Run the search only when the file is executed as a script; the arguments
# are the values parsed from the command line above.
if __name__ == "__main__":
    get_base_group_info(
        ref=ref,
        chrom=chr,
        residue=base_group,
        file=out,
    )
复制代码
|
|