'

:

: [1]   Word-176927.doc : 1


1 '

1.

2

3

4

2

1

2

3

1 BioJava

4 BIOJAVA

1

2

3

4 i

5 i i

1



Ѹ ' - - . ' . , . ' '. ' . :

:

  • , '
  • '

' Protein Data Bank (PDB) [1]. . - . ָ .

, . :

  • - -
  • '

, , . - , NP-, . - i , , . .

1 '


1.


, ' . :

' ': , , . - -. , , ' , i.

' , . , . ָ.

: . i , i , .

, i, i , .


2


. , . . , i , . , , , .

: i, i, , ( - , , , α-). 1.1 [2].


1.1.

i

GenBank

i

EMBL

NDB

i

SWISS-PROT

PIR

PDB

MMDB

LIGAND


PROSITE


ProDOM


, i , . , . , LifeSeq Incyte Genomics, Inc. i , .

, , ( , ).


3


, , i , . ' ,   . i , , i i i, , . , .

. , i . , . i , . , . i .

, . . . , .

, . i . , i , , , .

' Accelrys.[3] . , i , ,  ' . i , , . 1.1 .








1.1 - Accelrys


i .  NCBI's Web-based Map Viewer[4]. -, NCBI. i , i i. , NCBI. , i .


1.2. 8- Homo Sapiens NCBI's Map Viewer

1.2 , NCBI's Map Viewer, Homo Sapiens.


4


. , , , , , ( , , ) , .

. , i i, i, i . , , .

, . . , , .  .

. , . Entrez (NCBI)[5]. , NCBI 1.2.[2]


1.2 Entrez

PubMed

i PIR, SWISS-PROT, PDB, EMBL

i GenBank, EMBL, DDJB ( )

PDB

OMIM

NCBI

NCBI


, . , PubMed , .

i Entrez: , , , Networked Entrez, -. Networked Entrez NCBI -. , - , Network Entrez , . i . i i , . .

2


1


- i . i , i , i , i . , i , , .

i - , i , . - i (i ) i .  , i i, i i.

:

i. i i ATTCAGTGCT ATTGCT. . :

i - * i - * i .

2:


ATTCAGTGCT

ATT----GCT


, i, - . i , , . , . i , . -.

, i. . i . , i, . .

: , , , , , , . , .

- i . i. : BLASTN, BLASTX, BALSA. : BLASTP, Smith-Waterman, PHI-BLAST.

BLASTN [6] . -. 2.1 i . BLASTP -.


2.1, i BLASTN


2


. : . i . '. , , , .

, . . ' PDB. 50000 .  . 2.2 i - 1TVF PDB. PDB SCOP ( ), . ' . , .


2.2. -


' NP-. , , , .

' MaxSub, GDT, 3dSearch, TM-align, DALI. i i ' , ' ( , i ), ' , , .

, , . 3-Dimensional Structural Superposition (3DSS)[7], - i PDB. . . i PDB PDB-. . , , , . 2.3 ( 1SDB i 1HOE).


2.3. 3DSS


, , MATRAS. . , , , , i PDB. , , . i , , Java Jmol. 2.4 ( 1MDB i 4HHB)


2.4. MATRAS


3


, . . , . () . i . i BioJava [2] BioPerl.


1 BioJava


BioJava 1999 Java. Java . . LGPL, [2]. i, 10 :

  1. BLAST
  2. / i
  3. /
  4. i

Java 1.5 , i i BioJava. , i . i .

. . , i . , i (), (), () (). 20 ( 20 ). , i . i org.biojava.bio.symbol org.biojava.bio.seq.

,   . : PDB, GenBank, EMBL, FASTA, SWISSPROT. , . . , , i . .

i i i . i org.biojava.bio.dp org.biojava.bio.alignment. -, -, -. , i, i .

i BioJava i . i org.biojava.bio.gui . , BioJava , Java Jmol.

, i . , BioSQL. , Python, Java, Ruby, Perl. : PostgreSQL, MySQL, Oracle, HSQLDB.

, BioJava , , BioRuby, BioPerl, BioPython. i , i , i i i , .

4 BIOJAVA


1


: , i

: i .

i i BioJava.


2


, , , . , jar ,   . Java Java Development Kit (JDK) - . , , . BioJava 1.6.1 JDK 1.5 . , CLASSPATH. , , Java (Java Virtual Machine). :


export CLASSPATH=/home/kotuk/biojava-live.jar:/home/kotuk/bytecode.jar:

                                      /home/kotuk/commons-cli.jar:

                                      /home/kotuk/commons-collections-2.1.jar:

                                      /home/kotuk/commons-dbcp-1.1.jar:

                                      /home/kotuk/commons-pool-1.1.jar:.


, , , BioJava. Apache Jakarta Commons. - , BioJava ' , i . ' , .

i Java, (Integrated Development Environment, IDE). , , , . IDE Java ' Eclipse, IntelliJ Idea, NetBeans. IntelliJ Idea [9]. ' ,   .

. Java-, - i , . BioJavaProgram. i 4.1.












4.1. IDE Idea


3


i - -.  i i: , . , BLAST. , BioJava i . , i 1.5 .

, , , , . , . ( ) , . ' , i .

(, , ), i, .


4 i


. ' SequenceAlignment, -. ( ' SubstitutionMatrix) NUC-4.4 ( ).   ftp://ftp.ncbi.nlm.nih.gov/blast/matrices. , , , . ' aligner . :

SubstitutionMatrix matr = new SubstitutionMatrix(DNATools.getDNA(), new File(MATRIX_PATH));

SequenceAlignment aligner = new NeedlemanWunsch(0, 4, 2, 2, 2, matr);

getSequence, , i. , getBestSeq, aligner. i , . .

'- -. - , ' . , .


5 i i


i 16 i . : NCBI, i {A, C, G, T}, , , . i 220 720 . i /home/skhamenk/dna, SEQUENCES_PATH. .

: -, -. , i. , - i, - .

i  , : i   BG560803 D104   BG560804 D106 . -68.0 ' , , i i . .

, i BG560814 D221 BG560810 D215 . 699 398 . 764 i i.

BioJava getAlignmentString(). i . 4.2. - i O(m * n), m i n - i . .











4.2 i


, i . i Java BioJava. 1.


, . , . , , i , , i , .

, . i . , . , , -   .

i . , .

BioJava. i . , .

BioJava i . , .

, i i . ' ,   .


[1] Protein Data Bank [ ] . - : http://www.rcsb.org . - : 14.01.2009

[2] Bergeron B. Bioinformatics computing. 1st ed. Upper Saddle River, NJ: Prentice Hall PTR, 2002, 395p.

[3] Accelrys [ ] . - : http://www.rcsb.org . - : 14.01.2009

[4] NCBI Map Viewer [ ] . - : http://www.ncbi.nlm.nih.gov/projects/mapview/ . - : 14.01.2009

[5]NCBI Entrez [ ] . - : Entrez/ . - : 14.01.2009

[6] BLASTN [ ] . - : Entrez/ . - : 14.01.2009

[7] 3DSS [ ] . - : 3dss/ . - : 14.01.2009

[8]BioJava[ ] . - : . - : 14.01.2009

[9]IntelliJ Idea[ ] . - : http://www.jetbrains.com/idea/ . - : 14.01.2009

1


BioJavaProgram

import org.biojava.bio.symbol.IllegalSymbolException;

import org.biojava.bio.seq.DNATools;

import org.biojava.bio.seq.Sequence;

import org.biojava.bio.alignment.SubstitutionMatrix;

import org.biojava.bio.alignment.SequenceAlignment;

import org.biojava.bio.alignment.NeedlemanWunsch;

import org.biojava.bio.alignment.SmithWaterman;


import java.io.*;

import java.util.List;

import java.util.ArrayList;


/**

* @author Siarhei Khamenka

*/

public class BioJavaProgram {


    public static final String MATRIX_PATH = "/home/kotuk/NUC44.MAT";

    public static final String SEQUENCES_PATH = "/home/kotuk/fasta/";


    static Sequence getSequence(String fileName) {

        try {

            BufferedReader br = new BufferedReader(new FileReader(SEQUENCES_PATH + fileName));

            String name = br.readLine(), st, seq = "";

            while ((st = br.readLine()) != null) {

                seq += st;

            }

            br.close();

            return DNATools.createDNASequence(seq, name);

        } catch (IOException e) {

            e.printStackTrace();

        } catch (IllegalSymbolException e) {

            e.printStackTrace();

        }

        return null;

    }


    static List<Sequence> getBestSeq(SequenceAlignment aligner) throws Exception {

        String[] seqFiles = new File(SEQUENCES_PATH).list();

        Sequence bestQuery = null, bestTarget = null;

        double maxRes = Double.MIN_VALUE;

        for (int i = 0; i < seqFiles.length; i++) {

            Sequence query = getSequence(seqFiles[i]);

            for (int j = i + 1; j < seqFiles.length; j++) {

                Sequence target = getSequence(seqFiles[j]);

                double res = aligner.pairwiseAlignment(query, target);

                if (res > maxRes) {

                    bestQuery = query;

                    bestTarget = target;

                }

            }

        }

        List<Sequence> res = new ArrayList();

        res.add(bestQuery);

        res.add(bestTarget);

        return res;

    }


    public static void main(String[] args) throws Exception {

        SubstitutionMatrix matr = new SubstitutionMatrix(DNATools.getDNA(), new File(MATRIX_PATH));

        SequenceAlignment aligner = new NeedlemanWunsch(0, 4, 2, 2, 2, matr);

        List<Sequence> bestNW = getBestSeq(aligner);

        aligner.pairwiseAlignment(bestNW.get(0), bestNW.get(1));

        System.out.println("Alignment Needleman-Wunsch : " + aligner.getAlignmentString());

        aligner = new SmithWaterman(0, 4, 2, 2, 2, matr);

        List<Sequence> bestSW = getBestSeq(aligner);

        aligner.pairwiseAlignment(bestSW.get(0), bestSW.get(1));

        System.out.println("Alignment Smith-Woterman : " + aligner.getAlignmentString());

    }

}

: 1