<!-- *******************************************************************

     PIR-International Protein Sequence Database (PSD)

     Author:  Bruce C. Orcutt, Ph.D.
              National Biomedical Research Foundation
              3900 Reservoir Road, NW
              Washington, DC 20007
     E-mail:  orcutt@nbrf.georgetown.edu

     Version: version 003 (preliminary version)
     Date:    25-Jan-2002

     Copyright (c) 2000,2002 National Biomedical Research Foundation
     All rights reserved.
******************************************************************** -->

<!-- ProteinDatabase: the root element.
     Content: an optional Database element followed by one or more
     ProteinEntry elements, in that order.
     Attributes:
       id       required; declared with type ID.
       release  optional; free text (CDATA).
       date     optional; free text (CDATA), no format is enforced here. -->
<!ELEMENT ProteinDatabase (Database?,ProteinEntry+)>
<!ATTLIST ProteinDatabase id      ID    #REQUIRED
                          release CDATA #IMPLIED
                          date    CDATA #IMPLIED>

<!-- Database: database name information.
     Text-only element; it is the optional first child of ProteinDatabase. -->
<!ELEMENT Database (#PCDATA)>   <!-- database creator identifier -->

<!-- ProteinEntry: a single protein entry. It is not the document root;
     it occurs one or more times inside the ProteinDatabase root element.
     Children must appear in the order listed below:
       required, exactly once:  header, protein, organism, summary, sequence
       optional, at most once:  classification, keywords
       optional, repeatable:    reference, comment, genetics, complex,
                                function, feature
     Attribute id is required and is declared with type ID. -->
<!ELEMENT ProteinEntry (header,protein,organism,reference*,
                        comment*,genetics*,complex*,function*,
                        classification?,keywords?,feature*,
                        summary,sequence)>
<!ATTLIST ProteinEntry id  ID  #REQUIRED>


<!-- header: database information.
     Content, in order: one uid, zero or more accession elements, then
     exactly one each of created_date, seq-rev_date and txt-rev_date.
     All children are plain text (#PCDATA); the DD-MMM-YYYY date layout
     noted below is a documentation convention that this DTD does not
     enforce.
     NOTE(review): seq-rev_date and txt-rev_date presumably hold the dates
     of the last sequence revision and last text revision; confirm. -->
<!ELEMENT header       (uid,accession*,created_date,seq-rev_date,txt-rev_date)>

<!-- uid and accession are also used outside header: uid in gene, xref,
     comment and complex; accession in accinfo. -->
<!ELEMENT uid          (#PCDATA)>       <!-- entry identifier -->
<!ELEMENT accession    (#PCDATA)>       <!-- accession number -->
<!ELEMENT created_date (#PCDATA)>       <!-- date (DD-MMM-YYYY) -->
<!ELEMENT seq-rev_date (#PCDATA)>       <!-- date (DD-MMM-YYYY) -->
<!ELEMENT txt-rev_date (#PCDATA)>       <!-- date (DD-MMM-YYYY) -->


<!-- protein: the protein names.
     Content, in order: exactly one name, then zero or more alt-name
     elements, then zero or more contains elements. -->
<!ELEMENT protein      (name,alt-name*,contains*)>

<!-- name, alt-name and contains are text-only and each carries two
     optional attributes: status (enumerated) and attribution (free text).
     Only name accepts the status values "imported" and "similarity";
     alt-name and contains accept "validated" alone. -->
<!ELEMENT name         (#PCDATA)>       <!-- protein name -->
<!ATTLIST name         status      (validated|imported|similarity)  #IMPLIED
                       attribution  CDATA       #IMPLIED>
<!ELEMENT alt-name     (#PCDATA)>       <!-- alternate protein name -->
<!ATTLIST alt-name     status      (validated)  #IMPLIED
                       attribution  CDATA       #IMPLIED>
<!ELEMENT contains     (#PCDATA)>       <!-- activity name -->
<!ATTLIST contains     status      (validated)  #IMPLIED
                       attribution  CDATA       #IMPLIED>

<!-- organism: identification of the biological source.
     Content, in order: source (required), common (optional),
     formal (required), variety (optional), then zero or more note
     elements. The note element is declared with the general elements
     near the end of this DTD. -->
<!ELEMENT organism     (source,common?,formal,variety?,note*)>

<!-- All four children below are plain text. -->
<!ELEMENT source       (#PCDATA)>       <!-- source name -->
<!ELEMENT common       (#PCDATA)>       <!-- common name -->
<!ELEMENT formal       (#PCDATA)>       <!-- scientific name -->
<!ELEMENT variety      (#PCDATA)>       <!-- variety name -->

<!-- reference: one literature reference of a ProteinEntry.
     Content, in order: exactly one refinfo, then zero or more contents,
     zero or more note, and zero or more accinfo elements. -->
<!ELEMENT reference    (refinfo,contents*,note*,accinfo*)>

<!-- refinfo: identification of the literature source.
     Content, in order: authors and citation (required), volume and month
     (optional), year (required), pages (optional), then at most one of
     title or description, then an optional xrefs.
     Attribute refid is optional and must be a single name token. -->
<!ELEMENT refinfo      (authors,citation,volume?,month?,year,pages?,
                       (title|description)?,xrefs?)>
<!ATTLIST refinfo      refid  NMTOKEN   #IMPLIED>

<!-- authors holds either one or more author elements, or a single empty
     anonymous element optionally followed by one group element. -->
<!ELEMENT authors      (author+|(anonymous,group?))> <!-- list of authors -->
<!ELEMENT author       (#PCDATA)>       <!-- author name -->
<!ELEMENT anonymous    EMPTY>           <!-- anonymous author -->
<!ELEMENT group        (#PCDATA)>       <!-- group or laboratory -->
<!ELEMENT citation     (#PCDATA)>       <!-- citation name -->
<!-- citation type defaults to "journal" when the attribute is omitted. -->
<!ATTLIST citation     type  (journal|book|submission|other)  "journal">
<!ELEMENT volume       (#PCDATA)>       <!-- volume number -->
<!ELEMENT month        (#PCDATA)>       <!-- month -->
<!ELEMENT year         (#PCDATA)>       <!-- year -->
<!ELEMENT pages        (#PCDATA)>       <!-- page numbers -->
<!ELEMENT title        (#PCDATA)>       <!-- title text -->
<!-- contents is a child of reference, not of refinfo. -->
<!ELEMENT contents     (#PCDATA)>       <!-- description of contents -->

<!-- accinfo: identification of the literature source of the sequence;
              the published sequence is identified by the label;
              the link is to the genetics information.
     Content, in order: exactly one accession, zero or more status, zero
     or more mol-type, then optional seq-spec, xrefs and exp-source, then
     zero or more note elements.
     Attributes (both optional): link takes one or more name tokens,
     label takes a single name token. -->
<!ELEMENT accinfo      (accession,status*,mol-type*,seq-spec?,
                        xrefs?,exp-source?,note*)>
<!ATTLIST accinfo      link   NMTOKENS  #IMPLIED
                       label  NMTOKEN   #IMPLIED>

<!-- The mol-type values listed below are a documentation convention;
     the element is declared as plain text, so the DTD does not enforce
     them. -->
<!ELEMENT mol-type     (#PCDATA)>       <!-- "DNA" | "mRNA" | "genomic RNA" |
                                             "nucleic acid" | "protein" -->
<!ELEMENT exp-source   (#PCDATA)>       <!-- experimental source description -->


<!-- comment: free-text comment on a ProteinEntry.
     Mixed content: text interleaved with any number of uid elements.
     Attributes (all optional):
       status       only the value "validated" is allowed.
       attribution  free text.
       link         one or more name tokens.
       label        a single name token. -->
<!ELEMENT comment      (#PCDATA|uid)*>  <!-- comment text -->
<!ATTLIST comment      status      (validated)  #IMPLIED
                       attribution  CDATA       #IMPLIED
                       link         NMTOKENS    #IMPLIED
                       label        NMTOKEN     #IMPLIED>

<!-- genetics: genetic information for a ProteinEntry.
     Every child is optional, so an empty genetics element is valid.
     When present, children must follow the order listed: gene (repeatable),
     xrefs, map-position, genome, mobile-element, gene-origin, genetic-code,
     start-codon, introns, intron-status, other-product (repeatable),
     note (repeatable).
     Attribute label is optional and must be a single name token. -->
<!ELEMENT genetics     (gene*,xrefs?,map-position?,genome?,mobile-element?,
                        gene-origin?,genetic-code?,start-codon?,introns?,
                        intron-status?,other-product*,note*)>
<!ATTLIST genetics     label  NMTOKEN   #IMPLIED>

<!-- gene holds an optional db followed by a required uid. The remaining
     children are plain text; the quoted value lists in the comments
     below are conventions that the DTD itself does not enforce. -->
<!ELEMENT gene         (db?,uid)>       <!-- gene symbols -->
<!ELEMENT map-position (#PCDATA)>       <!-- map text -->
<!ELEMENT genome       (#PCDATA)>       <!-- "mitochondrion" | "chloroplast" |
                                             "cyanelle" | "plasmid" -->
<!ELEMENT mobile-element (#PCDATA)>     <!-- element text -->
<!ELEMENT gene-origin    (#PCDATA)>     <!-- origin text -->
<!ELEMENT genetic-code   (#PCDATA)>     <!-- translation table -->
<!ELEMENT start-codon    (#PCDATA)>     <!-- start codon -->
<!ELEMENT introns        (#PCDATA)>     <!-- intron specification -->
<!ELEMENT intron-status  (#PCDATA)>     <!-- "absent" | "incomplete" -->
<!ELEMENT other-product  (#PCDATA)>     <!-- accession number -->


<!-- complex: description of a complex.
     Mixed content: text interleaved with any number of uid elements.
     Carries the same four optional attributes as the comment element:
     status ("validated" only), attribution (free text), link (one or
     more name tokens) and label (a single name token). -->
<!ELEMENT complex      (#PCDATA|uid)*>  <!-- complex description -->
<!ATTLIST complex      status      (validated)  #IMPLIED
                       attribution  CDATA       #IMPLIED
                       link         NMTOKENS    #IMPLIED
                       label        NMTOKEN     #IMPLIED>


<!-- function: functional information for a ProteinEntry.
     Content, in order: an optional description, zero or more pathway
     elements, zero or more note elements. All children are optional, so
     an empty function element is valid.
     Attribute label is optional and must be a single name token. -->
<!ELEMENT function     (description?,pathway*,note*)>
<!ATTLIST function     label  NMTOKEN   #IMPLIED>

<!-- pathway is plain text with optional status ("validated" only) and
     attribution (free text) attributes. -->
<!ELEMENT pathway      (#PCDATA)>       <!-- pathway name -->
<!ATTLIST pathway      status      (validated)  #IMPLIED
                       attribution  CDATA       #IMPLIED>


<!-- classification: optional child of ProteinEntry; when present it must
     contain at least one superfamily element. -->
<!ELEMENT classification (superfamily+)>

<!ELEMENT superfamily  (#PCDATA)>       <!-- superfamily name -->


<!-- keywords: optional child of ProteinEntry that wraps zero or more
     keyword elements; an empty keywords element is valid. -->
<!ELEMENT keywords     (keyword*)>      <!-- keywords -->

<!ELEMENT keyword      (#PCDATA)>       <!-- keyword -->

<!-- feature: one sequence feature of a ProteinEntry.
     Content, in order: a required feature-type, then optional
     description, seq-spec, status and note (each at most once).
     Attributes (both optional): link takes one or more name tokens,
     label takes a single name token.
     NOTE(review): note is limited to one occurrence here (note?), while
     organism, reference, accinfo, genetics and function allow note*;
     confirm that this difference is intentional. -->
<!ELEMENT feature      (feature-type,description?,seq-spec?,status?,note?)>
<!ATTLIST feature      link   NMTOKENS  #IMPLIED
                       label  NMTOKEN   #IMPLIED>

<!-- feature-type is declared as plain text; the value list in the comment
     below is a convention that the DTD itself does not enforce. -->
<!ELEMENT feature-type (#PCDATA)>       <!-- "domain" | "product" | "region" |
                                        "cleavage-site" | "cross-link" |
                                        "disulfide-bonds" | "active-site" |
                                        "binding-site" | "inhibitory-site" |
                                        "modified-site" -->


<!-- summary: summarizes the sequence properties.
     Content, in order: required length and type, then an optional
     status. Both length and type are declared as plain text, so neither
     a numeric length nor the type values listed below are enforced by
     the DTD. -->
<!ELEMENT summary      (length,type,status?)>

<!ELEMENT length       (#PCDATA)>       <!-- length of aa sequence -->
<!ELEMENT type         (#PCDATA)>       <!-- "fragment" | "fragments" |
                                        "complete" -->


<!-- sequence: the amino acid sequence.
     Required last child of ProteinEntry; declared as plain text, so the
     allowed symbols are not restricted by the DTD. -->
<!ELEMENT sequence     (#PCDATA)>       <!-- amino acid symbols and
                                             punctuation -->


<!-- General elements. Elements that can be contained in several
     other elements.
       xrefs        used by refinfo, accinfo and genetics.
       db           used by xref and gene.
       note         used by organism, reference, accinfo, genetics,
                    function and feature.
       description  used by refinfo, function and feature.
       seq-spec     used by accinfo and feature. -->

<!-- xrefs must contain at least one xref; each xref is a db followed by
     a uid. -->
<!ELEMENT xrefs        (xref+)>         <!-- cross-references -->
<!ELEMENT xref         (db,uid)>        <!-- a cross-reference -->
<!ELEMENT db           (#PCDATA)>       <!-- database tag -->
<!-- note and description are plain text with optional status
     ("validated" only) and attribution (free text) attributes. -->
<!ELEMENT note         (#PCDATA)>       <!-- note text -->
<!ATTLIST note         status      (validated)  #IMPLIED
                       attribution  CDATA       #IMPLIED>
<!ELEMENT description  (#PCDATA)>       <!-- description -->
<!ATTLIST description  status      (validated)  #IMPLIED
                       attribution  CDATA       #IMPLIED>
<!ELEMENT seq-spec     (#PCDATA)>       <!-- sequence specification -->

<!-- status element. The valid set of values depends on the parent
     element. The element is declared as plain text, so the value lists
     below are conventions that the DTD itself does not enforce. -->
<!-- in an accinfo element: "preliminary" | "nucleic acid sequence not shown" |
     "translation not shown" | "protein sequence not shown" |
     "significant sequence differences" | "translated from GB/EMBL/DDBJ" |
     "not compared with conceptual translation" | "unencoded polypeptide" |
     "conceptual translation of pseudogene" -->
<!-- in a feature element: "experimental" | "predicted" | "atypical" |
     "absent" -->
<!-- in a summary element: "tentative" -->
<!ELEMENT status       (#PCDATA)>



