PanTools graph database structure

Fig. 23 shows the schema of the PanTools graph database. Use it as a reference when writing Cypher queries and when you need to understand how the database is structured.

digraph G {
   // Schema diagram of the PanTools graph database: every box is a node
   // type and every labelled arrow is a relationship type between two node
   // types (or from a node type to itself).
   // Several edge labels (e.g. "has", "part_of", "is_similar_to",
   // "contributes_to") are reused between different pairs of node types, so
   // a label alone does not identify the endpoints.

   // Lay the graph out left-to-right and draw every node as a box.
   rankdir=LR;
   node [shape=box];

   // Metadata nodes: the pangenome and the genomes, accessions, phenotypes,
   // annotations and sequences attached to it.
   // NOTE(review): unlike the other three clusters, this one has no label=
   // attribute, so it is drawn without a title — confirm this is intended.
   subgraph cluster_metadata {
      pangenome [label="pangenome"];
      genome [label="genome"];
      accession [label="accession"];
      phenotype [label="phenotype"];
      annotation [label="annotation"];
      sequence [label="sequence"];

      pangenome -> genome [label="has"];
      genome -> phenotype [label="has_phenotype"];
      genome -> accession [label="has"];
      // Note the direction: annotation points at genome, not the reverse.
      annotation -> genome [label="annotates"];
      genome -> sequence [label="has"];
   }

   // Nucleotide layer: a single node type linked to itself by four
   // relationship types.
   // NOTE(review): FF/FR/RF/RR presumably encode forward/reverse strand
   // orientation of the two connected nodes — confirm against the PanTools
   // documentation.
   subgraph cluster_nucleotide {
      label="nucleotide layer";

      nucleotide [label="nucleotide"];

      nucleotide -> nucleotide [label="FF"];
      nucleotide -> nucleotide [label="FR"];
      nucleotide -> nucleotide [label="RF"];
      nucleotide -> nucleotide [label="RR"];
   }

   // Links between sequence nodes and the nucleotide layer.
   // NOTE(review): only "FF" is drawn from sequence to nucleotide, while all
   // four labels are drawn in the opposite direction — verify this asymmetry
   // against the actual database.
   sequence -> nucleotide [label="FF"];
   nucleotide -> sequence [label="FF"];
   nucleotide -> sequence [label="RF"];
   nucleotide -> sequence [label="FR"];
   nucleotide -> sequence [label="RR"];

   // Annotation layer: gene features and the relationships among them.
   subgraph cluster_annotation {
      label="annotation layer";
      gene [label="gene"];
      mRNA [label="mRNA"];
      CDS [label="CDS"];
      exon [label="exon"];

      // gene and mRNA are connected by two distinct relationship types.
      gene -> mRNA [label="is_parent_of"];
      gene -> mRNA [label="codes_for"];
      // mRNA and CDS are connected in both directions, with different labels.
      mRNA -> CDS [label="is_parent_of"];
      CDS -> mRNA [label="contributes_to"];
      mRNA -> exon [label="is_parent_of"];
      mRNA -> mRNA [label="is_similar_to"];
   }

   // Node types that are drawn outside any cluster.
   homology_group [label="homology_group"];
   variant [label="variant"];
   repeat [label="repeat"];
   synteny [label="synteny"];

   // Relationships that attach the stand-alone node types to mRNA and to the
   // nucleotide layer.
   homology_group -> mRNA [label="has_homolog"];
   mRNA -> variant [label="has_variant"];
   // NOTE(review): repeat uses "start"/"stop", whereas gene, mRNA, CDS and
   // exon below use "starts"/"stops" — confirm which spelling the database
   // really uses before relying on it in a Cypher query.
   repeat -> nucleotide [label="start"];
   repeat -> nucleotide [label="stop"];
   mRNA -> synteny [label="part_of"];
   synteny -> synteny [label="is_syntenic_to"];
   // Each annotation feature points at the nucleotide layer twice, once with
   // "starts" and once with "stops".
   gene -> nucleotide [label="starts"];
   gene -> nucleotide [label="stops"];
   mRNA -> nucleotide [label="starts"];
   mRNA -> nucleotide [label="stops"];
   CDS -> nucleotide [label="starts"];
   CDS -> nucleotide [label="stops"];
   exon -> nucleotide [label="starts"];
   exon -> nucleotide [label="stops"];

   // Functions layer: functional annotation node types. Every relationship
   // inside this cluster ends at GO.
   subgraph cluster_functions {
      label="functions layer";

      GO [label="GO"];
      pfam [label="pfam"];
      interpro [label="interpro"];
      tigrfam [label="tigrfam"];

      // Relationships from GO to itself.
      GO -> GO [label="regulates"];
      GO -> GO [label="negatively_regulates"];
      GO -> GO [label="positively_regulates"];
      GO -> GO [label="is_a"];
      GO -> GO [label="part_of"];
      // Relationships from the other function node types to GO.
      pfam -> GO [label="is_similar_to"];
      interpro -> GO [label="is_similar_to"];
      // tigrfam is connected to GO by two distinct relationship types.
      tigrfam -> GO [label="is_similar_to"];
      tigrfam -> GO [label="contributes_to"];
   }

   // mRNA is the only node type outside the functions layer that is drawn
   // with relationships into it, one per function node type.
   mRNA -> GO [label="has_GO"];
   mRNA -> pfam [label="has_pfam"];
   mRNA -> interpro [label="has_interpro"];
   mRNA -> tigrfam [label="has_tigrfam"];
}

Fig. 23 PanTools graph database schema in Neo4j.