Saturday, 9 April 2016

SQL Graph Database Using Continued Fractions

This post is a continuation of a previous post (Continued Fraction Database File System) which used a SQL RDBMS to implement a file system tree using continued fractions. If you want to understand the maths behind the project, read that previous post first and the paper by Dan Hazel which it is based on, Using rational numbers to key nested sets.

In the previous post, each node had one path, and so could only represent trees. In a graph, each node can have multiple paths. Each path can be represented by a list of integers, where each integer is the index of the child beneath it's parent node, for example:

The integer list paths can be used in a continued fraction to calculate a real number value to be used as the primary key for a node path relation. So our database schema looks like this:

This model could be extended with a properties relation to store multiple key-value pairs for each node and path.

The JPA entities for the node and path relations are:

Node.java

package org.adrianwalker.continuedfractions.graph.entity;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import javax.persistence.Basic;
import static javax.persistence.CascadeType.ALL;
import javax.persistence.Column;
import javax.persistence.Entity;
import static javax.persistence.FetchType.LAZY;
import javax.persistence.GeneratedValue;
import static javax.persistence.GenerationType.IDENTITY;
import javax.persistence.Id;
import javax.persistence.NamedQueries;
import javax.persistence.NamedQuery;
import javax.persistence.OneToMany;
import javax.persistence.Table;

@Entity
@Table(name = "node")
@NamedQueries({
  @NamedQuery(name = "tree",
      query = "SELECT DISTINCT n2 "
      + "FROM Node n1, Node n2, NodePath np1, NodePath np2 "
      + "WHERE n1.id = np1.node.id "
      + "AND (np2.id >= np1.id AND np2.id < np1.sid) "
      + "AND n2.id = np2.node.id "
      + "AND n1.id = :id "
      + "ORDER BY n2.id"
  ),
  @NamedQuery(name = "parents",
      query = "SELECT DISTINCT n2 "
      + "FROM Node n1, Node n2, NodePath np1, NodePath np2 "
      + "WHERE n1.id = np1.node.id "
      + "AND n2.id = np2.node.id "
      + "AND (np1.id > np2.id AND np1.id < np2.sid) "
      + "AND np2.hops = np1.hops - 1 "
      + "AND n1.id = :id "
      + "ORDER BY np2.id"
  ),
  @NamedQuery(name = "children",
      query = "SELECT DISTINCT n2 "
      + "FROM Node n1, Node n2, NodePath np1, NodePath np2 "
      + "WHERE n1.id = np1.node.id "
      + "AND n2.id = np2.node.id "
      + "AND (np2.id > np1.id AND np2.id < np1.sid) "
      + "AND np2.hops = np1.hops + 1 "
      + "AND n1.id = :id "
      + "ORDER BY np2.id"
  )})
public class Node implements Serializable {

  private static final long serialVersionUID = 1L;

  @Id
  @GeneratedValue(strategy = IDENTITY)
  @Basic(optional = false)
  @Column(name = "id", nullable = false)
  private Long id;

  @Basic(optional = false)
  @Column(name = "name", nullable = false)
  private String name;

  @OneToMany(fetch = LAZY, cascade = ALL, orphanRemoval = true, mappedBy = "node")
  private List<NodePath> nodePaths;

  public Node() {
  }

  public Long getId() {
    return id;
  }

  public void setId(final Long id) {
    this.id = id;
  }

  public String getName() {
    return name;
  }

  public void setName(final String name) {
    this.name = name;
  }

  public List<NodePath> getNodePaths() {

    if (null == nodePaths) {
      nodePaths = new ArrayList<>();
    }

    return nodePaths;
  }

  public void setNodePaths(final List<NodePath> nodePaths) {
    this.nodePaths = nodePaths;
  }

  @Override
  public int hashCode() {

    int hash = 5;
    hash = 89 * hash + Objects.hashCode(this.id);

    return hash;
  }

  @Override
  public boolean equals(final Object obj) {

    if (this == obj) {
      return true;
    }

    if (obj == null) {
      return false;
    }

    if (getClass() != obj.getClass()) {
      return false;
    }

    return Objects.equals(this.id, ((Node) obj).id);
  }

  @Override
  public String toString() {
    return "Node{" + "id=" + id + ", name=" + name + '}';
  }
}

NodePath.java

package org.adrianwalker.continuedfractions.graph.entity;

import java.io.Serializable;
import java.math.BigDecimal;
import java.util.Objects;
import javax.persistence.Basic;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Index;
import javax.persistence.JoinColumn;
import javax.persistence.ManyToOne;
import javax.persistence.Table;
import javax.persistence.UniqueConstraint;
import org.adrianwalker.continuedfractions.Fraction;

@Entity
@Table(name = "node_path",
    uniqueConstraints = {
      @UniqueConstraint(columnNames = {"id", "sid"})
    },
    indexes = {
      @Index(columnList = "hops", unique = false)
    })
public class NodePath implements Serializable {

  private static final long serialVersionUID = 1L;

  @Id
  @Basic(optional = false)
  @Column(name = "id", nullable = false, precision = (Fraction.SCALE * 2) - 1, scale = Fraction.SCALE)
  private BigDecimal id;

  @Basic(optional = false)
  @Column(name = "sid", nullable = false, precision = (Fraction.SCALE * 2) - 1, scale = Fraction.SCALE)
  private BigDecimal sid;

  @Basic(optional = false)
  @Column(name = "hops", nullable = false)
  private Integer hops;

  @ManyToOne(optional = false)
  @JoinColumn(name = "node_id", referencedColumnName = "id", nullable = false)
  private Node node;

  public NodePath() {
  }

  public BigDecimal getId() {
    return id;
  }

  public void setId(final BigDecimal id) {
    this.id = id;
  }

  public BigDecimal getSid() {
    return sid;
  }

  public void setSid(final BigDecimal sid) {
    this.sid = sid;
  }

  public Integer getHops() {
    return hops;
  }

  public void setHops(final Integer hops) {
    this.hops = hops;
  }

  public Node getNode() {
    return node;
  }

  public void setNode(final Node node) {
    this.node = node;
  }

  @Override
  public int hashCode() {

    int hash = 3;
    hash = 41 * hash + Objects.hashCode(this.id);

    return hash;
  }

  @Override
  public boolean equals(final Object obj) {

    if (this == obj) {
      return true;
    }

    if (obj == null) {
      return false;
    }

    if (getClass() != obj.getClass()) {
      return false;
    }

    return Objects.equals(this.id, ((NodePath) obj).id);
  }

  @Override
  public String toString() {
    return "NodePath{" + "id=" + id + ", sid=" + sid + ", hops=" + hops + '}';
  }
And a simple JPA controller:

Controller.java

package org.adrianwalker.continuedfractions.graph.controller;

import java.util.List;
import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.Query;
import org.adrianwalker.continuedfractions.graph.entity.Node;
import org.adrianwalker.continuedfractions.graph.entity.NodePath;

public final class Controller {

  private final EntityManagerFactory emf;

  public Controller(final EntityManagerFactory emf) {
    this.emf = emf;
  }

  public EntityManager getEntityManager() {
    return emf.createEntityManager();
  }

  public Node create(final Node node) throws Exception {

    EntityManager em = getEntityManager();

    try {
      begin(em);
      em.persist(node);
      end(em);
    } finally {
      em.close();
    }

    return node;
  }

  public NodePath create(final NodePath nodePath) throws Exception {

    EntityManager em = getEntityManager();

    try {
      begin(em);
      em.persist(nodePath);
      end(em);
    } finally {
      em.close();
    }

    return nodePath;
  }

  public List<Node> tree(final Node node) {

    EntityManager em = getEntityManager();
    Query tree = em.createNamedQuery("tree");
    tree.setParameter("id", node.getId());

    try {
      return tree.getResultList();
    } finally {
      em.close();
    }
  }

  public List<Node> parents(final Node node) {

    EntityManager em = getEntityManager();
    Query children = em.createNamedQuery("parents");
    children.setParameter("id", node.getId());

    try {
      return children.getResultList();
    } finally {
      em.close();
    }
  }

  public List<Node> children(final Node node) {

    EntityManager em = getEntityManager();
    Query children = em.createNamedQuery("children");
    children.setParameter("id", node.getId());

    try {
      return children.getResultList();
    } finally {
      em.close();
    }
  }

  private void begin(final EntityManager em) {
    em.getTransaction().begin();
  }

  private void end(final EntityManager em) {
    em.getTransaction().commit();
  }
}

The Graph class calls the controller to persist Node and NodePath entities. It has methods to add nodes and node paths, to list all the nodes beneath a given node, and methods to get the immediate parents and children of a given node:

Graph.java

package org.adrianwalker.continuedfractions.graph;

import java.math.BigDecimal;
import java.util.List;
import static org.adrianwalker.continuedfractions.Fraction.decimal;
import org.adrianwalker.continuedfractions.graph.controller.Controller;
import org.adrianwalker.continuedfractions.graph.entity.Node;
import org.adrianwalker.continuedfractions.graph.entity.NodePath;
import static org.adrianwalker.continuedfractions.graph.Path.sibling;
import static org.adrianwalker.continuedfractions.Fraction.fraction;

public final class Graph {

  private final Controller controller;

  public Graph(final Controller controller) {

    this.controller = controller;
  }

  public Node addNode(final String name) throws Exception {

    Node node = new Node();
    node.setName(name);

    return controller.create(node);
  }

  public NodePath addPath(final Node node, final int... path) throws Exception {

    NodePath nodePath = new NodePath();
    int[] nvDv = fraction(path);
    BigDecimal id = decimal(nvDv);
    int[] snvSdv = fraction(sibling(path));
    BigDecimal sid = decimal(snvSdv);

    nodePath.setId(id);
    nodePath.setSid(sid);
    nodePath.setHops(path.length);
    nodePath.setNode(node);

    return controller.create(nodePath);
  }

  public List<Node> tree(final Node node) {

    return controller.tree(node);
  }

  public List<Node> parents(final Node node) {

    return controller.parents(node);
  }

  public List<Node> children(final Node node) {

    return controller.children(node);
  }
}

Below is a unit test example of how to use the Graph class to create the graph in the image above:

GraphTest.java

package org.adrianwalker.continuedfractions.graph;

import java.util.Arrays;
import static java.util.stream.Collectors.toList;
import javax.persistence.EntityManagerFactory;
import javax.persistence.Persistence;
import org.adrianwalker.continuedfractions.graph.controller.Controller;
import org.adrianwalker.continuedfractions.graph.entity.Node;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class GraphTest {

  private static EntityManagerFactory emf;

  public GraphTest() {
  }

  @BeforeClass
  public static void setUpClass() {
    emf = Persistence.createEntityManagerFactory("graph");
  }

  @AfterClass
  public static void tearDownClass() {
    emf.close();
  }

  @Before
  public void setUp() {
  }

  @After
  public void tearDown() {
  }

  /*
      A
     /|\
    B | C
     \|/
      D
      |
      E
   */
  @Test
  public void testGraph() throws Exception {

    Controller nc = new Controller(emf);
    Graph hierarchy = new Graph(nc);

    Node a = hierarchy.addNode("A");
    Node b = hierarchy.addNode("B");
    Node c = hierarchy.addNode("C");
    Node d = hierarchy.addNode("D");
    Node e = hierarchy.addNode("E");

    hierarchy.addPath(a, 1);
    hierarchy.addPath(b, 1, 1);
    hierarchy.addPath(c, 1, 2);
    hierarchy.addPath(d, 1, 3);
    hierarchy.addPath(d, 1, 1, 1);
    hierarchy.addPath(d, 1, 2, 1);
    hierarchy.addPath(e, 1, 1, 1, 1);
    hierarchy.addPath(e, 1, 2, 1, 1);
    hierarchy.addPath(e, 1, 3, 1);

    // trees
    Assert.assertEquals(Arrays.asList(new String[]{"A", "B", "C", "D", "E"}),
        hierarchy.tree(a).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"B", "D", "E"}),
        hierarchy.tree(b).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"C", "D", "E"}),
        hierarchy.tree(c).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"D", "E"}),
        hierarchy.tree(d).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"E"}),
        hierarchy.tree(e).stream()
        .map(node -> node.getName())
        .collect(toList()));

    // children
    Assert.assertEquals(Arrays.asList(new String[]{"B", "C", "D"}),
        hierarchy.children(a).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"D"}),
        hierarchy.children(b).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"D"}),
        hierarchy.children(c).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"E"}),
        hierarchy.children(d).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{}),
        hierarchy.children(e).stream()
        .map(node -> node.getName())
        .collect(toList()));

    // parents
    Assert.assertEquals(Arrays.asList(new String[]{}),
        hierarchy.parents(a).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"A"}),
        hierarchy.parents(b).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"A"}),
        hierarchy.parents(c).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"A", "B", "C"}),
        hierarchy.parents(d).stream()
        .map(node -> node.getName())
        .collect(toList()));

    Assert.assertEquals(Arrays.asList(new String[]{"D"}),
        hierarchy.parents(e).stream()
        .map(node -> node.getName())
        .collect(toList()));
  }
}

Source Code