22
33import static org .nodes .data .RDF .simplify ;
44
5+ import java .io .BufferedInputStream ;
56import java .io .File ;
7+ import java .io .FileInputStream ;
8+ import java .io .FileNotFoundException ;
9+ import java .io .IOException ;
10+ import java .io .InputStream ;
611import java .util .ArrayList ;
712import java .util .HashMap ;
13+ import java .util .LinkedHashMap ;
814import java .util .List ;
915import java .util .Map ;
1016import java .util .regex .Pattern ;
1117
1218import org .nodes .DTGraph ;
1319import org .nodes .DTLink ;
1420import org .nodes .DTNode ;
21+ import org .nodes .LightUGraph ;
1522import org .nodes .MapDTGraph ;
1623import org .nodes .Node ;
24+ import org .nodes .UGraph ;
25+ import org .nodes .UNode ;
1726import org .openrdf .model .Statement ;
1827import org .openrdf .rio .RDFFormat ;
28+ import org .openrdf .rio .RDFParseException ;
29+ import org .openrdf .rio .RDFParser ;
30+ import org .openrdf .rio .Rio ;
31+ import org .openrdf .rio .helpers .RDFHandlerBase ;
32+ import org .openrdf .rio .turtle .TurtleParser ;
33+ import org .rdfhdt .hdt .exceptions .NotFoundException ;
34+ import org .rdfhdt .hdt .hdt .HDT ;
35+ import org .rdfhdt .hdt .hdt .HDTManager ;
36+ import org .rdfhdt .hdt .triples .IteratorTripleString ;
37+ import org .rdfhdt .hdt .triples .TripleString ;
1938
2039import nl .peterbloem .kit .Functions ;
2140import nl .peterbloem .kit .Global ;
2241
2342public class RDF
2443{
44+ public static MapDTGraph <String , String > readHDT (File file )
45+ throws FileNotFoundException , IOException
46+ {
47+ MapDTGraph <String , String > graph = new MapDTGraph <String , String >();
48+
49+ // Load HDT file. NOTE: Use loadHDT() if you don't need ?P?, ?PO or ??O queries
50+ HDT hdt = HDTManager .loadHDT (
51+ new BufferedInputStream (new FileInputStream (file )), null );
52+
53+ int i = 0 ;
54+ try {
55+ // Search pattern: Empty string means "any"
56+ IteratorTripleString it = hdt .search ("" , "" , "" );
57+ DTNode <String , String > node1 , node2 ;
58+
59+ while (it .hasNext ()) {
60+ TripleString ts = it .next ();
61+
62+ String subject = ts .getSubject ().toString (),
63+ predicate = ts .getPredicate ().toString (),
64+ object = ts .getObject ().toString ();
65+
66+ node1 = graph .node (subject );
67+ node2 = graph .node (object );
68+
69+ if (node1 == null )
70+ node1 = graph .add (subject );
71+
72+
73+ if (node2 == null )
74+ node2 = graph .add (object );
75+
76+ node1 .connect (node2 , predicate );
77+
78+ Functions .dot (i , (int )it .estimatedNumResults ());
79+ i ++;
80+ }
81+ } catch (NotFoundException e )
82+ {
83+ // File must be empty, return empty graph
84+ } finally
85+ {
86+ // IMPORTANT: Free resources
87+ hdt .close ();
88+ }
89+
90+ return graph ;
91+ }
92+
93+
2594 /**
2695 * Reads the given file into a graph.
2796 *
@@ -30,12 +99,22 @@ public class RDF
3099 */
31100 public static MapDTGraph <String , String > read (File file )
32101 {
33- return read (file , null );
102+ return read (file , RDFFormat .RDFXML );
103+ }
104+
105+ public static MapDTGraph <String , String > read (File file , RDFFormat format )
106+ {
107+ return read (file , null , format );
34108 }
35109
36110 public static MapDTGraph <String , String > read (File file , List <String > linkWhitelist )
37111 {
38- RDFDataSet testSet = new RDFFileDataSet (file , RDFFormat .RDFXML );
112+ return read (file , null , RDFFormat .RDFXML );
113+ }
114+
115+ public static MapDTGraph <String , String > read (File file , List <String > linkWhitelist , RDFFormat format )
116+ {
117+ RDFDataSet testSet = new RDFFileDataSet (file , format );
39118
40119 List <Statement > triples = testSet .getStatements (null , null , null , false );
41120
@@ -82,8 +161,9 @@ public static MapDTGraph<String, String> createDirectedGraph(
82161 MapDTGraph <String , String > graph = new MapDTGraph <String , String >();
83162 DTNode <String , String > node1 , node2 ;
84163
85- Global .log ().info ("Constructing graph" );
164+ Global .log ().info ("Constructing graph (size: " + sesameGraph . size ()+ ") " );
86165
166+ int i = 0 ;
87167 for (Statement statement : sesameGraph )
88168 {
89169
@@ -119,6 +199,9 @@ public static MapDTGraph<String, String> createDirectedGraph(
119199 node2 = graph .add (object );
120200
121201 node1 .connect (node2 , predicate );
202+
203+ Functions .dot (i , sesameGraph .size ());
204+ i ++;
122205 }
123206
124207 return graph ;
@@ -180,4 +263,52 @@ public static DTGraph<String, String> simplify(DTGraph<String, String> graph)
180263
181264 return out ;
182265 }
266+
267+ /**
268+ * Reads a simple graph: no self-loops, no multiple edges. Two resources
269+ * have an edge if they are connected in either direction by one or more predicates
270+ *
271+ * @param file
272+ * @return
273+ */
274+ public static UGraph <String > readSimple (File file )
275+ throws IOException
276+ {
277+ RDFFormat format = RDFFormat .forFileName (file .getName ());
278+
279+ InputStream in = new BufferedInputStream (new FileInputStream (file ));
280+ RDFParser parser = Rio .createParser (format );
281+
282+ final UGraph <String > graph = new LightUGraph <String >();
283+ final Map <String , UNode <String >> nodes = new HashMap <String , UNode <String >>();
284+
285+ parser .setRDFHandler (new RDFHandlerBase ()
286+ {
287+ @ Override
288+ public void handleStatement (Statement statement )
289+ {
290+ String subject = statement .getSubject ().toString ();
291+ String object = statement .getObject ().toString ();
292+
293+ if (! nodes .containsKey (subject ))
294+ nodes .put (subject , graph .add (subject ));
295+ if (! nodes .containsKey (object ))
296+ nodes .put (object , graph .add (object ));
297+
298+ UNode <String > subNode = nodes .get (subject );
299+ UNode <String > obNode = nodes .get (object );
300+
301+ if ( (!subNode .connected (obNode )) && subNode .index () != obNode .index () )
302+ subNode .connect (obNode );
303+ }
304+ });
305+
306+ try {
307+ parser .parse (in , "local://" );
308+ } catch (Exception e )
309+ {
310+ throw new RuntimeException ("Error parsing file (" +file .getAbsolutePath ()+")." , e );
311+ }
312+ return graph ;
313+ }
183314}
0 commit comments