1818 */
1919package ubic .basecode .ontology .search ;
2020
21- import java .util .ArrayList ;
22- import java .util .Collection ;
23- import java .util .HashSet ;
24- import java .util .List ;
25- import java .util .Set ;
26- import java .util .regex .Matcher ;
27- import java .util .regex .Pattern ;
28-
29- import org .apache .commons .lang3 .StringEscapeUtils ;
30- import org .apache .commons .lang3 .StringUtils ;
31- import org .apache .commons .lang3 .exception .ExceptionUtils ;
32- import org .apache .commons .lang3 .time .StopWatch ;
33- import org .apache .lucene .queryParser .QueryParser .Operator ;
34- import org .slf4j .Logger ;
35- import org .slf4j .LoggerFactory ;
36-
3721import com .hp .hpl .jena .ontology .Individual ;
3822import com .hp .hpl .jena .ontology .OntClass ;
3923import com .hp .hpl .jena .ontology .OntModel ;
4024import com .hp .hpl .jena .rdf .model .NodeIterator ;
4125import com .hp .hpl .jena .rdf .model .RDFNode ;
4226import com .hp .hpl .jena .shared .JenaException ;
43- import com .hp .hpl .jena .sparql .ARQException ;
27+ import org .apache .commons .lang3 .StringUtils ;
28+ import org .apache .commons .lang3 .time .StopWatch ;
29+ import org .apache .commons .text .StringEscapeUtils ;
30+ import org .apache .lucene .queryParser .QueryParser .Operator ;
31+ import org .slf4j .Logger ;
32+ import org .slf4j .LoggerFactory ;
33+ import ubic .basecode .ontology .model .*;
4434
45- import ubic .basecode .ontology .model .OntologyIndividual ;
46- import ubic .basecode .ontology .model .OntologyIndividualImpl ;
47- import ubic .basecode .ontology .model .OntologyResource ;
48- import ubic .basecode .ontology .model .OntologyTerm ;
49- import ubic .basecode .ontology .model .OntologyTermImpl ;
35+ import java .util .*;
36+ import java .util .regex .Matcher ;
37+ import java .util .regex .Pattern ;
5038
5139/**
52- * @author pavlidis
53- *
40+ * @author pavlidis
41+ *
5442 */
5543public class OntologySearch {
5644
5745 // Lucene cannot properly parse these characters... gives a query parse error.
5846 // OntologyTerms don't contain them anyway
5947 private final static char [] INVALID_CHARS = { ':' , '(' , ')' , '?' , '^' , '[' , ']' , '{' , '}' , '!' , '~' , '"' , '\'' };
6048
61- private static Logger log = LoggerFactory .getLogger ( OntologySearch .class );
49+ private static final Logger log = LoggerFactory .getLogger ( OntologySearch .class );
6250
6351 /**
6452 * Find classes that match the query string. Obsolete terms are not returned.
6553 *
6654 * @param model that goes with the index
6755 * @param index to search
6856 * @param queryString
69- * @return Collection of OntologyTerm objects
57+ * @return Collection of OntologyTerm objects
7058 */
71- public static Collection <OntologyTerm > matchClasses ( OntModel model , SearchIndex index , String queryString ) {
59+ public static Collection <OntologyTerm > matchClasses ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
7260
7361 Set <OntologyTerm > results = new HashSet <>();
7462 NodeIterator iterator = runSearch ( index , queryString );
7563
76- while ( iterator != null && iterator .hasNext () ) {
64+ while ( iterator .hasNext () ) {
7765 RDFNode r = iterator .next ();
7866 r = r .inModel ( model );
79- if ( log .isDebugEnabled () ) log . debug ( "Search results: " + r );
80- if ( r .isURIResource () ) {
67+ log .debug ( "Search results: {}." , r );
68+ if ( r .isURIResource () && r . canAs ( OntClass . class ) ) {
8169 try {
82-
83- if ( !r .canAs ( OntClass .class ) ) {
84- if ( log .isDebugEnabled () )
85- log .debug ( "Unable to convert jena resource " + r
86- + " to OntClass.class, skipping. Resource: " + r .toString () );
87- continue ;
88- }
89-
9070 OntClass cl = r .as ( OntClass .class );
9171 OntologyTermImpl impl2 = new OntologyTermImpl ( cl );
9272 if ( impl2 .isTermObsolete () ) continue ;
9373 results .add ( impl2 );
94- if ( log .isDebugEnabled () ) log .debug ( impl2 .toString () );
95- } catch ( ARQException e ) {
96- throw new RuntimeException ( e .getCause () );
74+ log .debug ( "{}" , impl2 );
9775 } catch ( JenaException e ) {
98- throw new RuntimeException ( e .getCause () );
99- } catch ( Exception e ) {
100- log .error ( ExceptionUtils .getStackTrace ( e ), e );
76+ throw new OntologySearchJenaException ( String .format ( "Failed to convert Jena resource %s to %s." , r , OntClass .class ), queryString , e );
10177 }
10278 }
103-
10479 }
80+
10581 return results ;
10682 }
10783
@@ -111,12 +87,12 @@ public static Collection<OntologyTerm> matchClasses( OntModel model, SearchIndex
11187 * @param model that goes with the index
11288 * @param index to search
11389 * @param queryString
114- * @return Collection of OntologyTerm objects
90+ * @return Collection of OntologyTerm objects
11591 */
116- public static Collection <OntologyIndividual > matchIndividuals ( OntModel model , SearchIndex index , String queryString ) {
92+ public static Collection <OntologyIndividual > matchIndividuals ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
11793
11894 Set <OntologyIndividual > results = new HashSet <>();
119- NodeIterator iterator = null ;
95+ NodeIterator iterator ;
12096
12197 queryString = queryString .trim ();
12298
@@ -127,53 +103,34 @@ public static Collection<OntologyIndividual> matchIndividuals( OntModel model, S
127103 if ( lastWordLength > 1 ) {
128104 try { // Use wildcard search.
129105 iterator = runSearch ( index , queryString + "*" );
130- } catch ( ARQException e ) { // retry without wildcard
131- log .info ( "Caught " + e + " caused by " + e .getCause () + " reason " + e .getMessage ()
132- + ". Retrying search without wildcard." );
133- iterator = runSearch ( index , queryString );
106+ } catch ( OntologySearchJenaException e ) { // retry without wildcard
107+ log .warn ( "Failed to perform search with wildcard. Retrying search without wildcard." , e );
108+ try {
109+ iterator = runSearch ( index , queryString );
110+ } catch ( OntologySearchJenaException e1 ) {
111+ throw new RetryWithoutWildcardFailedException ( "Failed to search while retrying without wildcard." , queryString , e .getCause (), e1 .getCause () );
112+ }
134113 }
135114 } else {
136115 iterator = runSearch ( index , queryString );
137116 }
138117
139- while ( iterator != null && iterator .hasNext () ) {
118+ while ( iterator .hasNext () ) {
140119 RDFNode r = iterator .next ();
141120 r = r .inModel ( model );
142- if ( log .isDebugEnabled () ) log . debug ( "Search results: " + r );
143- if ( r .isResource () ) {
121+ log .debug ( "Search results: {}" , r );
122+ if ( r .isResource () && r . canAs ( Individual . class ) ) {
144123 try {
145-
146- if ( !r .canAs ( Individual .class ) ) {
147- if ( log .isDebugEnabled () )
148- log .debug ( "Unable to convert jena resource " + r
149- + " to Individual.class, skipping. Resource: " + r .toString () );
150- continue ;
151- }
152-
153124 Individual cl = r .as ( Individual .class );
154125 OntologyIndividual impl2 = new OntologyIndividualImpl ( cl );
155126 results .add ( impl2 );
156- if ( log .isDebugEnabled () ) log .debug ( impl2 .toString () );
157- } catch ( ARQException e ) {
158- throw new RuntimeException ( e .getCause () );
127+ log .debug ( "{}" , impl2 );
159128 } catch ( JenaException je ) {
160-
161- log .error ( "Trying again: " + je , je );
162- try {
163- Individual cl = r .as ( Individual .class );
164- OntologyIndividual impl2 = new OntologyIndividualImpl ( cl );
165- results .add ( impl2 );
166-
167- } catch ( Exception e ) {
168- log .error ( "Second attempt failed: " + e , e );
169- }
170-
171- } catch ( Exception e ) {
172- log .error ( ExceptionUtils .getStackTrace ( e ), e );
129+ throw new OntologySearchJenaException ( String .format ( "Failed to convert Jena resource %s to %s." , r , Individual .class ), queryString , je );
173130 }
174131 }
175-
176132 }
133+
177134 return results ;
178135
179136 }
@@ -185,9 +142,9 @@ public static Collection<OntologyIndividual> matchIndividuals( OntModel model, S
185142 * @param model that goes with the index
186143 * @param index to search
187144 * @param queryString
188- * @return Collection of OntologyResource objects
145+ * @return Collection of OntologyResource objects
189146 */
190- public static Collection <OntologyResource > matchResources ( OntModel model , SearchIndex index , String queryString ) {
147+ public static Collection <OntologyResource > matchResources ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
191148
192149 Set <OntologyResource > results = new HashSet <>();
193150 NodeIterator iterator = null ;
@@ -201,69 +158,47 @@ public static Collection<OntologyResource> matchResources( OntModel model, Searc
201158 if ( lastWordLength > 1 ) {
202159 try { // Use wildcard search.
203160 iterator = runSearch ( index , queryString + "*" );
204- } catch ( ARQException e ) { // retry without wildcard
205- log .error ( "Caught " + e + " caused by " + e .getCause () + " while searching " + model + " for "
206- + queryString + ". Retrying search without wildcard." );
207- iterator = runSearch ( index , queryString );
161+ } catch ( OntologySearchJenaException e ) { // retry without wildcard
162+ // retry without wildcard
163+ log .warn ( "Failed to search in {}. Retrying search without wildcard." , model , e );
164+ try {
165+ iterator = runSearch ( index , queryString );
166+ } catch ( OntologySearchJenaException e1 ) {
167+ throw new RetryWithoutWildcardFailedException ( "Failed to search while retrying without wildcard." , queryString , e .getCause (), e1 .getCause () );
168+ }
208169 }
209170 } else {
210- try {
211- iterator = runSearch ( index , queryString );
212- } catch ( ARQException e ) { // retry without wildcard
213- log .error ( "Caught " + e + " caused by " + e .getCause () + " while searching " + model + " for "
214- + queryString + ". Retrying search without wildcard." );
215- throw e ;
216- }
171+ iterator = runSearch ( index , queryString );
217172 }
218173
219- while ( iterator != null && iterator .hasNext () ) {
174+ while ( iterator .hasNext () ) {
220175 RDFNode r = iterator .next ();
221176 r = r .inModel ( model );
222- if ( log .isDebugEnabled () ) log . debug ( "Search results: " + r );
223- if ( r .isURIResource () ) {
177+ log .debug ( "Search results: {}." , r );
178+ if ( r .isURIResource () && r . canAs ( OntClass . class ) ) {
224179 try {
225-
226- if ( !r .canAs ( OntClass .class ) ) {
227- if ( log .isDebugEnabled () )
228- log .debug ( "Unable to convert jena resource resource " + r
229- + " to OntClass.class, skipping. Resource: " + r .toString () );
230- continue ;
231- }
232-
233180 OntClass cl = r .as ( OntClass .class );
234181 OntologyTermImpl impl2 = new OntologyTermImpl ( cl );
235182 if ( impl2 .isTermObsolete () ) continue ;
236183 results .add ( impl2 );
237184 if ( log .isDebugEnabled () ) log .debug ( impl2 .toString () );
238185 } catch ( JenaException e ) {
239186 // these are completely uninformative exceptions at the moment.
240- log .error ( e .getMessage (), e );
241- // throw new RuntimeException( e.getCause() );
242- } catch ( Exception e ) {
243- log .error ( e .getMessage (), e );
187+ throw new OntologySearchJenaException ( e .getMessage (), queryString , e );
244188 }
245- } else if ( r .isResource () ) {
189+ } else if ( r .isResource () && r . canAs ( Individual . class ) ) {
246190 try {
247-
248- if ( !r .canAs ( Individual .class ) ) {
249- if ( log .isDebugEnabled () )
250- log .debug ( "Unable to convert jena resource resource " + r
251- + " to Individual.class, skipping. Resource: " + r .toString () );
252- continue ;
253- }
254-
255191 Individual cl = r .as ( Individual .class );
256192 OntologyIndividual impl2 = new OntologyIndividualImpl ( cl );
257193 results .add ( impl2 );
258194 if ( log .isDebugEnabled () ) log .debug ( impl2 .toString () );
259195 } catch ( JenaException e ) {
260196 // these are completely uninformative exceptions at the moment.
261- log .error ( e .getMessage (), e );
262- // throw new RuntimeException( e.getCause() );
263- } catch ( Exception e ) {
264- log .error ( ExceptionUtils .getStackTrace ( e ), e );
197+ throw new OntologySearchJenaException ( e .getMessage (), queryString , e );
265198 }
266- } else if ( log .isDebugEnabled () ) log .debug ( "This search term not included in the results: " + r );
199+ } else {
200+ log .debug ( "This search term not included in the results: {}." , r );
201+ }
267202
268203 }
269204 return results ;
@@ -291,13 +226,7 @@ public static String stripInvalidCharacters( String toStrip ) {
291226 return StringEscapeUtils .escapeJava ( result ).trim ();
292227 }
293228
294- /**
295- * @param model
296- * @param index
297- * @param queryString
298- * @return
299- */
300- private static NodeIterator runSearch ( SearchIndex index , String queryString ) {
229+ private static NodeIterator runSearch ( SearchIndex index , String queryString ) throws OntologySearchJenaException {
301230 String strippedQuery = StringUtils .strip ( queryString );
302231
303232 if ( StringUtils .isBlank ( strippedQuery ) ) {
@@ -311,26 +240,20 @@ private static NodeIterator runSearch( SearchIndex index, String queryString ) {
311240 list .add ( m .group ( 1 ) );
312241 }
313242 String enhancedQuery = StringUtils .join ( list , " " + Operator .AND + " " );
314- /*
315- * Note that LARQ does not allow you to change the default operator without making it non-thread-safe.
316- */
317243
318- try {
319- StopWatch timer = new StopWatch ();
320- timer .start ();
321- index .getLuceneQueryParser ().setDefaultOperator ( Operator .AND );
322- NodeIterator iterator = index .searchModelByIndex ( enhancedQuery );
244+ // Note: LARQ does not allow you to change the default operator without making it non-thread-safe.
245+ index .getLuceneQueryParser ().setDefaultOperator ( Operator .AND );
323246
247+ StopWatch timer = StopWatch .createStarted ();
248+ try {
249+ return index .searchModelByIndex ( enhancedQuery );
250+ } catch ( JenaException e ) {
251+ throw new OntologySearchJenaException ( "Failed to search with enhanced query." , enhancedQuery , e );
252+ } finally {
253+ timer .stop ();
324254 if ( timer .getTime () > 100 ) {
325- log .info ( "Ontology resource search for: " + queryString + " (parsed to: " + enhancedQuery + ") : "
326- + timer .getTime () + "ms" );
255+ log .info ( "Ontology resource search for: {} (parsed to: {}) took {} ms." , queryString , enhancedQuery , timer .getTime () );
327256 }
328- return iterator ;
329- } catch ( ARQException e ) {
330- // index is closed?
331- log .error ( "Error while searching: " + e .getMessage (), e );
332- return null ;
333257 }
334-
335258 }
336259}
0 commit comments