The data thus collected had to be sent to our Solr installation.
/**
*
*/
package crrasolrindexer;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.*;
import org.apache.solr.client.solrj.*;
import org.apache.solr.client.solrj.impl.*;
import org.apache.solr.common.*;
/**
* @author slittle2
*
* Indexer sends index records to the Solr server to be indexed.
* The original plan was to turn each IndexDatum into a URL and send
* that to Solr. Sounds simple, right? In practice Indexer uses SolrJ
* to communicate with Solr, so no URLs are built or sent by hand.
*
*/
public class Indexer {

    /** Field names copied from each IndexDatum into its Solr document, in this order. */
    private static final String[] INDEX_DATUM_FIELDS = {
        "key", "author", "title", "date", "note", "subject", "text", "type"
    };

    /**
     * Test driver. Connects to a local multicore Solr instance, optionally
     * adds one fake record (when {@code addNewData} is switched on in the
     * code), and commits.
     *
     * @param args unused
     * @throws SolrServerException if SolrJ reports a failure talking to the server
     * @throws IOException if communication with the server fails
     * @throws MalformedURLException if the hard-coded Solr URL is rejected
     */
    // Available for testing
    public static void main(String[] args) throws SolrServerException, IOException, MalformedURLException {
        boolean addNewData = false;
        // This is being tested on a multicore configuration
        String url = "http://localhost:8983/solr/core0/";
        SolrServer server = new CommonsHttpSolrServer(url);

        // To start with a clean sweep, eliminating all contents of the index
        // before proceeding, call deleteRecord("*:*", url) here.

        if (addNewData) {
            try {
                // Creates a faux index record to test sending data to Solr.
                // The field names are the ones indexID uses for an IndexDatum,
                // not the schema-driven names indexCD uses for a CRRA_Datum.
                SolrInputDocument doc1 = new SolrInputDocument();
                doc1.addField("key", "12345");
                doc1.addField("author", "Billy-bob Shakespeare");
                doc1.addField("title", "Henry 500");
                doc1.addField("date", "1950-?");
                doc1.addField("note", "Not a real entry.");
                doc1.addField("subject", "Hick classics.");
                doc1.addField("text", "Some text here.");
                doc1.addField("type", "MARC");
                Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
                docs.add(doc1);
                server.add(docs);
            } catch (SolrException e) {
                // Best effort in the test driver: report and fall through to commit.
                reportAddFailure(e);
            }
        }
        server.commit(); // Always a good idea
    }

    /**
     * Indexes a set of IndexDatum records using the fixed field list
     * (key, author, title, date, note, subject, text, type).
     * It is recommended that you use indexCD (below) instead.
     *
     * @param setOfID records to index; each is converted to one Solr document
     * @param urlSolr URL of the Solr server (or core) to send the documents to
     * @throws SolrServerException if SolrJ reports a failure talking to the server
     * @throws IOException if communication with the server fails (including a
     *         malformed {@code urlSolr})
     */
    public static void indexID(LinkedHashSet<IndexDatum> setOfID, String urlSolr) throws SolrServerException, IOException{
        SolrServer server = new CommonsHttpSolrServer(urlSolr);
        Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(setOfID.size());
        System.out.println("Number records in Indexer: " + setOfID.size());

        for (IndexDatum datum : setOfID) { // Add each IndexDatum to the batch
            try {
                SolrInputDocument doc = new SolrInputDocument();
                for (String fieldName : INDEX_DATUM_FIELDS) {
                    doc.addField(fieldName, datum.returnField(fieldName));
                }
                docs.add(doc);
            } catch (SolrException e) {
                // A record that cannot be converted is reported and skipped;
                // the remaining records are still indexed.
                reportAddFailure(e);
            }
        }
        sendBatch(server, docs);
    } // end indexID method

    /**
     * Indexes a set of CRRA_Datum records. The field names are not fixed:
     * they are read from the whitespace-separated list returned by
     * {@code CRRA_EADRetriever.returnCurrentCD().returnSchemaNames()}.
     * This is the recommended entry point.
     *
     * @param setOfID records to index; each is converted to one Solr document
     * @param urlSolr URL of the Solr server (or core) to send the documents to
     * @throws SolrServerException if SolrJ reports a failure talking to the server
     * @throws IOException if communication with the server fails (including a
     *         malformed {@code urlSolr})
     */
    public static void indexCD(LinkedHashSet<CRRA_Datum> setOfID, String urlSolr) throws SolrServerException, IOException{
        SolrServer server = new CommonsHttpSolrServer(urlSolr);
        Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(setOfID.size());
        String schemaNames = CRRA_EADRetriever.returnCurrentCD().returnSchemaNames();
        // Split on runs of whitespace after trimming, so that leading, trailing
        // or doubled separators do not produce empty field names.
        String[] fieldNames = schemaNames.trim().split("\\s+");
        System.out.println("Number records in Indexer: " + setOfID.size());

        for (CRRA_Datum datum : setOfID) { // Add each CRRA_Datum to the batch
            try {
                SolrInputDocument doc = new SolrInputDocument();
                // Pass each schema field name to the record and store the
                // associated result in the document under the same name.
                for (String fieldName : fieldNames) {
                    if (fieldName.length() == 0) {
                        continue; // only happens when the schema name list is blank
                    }
                    doc.addField(fieldName, datum.returnField(fieldName));
                }
                docs.add(doc);
            } catch (SolrException e) {
                // A record that cannot be converted is reported and skipped;
                // the remaining records are still indexed.
                reportAddFailure(e);
            }
        }
        sendBatch(server, docs);
    } // end indexCD method

    /**
     * Deletes a record or records as indicated by the Solr-format query
     * string, then commits. Example: "*:*" deletes everything.
     * Checked failures are reported on standard error and not rethrown,
     * so this call is best effort.
     *
     * @param query Solr query selecting the records to delete
     * @param url URL of the Solr server (or core) to delete from
     */
    public static void deleteRecord(String query, String url){
        try {
            SolrServer server = new CommonsHttpSolrServer(url);
            server.deleteByQuery(query);
            server.commit(); // Always a good idea
        } catch (MalformedURLException e) {
            System.err.println("*** Delete failed: bad Solr URL \"" + url + "\" ***");
            e.printStackTrace();
        } catch (SolrServerException e) {
            System.err.println("*** Delete failed for query \"" + query + "\" ***");
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("*** Delete failed: could not communicate with " + url + " ***");
            e.printStackTrace();
        }
    }

    /**
     * Sends the collected documents to Solr and commits them.
     * An empty batch is skipped entirely. A SolrException raised by the add
     * is reported with the schema hint and then rethrown to the caller.
     */
    private static void sendBatch(SolrServer server, Collection<SolrInputDocument> docs)
            throws SolrServerException, IOException {
        if (docs.isEmpty()) {
            // An add request with no documents carries no content for the
            // server to process and is reported as an error rather than
            // treated as a no-op, so there is nothing useful to send or commit.
            System.err.println("No documents to index; nothing sent to Solr.");
            return;
        }
        try {
            server.add(docs);
        } catch (SolrException e) {
            // This is where a mismatch with schema.xml actually surfaces;
            // print the hint, then let the caller see the failure.
            reportAddFailure(e);
            throw e;
        }
        server.commit();
    }

    /** Prints the standard "add failed" diagnostic together with the exception's message. */
    private static void reportAddFailure(SolrException e) {
        System.err.println("*** Add data failed ***");
        System.err
                .println("(probably mismatch with field names in schema.xml)");
        System.err.println(e.getMessage());
    }
}
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment