A Little Goes a Long Way: Project 2 - Sending Data to the Index

The data thus collected had to be sent to our Solr installation.

/**
*
*/
package crrasolrindexer;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.*;

import org.apache.solr.client.solrj.*;
import org.apache.solr.client.solrj.impl.*;
import org.apache.solr.common.*;

/**
 * Sends records to a Solr index through the SolrJ client library.
 *
 * <p>An earlier design turned each IndexDatum into a URL and posted it to
 * the Solr server; this class talks to the server through SolrJ instead,
 * so no URLs are assembled by hand.
 *
 * @author slittle2
 */
public class Indexer {

    // Field names copied from an IndexDatum into each Solr document by indexID.
    private static final String[] INDEX_DATUM_FIELDS = {
        "key", "author", "title", "date", "note", "subject", "text", "type"
    };

    /**
     * Manual test harness: connects to a local multicore Solr instance,
     * optionally adds one made-up record, and commits.
     *
     * @param args ignored
     * @throws SolrServerException if SolrJ reports a failure while adding or committing
     * @throws IOException if communication with the server fails
     * @throws MalformedURLException if the hard-coded server URL is rejected
     */
    public static void main(String[] args) throws SolrServerException, IOException, MalformedURLException {

        // Flip to true to push the sample record below into the index.
        boolean addNewData = false;

        // This is being tested on a multicore configuration
        String url = "http://localhost:8983/solr/core0/";
        SolrServer server = new CommonsHttpSolrServer(url);

        // To start with a clean sweep, eliminating all contents of the index
        // before proceeding, call deleteRecord("*:*", url) here.

        if (addNewData) {
            try {
                // Faux index record for testing; it uses the same field
                // names that indexID sends for an IndexDatum.
                SolrInputDocument sample = new SolrInputDocument();
                sample.addField("key", "12345");
                sample.addField("author", "Billy-bob Shakespeare");
                sample.addField("title", "Henry 500");
                sample.addField("date", "1950-?");
                sample.addField("note", "Not a real entry.");
                sample.addField("subject", "Hick classics.");
                sample.addField("text", "Some text here.");
                sample.addField("type", "MARC");

                Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
                docs.add(sample);

                server.add(docs);
            } catch (SolrException e) {
                System.err.println("*** Add data failed ***");
                System.err
                        .println("(probably mismatch with field names in schema.xml)");
            }
        }

        server.commit(); // Always a good idea
    }

    /**
     * Indexes a set of IndexDatum records, copying a fixed list of fields
     * (key, author, title, date, note, subject, text, type) from each one.
     * It is recommended that you use indexCD (below) instead.
     *
     * <p>A record whose document cannot be built is reported on stderr and
     * skipped. If no documents result, nothing is sent to the server.
     *
     * @param setOfID the records to index
     * @param urlSolr URL of the Solr core to send the records to
     * @throws SolrServerException if SolrJ reports a failure while adding or committing
     * @throws IOException if communication with the server fails (this
     *         includes a malformed urlSolr)
     */
    public static void indexID(LinkedHashSet<IndexDatum> setOfID, String urlSolr) throws SolrServerException, IOException{

        // Created first so that a bad URL is reported before any other work.
        SolrServer server = new CommonsHttpSolrServer(urlSolr);
        Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();

        System.out.println("Number records in Indexer: " + setOfID.size());

        for (IndexDatum record : setOfID) { // Add each IndexDatum to the Index
            try {
                SolrInputDocument doc = new SolrInputDocument();
                for (String fieldName : INDEX_DATUM_FIELDS) {
                    doc.addField(fieldName, record.returnField(fieldName));
                }
                docs.add(doc);
            } catch (SolrException e) {
                System.err.println("*** Add data failed ***");
                System.err
                        .println("(probably mismatch with field names in schema.xml)");
            }
        }

        addAndCommit(server, docs);

    } // end indexID method


    /**
     * Indexes a set of CRRA_Datum records. This is recommended.
     *
     * <p>The field names are taken from the whitespace-separated list returned
     * by CRRA_EADRetriever.returnCurrentCD().returnSchemaNames(); each name is
     * passed to the record's returnField and the result is added to the
     * document under that name.
     *
     * <p>A record whose document cannot be built is reported on stderr and
     * skipped. If no documents result, nothing is sent to the server.
     *
     * @param setOfID the records to index
     * @param urlSolr URL of the Solr core to send the records to
     * @throws SolrServerException if SolrJ reports a failure while adding or committing
     * @throws IOException if communication with the server fails (this
     *         includes a malformed urlSolr)
     */
    public static void indexCD(LinkedHashSet<CRRA_Datum> setOfID, String urlSolr) throws SolrServerException, IOException{

        // Created first so that a bad URL is reported before any other work.
        SolrServer server = new CommonsHttpSolrServer(urlSolr);
        Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();

        // Split on runs of whitespace so that doubled, leading or trailing
        // separators do not produce empty field names.
        String schemaNames = CRRA_EADRetriever.returnCurrentCD().returnSchemaNames();
        String[] fieldNames = schemaNames.trim().split("\\s+");

        System.out.println("Number records in Indexer: " + setOfID.size());

        for (CRRA_Datum record : setOfID) { // Add each CRRA_Datum to the Index
            try {
                SolrInputDocument doc = new SolrInputDocument();

                // Iterate through schema field names, passing them to the record.
                // Then add the associated results to the document.
                for (String fieldName : fieldNames) {
                    if (fieldName.length() == 0) {
                        continue; // an empty schema-name list splits to one empty token
                    }
                    doc.addField(fieldName, record.returnField(fieldName));
                }

                docs.add(doc);
            } catch (SolrException e) {
                System.err.println("*** Add data failed ***");
                System.err
                        .println("(probably mismatch with field names in schema.xml)");
            }
        }

        addAndCommit(server, docs);

    } // end indexCD method

    /**
     * Deletes a record or records as indicated by the Solr-format query
     * string, then commits. Example: "*:*" deletes everything.
     *
     * <p>Best effort: checked failures are reported on stderr and not rethrown.
     *
     * @param query Solr query selecting the records to delete
     * @param url URL of the Solr core to delete from
     */
    public static void deleteRecord(String query, String url){

        try {
            SolrServer server = new CommonsHttpSolrServer(url);
            server.deleteByQuery(query);
            server.commit(); // Always a good idea
        } catch (MalformedURLException e) {
            System.err.println("*** Delete failed: bad Solr URL '" + url + "' ***");
            e.printStackTrace();
        } catch (SolrServerException e) {
            System.err.println("*** Delete failed for query '" + query + "' ***");
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("*** Delete failed talking to '" + url + "' ***");
            e.printStackTrace();
        }
    }

    // Sends the documents to the server and commits; does nothing when there
    // are no documents.
    private static void addAndCommit(SolrServer server, Collection<SolrInputDocument> docs)
            throws SolrServerException, IOException {

        // An add request with no documents carries no content, which Solr
        // rejects, so skip the round trip entirely.
        if (docs.isEmpty()) {
            return;
        }

        server.add(docs);
        server.commit();
    }

}

A Little Goes a Long Way

Wednesday, July 21, 2010

Project 2 - Sending Data to the Index

No comments:

Post a Comment

Evolution

Followers