This is largely the same as in Project 1, though cleaned up a bit.
/**
*
*/
package crrasolrindexer;
import java.io.*;
import java.util.*;
import org.apache.solr.client.solrj.SolrServerException;
import org.marc4j.marc.*;
import org.marc4j.*;
/**
* @author slittle2
*
* MarcDataRetriever retrieves data from a MARC file and stores it in a
* LinkedHashSet of IndexDatum objects.
*
* For ease of use, one could modify the code to use CRRA_Datum instead.
*
*/
public class MarcDataRetriever {

    /** Default MARC file to read when no path is given on the command line. Used for test purposes. */
    private static final String DEFAULT_MARC_FILE = "C:/Documents and Settings/slittle2/Desktop/Index Data/crra.marc";

    /** Default Solr URL to index into when none is given on the command line. */
    private static final String DEFAULT_SOLR_URL = "http://localhost:8983/solr/core0/";

    /** Placeholder returned when a requested field or subfield is not present in a record. */
    private static final String MISSING = " ";

    /** Names of the IndexDatum fields that main() prints for every record, in print order. */
    private static final String[] PRINTED_FIELDS = {
        "title", "date", "author", "key", "subject", "note", "type", "text"
    };

    /**
     * Test driver: reads a MARC file, prints the extracted fields of every
     * record to standard output, then hands the records to the Indexer.
     *
     * @param args optional; args[0] is the MARC file to read and args[1] the
     *             Solr URL to index into. Missing arguments fall back to the
     *             built-in defaults, so running with no arguments behaves as before.
     * @throws IOException if reading the MARC data fails
     * @throws SolrServerException declared for the call to Indexer.indexID
     */
    public static void main(String[] args) throws IOException, SolrServerException {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_MARC_FILE;
        String solrUrl = (args != null && args.length > 1) ? args[1] : DEFAULT_SOLR_URL;

        LinkedHashSet<IndexDatum> recordSet = getMarcData(fileName);
        for (IndexDatum singleRecord : recordSet) {
            for (String fieldName : PRINTED_FIELDS) {
                System.out.println(singleRecord.returnField(fieldName));
            }
        }

        Indexer.indexID(recordSet, solrUrl);
        System.out.println("Successfully indexed... we hope.");
    }

    /**
     * Builds a single IndexDatum from one MARC record.
     *
     * Fields set: title (245 a/b/c/n/p), date (260 c), note (subfield a of
     * tags 500-599), key (control field 001), author (100 a/b/c, 110 a,
     * 111 a), subject (subfield a of tags 600-699), text (empty) and type
     * ("MARC"). Pieces are concatenated without a separator; every absent
     * piece contributes a single space.
     *
     * If setting a field raises an IOException, the error is reported on
     * standard error and the partially filled datum is returned.
     *
     * @param record the MARC record to read
     * @return the populated (possibly partially populated) IndexDatum
     */
    private static IndexDatum extractMarcRecord(Record record) {
        IndexDatum datum = new IndexDatum();
        try {
            datum.setField("title", joinSubfields(record, "245", 'a', 'b', 'c', 'n', 'p'));
            datum.setField("date", getMarcData(record, "260", 'c'));
            datum.setField("note", joinTagRange(record, 500, 600, 'a'));
            datum.setField("key", getMarcData(record, "001"));
            datum.setField("author",
                    joinSubfields(record, "100", 'a', 'b', 'c')
                            + getMarcData(record, "110", 'a')
                            + getMarcData(record, "111", 'a'));
            datum.setField("subject", joinTagRange(record, 600, 700, 'a'));
            // Remaining fields: text, type
            datum.setField("text", "");
            datum.setField("type", "MARC");
        } catch (IOException e) {
            System.err.println("*** IO Exception while setting IndexDatum ***");
            // The original called getStackTrace() and discarded the result,
            // so the trace was never actually reported.
            e.printStackTrace();
        }
        return datum;
    }

    /** Concatenates the given subfields of one tag, in the order listed. */
    private static String joinSubfields(Record record, String fieldIndex, char... subfieldIndexes) {
        StringBuilder joined = new StringBuilder();
        for (char subfieldIndex : subfieldIndexes) {
            joined.append(getMarcData(record, fieldIndex, subfieldIndex));
        }
        return joined.toString();
    }

    /** Concatenates one subfield across the tags firstTag (inclusive) to endTag (exclusive). */
    private static String joinTagRange(Record record, int firstTag, int endTag, char subfieldIndex) {
        StringBuilder joined = new StringBuilder();
        for (int tag = firstTag; tag < endTag; tag++) {
            joined.append(getMarcData(record, Integer.toString(tag), subfieldIndex));
        }
        return joined.toString();
    }

    /**
     * Retrieves data from a DataField, which does have subfields.
     *
     * Only the field returned by record.getVariableField(fieldIndex) is
     * consulted.
     *
     * @param record        the MARC record to read
     * @param fieldIndex    the tag of the field, e.g. "245"
     * @param subfieldIndex the subfield code, e.g. 'a'
     * @return the subfield's data, or a single space when the field is
     *         absent, is not a DataField, or lacks the subfield or its data
     */
    private static String getMarcData(Record record, String fieldIndex,
            char subfieldIndex) {
        VariableField field = record.getVariableField(fieldIndex);
        // instanceof is false for null, so this covers "absent" and "wrong field kind".
        if (!(field instanceof DataField)) {
            return MISSING;
        }
        Subfield subfield = ((DataField) field).getSubfield(subfieldIndex);
        if (subfield == null) {
            return MISSING;
        }
        String data = subfield.getData();
        return (data != null) ? data : MISSING;
    }

    /**
     * Retrieves data from a ControlField, which has no subfields.
     *
     * @param record     the MARC record to read
     * @param fieldIndex the tag of the control field, e.g. "001"
     * @return the field's data, or a single space when the field is absent,
     *         is not a ControlField, or carries no data
     */
    private static String getMarcData(Record record, String fieldIndex) {
        VariableField field = record.getVariableField(fieldIndex);
        if (!(field instanceof ControlField)) {
            return MISSING;
        }
        String data = ((ControlField) field).getData();
        return (data != null) ? data : MISSING;
    }

    /**
     * Opens a file of MARC records, reads every record, converts each one to
     * an IndexDatum and collects the results in reading order.
     *
     * A missing file is treated as best-effort: a message is printed to
     * standard error and an empty set is returned.
     *
     * @param fileName path of the MARC file to read
     * @return the extracted records; empty if the file could not be found
     * @throws IOException if closing the input stream fails
     */
    public static LinkedHashSet<IndexDatum> getMarcData(String fileName)
            throws IOException {
        LinkedHashSet<IndexDatum> marcRecords = new LinkedHashSet<IndexDatum>();
        InputStream in = null;
        try {
            in = new FileInputStream(fileName);
            MarcReader reader = new MarcStreamReader(in);
            // As long as there are unread records, extract and store them
            while (reader.hasNext()) {
                marcRecords.add(extractMarcRecord(reader.next()));
            }
        } catch (FileNotFoundException e) {
            System.err.println("*** File Not Found ***");
        } finally {
            // Close the input stream whether or not reading succeeded
            if (in != null) {
                in.close();
            }
        }
        return marcRecords;
    }
}
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment