The class just posted required an enhanced data class to model the VuFind records. Here it is!
/**
*
*/
package crrasolrindexer;
import java.io.*;
import java.util.Iterator;
import java.util.LinkedHashSet;
/**
 * Holds the named fields of a single record that is to be indexed.
 *
 * <p>The CRRA_Datum class is just like the IndexDatum class, but modified to work
 * with the CRRA schema (or any other). It holds the kinds of data that are to be
 * extracted from an indexed field, whether MARC or EAD (or anything else). All
 * fields are private; only protected methods allow setting them, and a public
 * method allows retrieving their contents. (The setField() method is protected
 * so that one has to change the data through the appropriate class that indexes
 * the data.)
 *
 * <p>This class is designed to be easily extensible for use with other kinds
 * of data. UNLIKE IndexDatum, it may be passed a string of schema names from
 * which it builds a new schema for its entries.
 *
 * <p>Note that the schema names used here are independent of the schema *map* that
 * the CRRA_EADRetriever class uses to map from EAD to VuFind. If inconsistencies
 * occur between the schema here and the schema_map (or between these and the actual
 * VuFind schema), unpredictable behavior may result.
 *
 * <p>Field names are matched case-insensitively. Entries keep the order in which
 * their names appeared in the schema string.
 *
 * @author slittle2
 */
public class CRRA_Datum {

    /** One schema field: its name and the text stored for it so far. */
    private static class Entry {
        final String name;
        String content;

        Entry(String n, String c) {
            name = n;
            content = c;
        }
    }

    // The default schema is that used in VuFind as of this coding (June 2010).
    private static final String DEFAULT_SCHEMA_NAMES = "id fullrecord allfields institution collection building language format author author-letter authorStr auth_author auth_authorStr title title_sort title_sub title_short title_full title_fullStr title_auth physical publisher publisherStr publishDate edition description contents url thumbnail lccn ctrlnum isbn issn callnumber callnumber-a callnumber-first callnumber-first-code callnumber-subject callnumber-subject-code callnumber-label dewey-hundreds dewey-tens dewey-ones dewey-full dewey-sort author2 author2Str author2-role auth_author2 auth_author2Str author_additional author_additionalStr title_alt title_old title_new dateSpan series series2 topic genre geographic illustrated recordtype";

    // The schema string this instance was actually built from.
    private final String schema_names;

    // Used purely as an insertion-ordered collection; Entry does not define
    // equals/hashCode, so no de-duplication by name takes place.
    private final LinkedHashSet<Entry> entries = new LinkedHashSet<Entry>();

    /**
     * Builds a datum whose fields are the given schema names, each initially empty.
     *
     * @param schema_names schema field names separated by whitespace; blank
     *                     tokens (from repeated or leading/trailing whitespace)
     *                     are ignored
     * @throws NullPointerException if {@code schema_names} is null
     */
    public CRRA_Datum(String schema_names) {
        if (schema_names == null) {
            throw new NullPointerException("schema_names must not be null");
        }
        // Remember the schema actually in use so returnSchemaNames() reports it
        // instead of the default.
        this.schema_names = schema_names;
        for (String fieldName : schema_names.trim().split("\\s+")) {
            // An all-blank input splits to a single empty token; skip it.
            if (fieldName.length() > 0) {
                entries.add(new Entry(fieldName, ""));
            }
        }
    }

    /** Default constructor: uses the current (2010) VuFind schema names. */
    public CRRA_Datum() {
        this(DEFAULT_SCHEMA_NAMES);
    }

    /**
     * Returns the names of the schema fields as a single string, exactly as
     * supplied at construction (or the default schema for the no-arg
     * constructor). This can then be parsed/tokenized as needed.
     *
     * @return the whitespace-separated schema names
     */
    public String returnSchemaNames() {
        return schema_names;
    }

    /**
     * Returns a given field's value.
     *
     * @param fieldName name of the field, matched case-insensitively
     * @return the field's current content
     * @throws IOException if no field with that name exists in the schema
     */
    public String returnField(String fieldName) throws IOException {
        return findEntry(fieldName).content;
    }

    /**
     * Sets the value of a given field. Completely overwrites the original.
     *
     * @param fieldName name of the field, matched case-insensitively
     * @param data      new content for the field
     * @throws IOException if no field with that name exists in the schema
     */
    protected void setField(String fieldName, String data) throws IOException {
        findEntry(fieldName).content = data;
    }

    /**
     * Adds data to the end of a field without overwriting what is already there.
     *
     * @param fieldName name of the field, matched case-insensitively
     * @param data      text to append to the field's content
     * @throws IOException if no field with that name exists in the schema
     */
    protected void concatenateField(String fieldName, String data) throws IOException {
        Entry entry = findEntry(fieldName);
        entry.content += data;
    }

    /**
     * Displays the fields/values of the entire Datum, one field per line in
     * schema order.
     *
     * @return a newline-prefixed line per field: name, two tabs, content
     */
    @Override
    public String toString() {
        StringBuilder contents = new StringBuilder();
        for (Entry entry : entries) {
            contents.append("\n ").append(entry.name).append("\t\t").append(entry.content);
        }
        return contents.toString();
    }

    /**
     * Finds the first entry whose name matches, ignoring case.
     *
     * @throws IOException naming the missing field if there is no match
     */
    private Entry findEntry(String fieldName) throws IOException {
        for (Entry entry : entries) {
            if (entry.name.equalsIgnoreCase(fieldName)) {
                return entry;
            }
        }
        throw new IOException("Unknown schema field: " + fieldName);
    }
}
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment