Once the Internet Archive's documents were mirrored locally, I had to add the local and IA URLs to the MARC records. In practice, since I was using a file and not directly accessing the MARC database, I saved the revised records to a new file, which could then be added to the database.
/**
*
*/
package iasearcher;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import org.marc4j.MarcReader;
import org.marc4j.MarcStreamReader;
import org.marc4j.MarcStreamWriter;
import org.marc4j.MarcWriter;
import org.marc4j.marc.DataField;
import org.marc4j.marc.MarcFactory;
import org.marc4j.marc.Record;
import org.marc4j.marc.Subfield;
/**
* @author slittle2
*
* Once files have been retrieved from the Internet Archive,
* UpdateMarc updates the MARC records with two things:
*
* - the URL of the IA directory
* - the URL of the local copy
*
* Each is saved into a new 856 subfield U
*
*/
public class UpdateMarc {

    /**
     * Test driver for {@link #updater(String, String, String)}.
     *
     * @param args optional; when at least three arguments are supplied they
     *             are used as the MARC input file, the one-hit log and the
     *             output file (in that order), otherwise the built-in test
     *             paths below are used
     * @throws IOException if the MARC input or output file cannot be opened
     *                     or closed
     */
    // Here for testing purposes
    public static void main(String[] args) throws IOException {
        // Values for test run - may be changed as needed
        String marcFile = "C:/Documents and Settings/slittle2/Desktop/updated.marc";
        String tempFile = "C:/Documents and Settings/slittle2/Desktop/temp.marc";
        String oneHitLog = "C:/Documents and Settings/slittle2/workspace/MarcRetriever/Success Files 5-26/oneResult.txt";
        if (args != null && args.length >= 3) {
            marcFile = args[0];
            oneHitLog = args[1];
            tempFile = args[2];
        }
        updater(marcFile, oneHitLog, tempFile);
    }

    /**
     * Reads every record from {@code marcFile} and, for each record whose
     * author and title match (ignoring case) an entry derived from
     * {@code oneHitLog}, adds two 856 fields (the IA URL and the local mirror
     * URL) and appends the updated record to {@code tempFile}.
     * Records without a match are not written.
     *
     * @param marcFile  path of the MARC file to read
     * @param oneHitLog path of the tab-separated log (author, title, key)
     * @param tempFile  path of the MARC file the updated records are appended to
     * @throws IOException if a MARC stream cannot be opened or closed
     */
    public static void updater(String marcFile, String oneHitLog, String tempFile)
            throws IOException {
        LinkedHashSet<KeyDatum> keyData = searchKATIL(oneHitLog);

        InputStream in = null;
        OutputStream out = null;
        try {
            in = new FileInputStream(marcFile);
            // Opened in append mode: existing content of tempFile is kept.
            out = new FileOutputStream(tempFile, true);
            MarcReader reader = new MarcStreamReader(in);
            MarcWriter writer = new MarcStreamWriter(out);
            MarcFactory factory = MarcFactory.newInstance();

            while (reader.hasNext()) {
                Record record = reader.next();
                String author = getFullAuthor(record);
                String title = getTitle(record);

                // Match current record author/title against entire keyData list;
                // only the first matching entry is applied.
                for (KeyDatum datum : keyData) {
                    if (author.equalsIgnoreCase(datum.author)
                            && title.equalsIgnoreCase(datum.title)) {
                        System.out.println("It matches!\t" + record);
                        record.addVariableField(
                                newLinkField(factory, datum.iaURL, "ORIGINAL LOCATION"));
                        record.addVariableField(
                                newLinkField(factory, datum.localURL, "LOCAL MIRROR"));
                        writer.write(record);
                        System.out.println("Updated Record:\t" + record);
                        break;
                    }
                }
            }
            writer.close();
        } finally {
            // Close both streams even if closing the first one fails.
            try {
                if (out != null)
                    out.close();
            } finally {
                if (in != null)
                    in.close();
            }
        }
    }

    /** Builds an 856 field (indicators 0 and 4) with the URL in $u and the note in $z. */
    private static DataField newLinkField(MarcFactory factory, String url, String note) {
        DataField field = factory.newDataField("856", '0', '4');
        field.addSubfield(factory.newSubfield('u', url));
        field.addSubfield(factory.newSubfield('z', note));
        return field;
    }

    /** Returns the data of the given subfield, or a single space when the field, subfield or data is absent. */
    private static String subfieldOrBlank(DataField field, char code) {
        if (field == null)
            return " ";
        Subfield subfield = field.getSubfield(code);
        if (subfield == null)
            return " ";
        String data = subfield.getData();
        return data != null ? data : " ";
    }

    /** Returns 245$a (the title proper) of the record, or a single space when it is missing. */
    private static String getTitle(Record record) {
        DataField field = (DataField) record.getVariableField("245");
        return subfieldOrBlank(field, 'a');
    }

    /**
     * Returns the concatenation of 100$a, 100$b and 100$c of the record;
     * each missing part contributes a single space.
     */
    private static String getFullAuthor(Record record) {
        DataField field = (DataField) record.getVariableField("100");
        return subfieldOrBlank(field, 'a')
                + subfieldOrBlank(field, 'b')
                + subfieldOrBlank(field, 'c');
    }

    /**
     * Gets the Key, Author, Title, and IA &amp; Local URL for every line of the
     * one-hit log.
     *
     * <p>Each log line is tab-separated (author, title, key). The key is looked
     * up, ignoring case, in the cache file of previous results; the cached
     * entry is returned when found. I/O problems are reported on
     * {@code System.err} and whatever was collected so far is returned.
     *
     * @param oneHitLog path of the tab-separated one-hit log
     * @return the matched entries in log order
     * @throws IOException if closing one of the files fails
     */
    private static LinkedHashSet<KeyDatum> searchKATIL(String oneHitLog) throws IOException {
        final String addressRoot = "http://www.archive.org/download/";
        final String localRoot = "http://zoia.library.nd.edu//sandbox/books";
        final String outFileLocation = "C:/Documents and Settings/slittle2/Desktop/outFile.txt";
        // Deliberately true: all the results should be stored in the local
        // cache file now, so keys missing from it are NOT looked up at the
        // Internet Archive. Set to false to re-enable the lookup branch below.
        final boolean allResultsCached = true;

        LinkedHashSet<KeyDatum> kati = new LinkedHashSet<KeyDatum>();
        LinkedHashSet<KeyDatum> previous = new LinkedHashSet<KeyDatum>();

        BufferedReader inFile = null;
        BufferedReader inFile2 = null;
        BufferedWriter outFile = null;
        try {
            inFile = new BufferedReader(new FileReader(oneHitLog));
            inFile2 = new BufferedReader(new FileReader(outFileLocation));

            // Load previous results into memory
            String line;
            while ((line = inFile2.readLine()) != null) {
                String[] fields = line.split("\t");
                if (fields.length < 5) {
                    System.err.println("*** Skipping malformed cache line: " + line);
                    continue;
                }
                previous.add(new KeyDatum(fields[0], fields[1], fields[2], fields[3], fields[4]));
            }
            // Release the cache file before reopening it for appending.
            inFile2.close();
            outFile = new BufferedWriter(new FileWriter(outFileLocation, true));

            while ((line = inFile.readLine()) != null) {
                String[] fields = line.split("\t");
                if (fields.length < 3) {
                    System.err.println("*** Skipping malformed log line: " + line);
                    continue;
                }
                // Log column order is author, title, key; URLs are filled in later.
                KeyDatum keyDatum = new KeyDatum(fields[2], fields[0], fields[1], "", "");

                // Check and see if already in previous results
                KeyDatum cached = null;
                for (KeyDatum candidate : previous) {
                    if (keyDatum.compareQuick(candidate)) {
                        cached = candidate;
                        break;
                    }
                }

                if (cached != null) {
                    kati.add(cached);
                } else if (!allResultsCached) {
                    // Generate IA URL
                    keyDatum.iaURL = addressRoot + keyDatum.key + "/";
                    // Generate local URL from the redirected IA location
                    String redirected = redirectAndTrim(keyDatum.iaURL);
                    if (redirected == null) {
                        // The failure was already reported by redirectAndTrim.
                        continue;
                    }
                    keyDatum.localURL = redirected.replaceFirst("http:/", localRoot);
                    outFile.append(keyDatum.toString("\t"));
                    kati.add(keyDatum);
                    System.out.println(keyDatum.toString("\t"));
                }
            }
        } catch (MalformedURLException e) {
            System.err.println("*** Malformed URL Exception ***");
        } catch (FileNotFoundException e) {
            System.err.println("*** File not found! ***");
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("*** IO Exception ***");
            e.printStackTrace();
        } finally {
            // Close every file even if closing an earlier one fails.
            try {
                if (inFile != null)
                    inFile.close();
            } finally {
                try {
                    if (inFile2 != null)
                        inFile2.close();
                } finally {
                    if (outFile != null)
                        outFile.close();
                }
            }
        }
        return kati;
    }

    // TODO Can't I just use the one in IASearcher?
    /**
     * Opens the given URL, follows any HTTP redirects and returns the URL
     * the connection finally ended up at.
     *
     * @param key the URL to resolve
     * @return the redirected URL, or {@code null} when the lookup failed
     *         (the failure is reported on {@code System.err})
     * @throws IOException if closing the response stream fails
     */
    protected static String redirectAndTrim(String key) throws IOException {
        InputStream inURI = null;
        try {
            // Open connection to IA
            URL url = new URI(key).toURL();
            HttpURLConnection h = (HttpURLConnection) url.openConnection();
            // Per-connection setting; avoids mutating the JVM-wide default.
            h.setInstanceFollowRedirects(true);
            inURI = h.getInputStream(); // Necessary to force redirect!
            return h.getURL().toString();
        } catch (URISyntaxException e) {
            System.err.println("*** URI Syntax Exception ***");
            e.printStackTrace();
        } catch (MalformedURLException e) {
            System.err.println("*** Malformed URL Exception ***");
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            System.err.println("*** File not found! ***");
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("*** IO Exception ***");
            e.printStackTrace();
        } finally {
            if (inURI != null)
                inURI.close();
        }
        return null;
    }
}
// Class for handling the various kinds of data
// Each key maps to 1 each of: author, title, IA URL, & local URL
class KeyDatum {
protected String key;
protected String author;
protected String title;
protected String iaURL;
protected String localURL;
KeyDatum() {
key = "";
author = "";
title = "";
iaURL = "";
localURL = "";
}
KeyDatum(String k, String a, String t, String i, String l) {
key = k;
author = a;
title = t;
iaURL = i;
localURL = l;
}
// Returns all fields as a string separated by the passed string (e.g. \n or \t
public String toString(String c){
return new String(key + c + author + c + title + c + iaURL + c + localURL + c + "\n");
}
public boolean compare(KeyDatum datum){
if(this.key.equalsIgnoreCase(datum.key) &
this.author.equalsIgnoreCase(datum.author) &
this.title.equalsIgnoreCase(datum.title) &
this.iaURL.equalsIgnoreCase(datum.iaURL) &
this.localURL.equalsIgnoreCase(datum.localURL))
return true;
return false;
}
public boolean compareQuick(KeyDatum datum) {
if(this.key.equalsIgnoreCase(datum.key))
return true;
return false;
}
}
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment