@@ 6,14 6,26 @@ package com.ajtjp.gopherarchiver;
* @author Andrew
*/
public class DocumentLine {
- String type; //todo: Enum
+ //Gopher item-type characters; a line's type is the first character of its raw text.
+ //Declared final so they are true constants and cannot be reassigned by other code.
+ public static final char TYPE_INFORMATIONAL = 'i';
+ public static final char TYPE_FILE = '0';
+ public static final char TYPE_DIRECTORY = '1';
+ public static final char TYPE_CSO_PHONE_BOOK_SERVER = '2';
+ public static final char TYPE_ERROR = '3';
+ public static final char TYPE_BIN_HEXED_MAC_FILE = '4';
+ public static final char TYPE_DOS_BINARY = '5';
+ public static final char TYPE_UNIX_UUENCODED_FILE = '6';
+ public static final char TYPE_INDEX_SEARCH_SERVER = '7';
+ public static final char TYPE_TEXT_BASED_TELNET_SESSION = '8';
+ public static final char TYPE_BINARY_FILE = '9';
+
+ char type; //todo: Enum
String displayText;
String selector;
String host;
int port;
public DocumentLine(String typeAndText, String selector, String host, String port) {
- type = typeAndText.substring(0, 1);
+ type = typeAndText.charAt(0);
displayText = typeAndText.substring(1);
this.selector = selector;
this.host = host;
@@ 26,7 38,18 @@ public class DocumentLine {
}
private DocumentLine(String type) {
- this.type = type;
+ this.type = type.charAt(0); //only the type is set here; displayText, selector and host stay null
+ }
+
+ /**
+  * Returns the literal raw syntax of this Gopher line, terminated by CRLF,
+  * which makes it suitable for exporting the contents to file.
+  * A line that carries only a type and no display text (such as
+  * {@link #TERMINATOR}) is written as the bare type character, so its
+  * unset fields are not rendered as the text "null".
+  * @return the raw line: type and display text, then tab-separated selector, host and port
+  */
+ @Override
+ public String toString() {
+ if (displayText == null) {
+     //Built by the type-only constructor: there are no other fields to emit.
+     return type + "\r\n";
+ }
+ return type + displayText + '\t' + selector + '\t' + host + '\t' + port + "\r\n";
}
public static final DocumentLine TERMINATOR = new DocumentLine(".");
@@ 1,6 1,8 @@
package com.ajtjp.gopherarchiver;
+import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ 17,24 19,81 @@ public class Gopher {
static int bytesRead = 0;
- public static void main(String[]args) {
+ public static void main(String[]args) throws Exception {
try {
new Gopher(); //the constructor itself kicks off the archive run
}
catch(Exception ex) {
System.err.println("Ex: " + ex.getMessage());
+ throw ex; //rethrow after logging so the failure is not hidden
}
}
public Gopher() throws IOException, InterruptedException {
+ String outputDir = "C:\\GopherTest"; //hard-coded archive root (Windows-style path)
+ burrow(ReferenceURLs.indianRecipes, outputDir); //constructing a Gopher immediately starts the crawl
+ }
+
+ /**
+  * Downloads the Gopher menu at {@code gopherHole}, writes its parsed lines to
+  * {@code outputDir + selector + ".gar"}, then follows the same-host entries
+  * whose selector extends the current one: directories are burrowed into
+  * recursively and plain files are archived.
+  *
+  * @param gopherHole menu to archive
+  * @param outputDir local path prefix under which archives are written
+  * @throws IOException if the download or the creation of the archive file fails
+  * @throws InterruptedException propagated from the page download
+  */
+ public void burrow(GopherURL gopherHole, String outputDir) throws IOException, InterruptedException {
//TODO: It probably is a good idea to parse content as we go.
//That would be a good why of detecting the terminator; trade-off is it
//might make it slightly harder to archive the raw content.
//Postponing that decision till later.
- String pageContents = downloadPageContents(ReferenceURLs.floodgap);
+ String pageContents = downloadPageContents(gopherHole);
List<DocumentLine> parsedContents = new ArrayList<>();
parseContents(pageContents, parsedContents);
System.out.println("Read total of " + bytesRead + " bytes");
+
+ //NOTE(review): the selector is supplied by the remote server and is joined
+ //to outputDir unchecked; a selector containing ".." could escape outputDir.
+ File archiveFile = new File(outputDir + gopherHole.selector + ".gar");
+ //getParentFile() is null when the path has no parent; skip mkdirs in that case.
+ File parent = archiveFile.getParentFile();
+ if (parent != null) {
+     parent.mkdirs();
+ }
+ archiveFile.createNewFile();
+
+ //try-with-resources closes the writer even when a write fails part-way.
+ try (FileWriter fw = new FileWriter(archiveFile)) {
+     for (DocumentLine line : parsedContents) {
+         fw.write(line.toString());
+     }
+ }
+ catch(IOException ex) {
+     //Keep crawling, but report the failed archive instead of hiding it.
+     System.err.println("Could not write " + archiveFile + ": " + ex.getMessage());
+ }
+
+ //Only burrow deeper on the same site: follow entries with the same host
+ //whose selector starts with, but is longer than, the current selector.
+ for (DocumentLine line : parsedContents) {
+     //Lines built without a host or selector (e.g. DocumentLine.TERMINATOR) have nothing to follow.
+     if (line.host == null || line.selector == null || !line.host.equals(gopherHole.host)) {
+         continue;
+     }
+     if (line.selector.startsWith(gopherHole.selector) && line.selector.length() > gopherHole.selector.length()) {
+         if (line.type == DocumentLine.TYPE_DIRECTORY) {
+             GopherURL newURL = new GopherURL(line.host, line.port, line.selector);
+             burrow(newURL, outputDir);
+         }
+         else if (line.type == DocumentLine.TYPE_FILE) {
+             archiveFile(new GopherURL(line.host, line.port, line.selector), outputDir);
+         }
+     }
+ }
+ }
+
+ /**
+  * Downloads a single Gopher document and stores its contents at
+  * {@code outputDir + selector}, creating parent directories as needed.
+  *
+  * @param gopherPage document to download
+  * @param outputDir local path prefix under which the document is written
+  * @throws IOException if the download or the creation of the target file fails
+  * @throws InterruptedException propagated from the page download
+  */
+ private void archiveFile(GopherURL gopherPage, String outputDir) throws IOException, InterruptedException {
+ String pageContents = downloadPageContents(gopherPage);
+ //NOTE(review): the selector is supplied by the remote server and is joined
+ //to outputDir unchecked; a selector containing ".." could escape outputDir.
+ File archiveFile = new File(outputDir + gopherPage.selector);
+ //getParentFile() is null when the path has no parent; skip mkdirs in that case.
+ File parent = archiveFile.getParentFile();
+ if (parent != null) {
+     parent.mkdirs();
+ }
+ archiveFile.createNewFile();
+
+ //try-with-resources closes the writer even when the write fails.
+ try (FileWriter fw = new FileWriter(archiveFile)) {
+     fw.write(pageContents);
+ }
+ catch(IOException ex) {
+     //Best effort: report the failed write instead of silently dropping it.
+     System.err.println("Could not write " + archiveFile + ": " + ex.getMessage());
+ }
}
private String downloadPageContents(GopherURL url) throws IOException, InterruptedException {
@@ 94,6 153,9 @@ wait: for (;;) {
if (segments[0].equals(".")) {
documentLines.add(DocumentLine.TERMINATOR);
}
+ else {
+ documentLines.add(new DocumentLine("i" + line, "", "", ""));
+ }
}
}
System.out.println("Lines");
@@ 103,6 165,11 @@ wait: for (;;) {
//line by itself, and closes the connection.
//TODO: Properly implement detection of the trailing bytes, rather than simply
//waiting up to a second for the end.
+ //NB: Around 5 AM, realized that parsing as DocumentLines
+ //as we go is perfect for detecting termination. It already
+ //figures out DocumentLine.TERMINATOR, so once that arrives,
+ //we know we are done; otherwise we know we aren't done.
+ //Thus the loop can be much simpler.
final static byte[] terminatingBytes = new byte[5];
static {