# HG changeset patch # User QuintillusCFC # Date 1549731039 18000 # Sat Feb 09 11:50:39 2019 -0500 # Node ID 79e9698e1b94776399e1af06bc9c6622e99ee298 # Parent eb6ea966790a4e027662aa0edd8ba61d0fd02c07 Loop through sub-directories and files, and handle (ASCII text) files properly. diff --git a/src/main/java/com/ajtjp/gopherarchiver/DocumentLine.java b/src/main/java/com/ajtjp/gopherarchiver/DocumentLine.java --- a/src/main/java/com/ajtjp/gopherarchiver/DocumentLine.java +++ b/src/main/java/com/ajtjp/gopherarchiver/DocumentLine.java @@ -6,14 +6,26 @@ * @author Andrew */ public class DocumentLine { - String type; //todo: Enum + public static char TYPE_INFORMATIONAL = 'i'; + public static char TYPE_FILE = '0'; + public static char TYPE_DIRECTORY = '1'; + public static char TYPE_CSO_PHONE_BOOK_SERVER = '2'; + public static char TYPE_ERROR = '3'; + public static char TYPE_BIN_HEXED_MAC_FILE = '4'; + public static char TYPE_DOS_BINARY = '5'; + public static char TYPE_UNIX_UUENCODED_FILE = '6'; + public static char TYPE_INDEX_SEARCH_SERVER = '7'; + public static char TYPE_TEXT_BASED_TELNET_SESSION = '8'; + public static char TYPE_BINARY_FILE = '9'; + + char type; //todo: Enum String displayText; String selector; String host; int port; public DocumentLine(String typeAndText, String selector, String host, String port) { - type = typeAndText.substring(0, 1); + type = typeAndText.charAt(0); displayText = typeAndText.substring(1); this.selector = selector; this.host = host; @@ -26,7 +38,18 @@ } private DocumentLine(String type) { - this.type = type; + this.type = type.charAt(0); + } + + /** + * toString on this method returns the literal raw syntax + * of this Gopher line. This makes it ideal for exporting + * the contents to file. + * @return + */ + @Override + public String toString() { + return type + displayText + '\t' + selector + '\t' + host + '\t' + port + '\r' + '\n'; } public static final DocumentLine TERMINATOR = new DocumentLine("."); diff --git a/src/main/java/com/ajtjp/gopherarchiver/Gopher.java b/src/main/java/com/ajtjp/gopherarchiver/Gopher.java --- a/src/main/java/com/ajtjp/gopherarchiver/Gopher.java +++ b/src/main/java/com/ajtjp/gopherarchiver/Gopher.java @@ -1,6 +1,8 @@ package com.ajtjp.gopherarchiver; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -17,24 +19,81 @@ static int bytesRead = 0; - public static void main(String[]args) { + public static void main(String[]args) throws Exception { try { new Gopher(); } catch(Exception ex) { System.err.println("Ex: " + ex.getMessage()); + throw ex; } } public Gopher() throws IOException, InterruptedException { + String outputDir = "C:\\GopherTest"; + burrow(ReferenceURLs.indianRecipes, outputDir); + } + + public void burrow(GopherURL gopherHole, String outputDir) throws IOException, InterruptedException { //TODO: It probably is a good idea to parse content as we go. //That would be a good why of detecting the terminator; trade-off is it //might make it slightly harder to archive the raw content. //Postponing that decision till later. - String pageContents = downloadPageContents(ReferenceURLs.floodgap); + String pageContents = downloadPageContents(gopherHole); List parsedContents = new ArrayList<>(); parseContents(pageContents, parsedContents); System.out.println("Read total of " + bytesRead + " bytes"); + + File archiveFile = new File(outputDir + gopherHole.selector + ".gar"); + File parent = new File(archiveFile.getParent()); + parent.mkdirs(); + archiveFile.createNewFile(); + + try { + FileWriter fw = new FileWriter(archiveFile); + for (DocumentLine line : parsedContents) { + fw.write(line.toString()); + } + fw.close(); + } + catch(IOException ex) { + + } + + //TODO: Loop. Loop on items that have the same host, and + //whose selector starts with the current selector, and is not identical to the current selector. + for (DocumentLine line : parsedContents) { + if (!line.host.equals(gopherHole.host)) { + continue; + } + //Only burrow deeper on the same site. + if (line.selector.startsWith(gopherHole.selector) && line.selector.length() > gopherHole.selector.length()) { + if (line.type == DocumentLine.TYPE_DIRECTORY) { + GopherURL newURL = new GopherURL(line.host, line.port, line.selector); + burrow(newURL, outputDir); + } + else if (line.type == DocumentLine.TYPE_FILE) { + archiveFile(new GopherURL(line.host, line.port, line.selector), outputDir); + } + } + } + } + + private void archiveFile(GopherURL gopherPage, String outputDir) throws IOException, InterruptedException { + String pageContents = downloadPageContents(gopherPage); + File archiveFile = new File(outputDir + gopherPage.selector); + File parent = new File(archiveFile.getParent()); + parent.mkdirs(); + archiveFile.createNewFile(); + + try { + FileWriter fw = new FileWriter(archiveFile); + fw.write(pageContents); + fw.close(); + } + catch(IOException ex) { + + } } private String downloadPageContents(GopherURL url) throws IOException, InterruptedException { @@ -94,6 +153,9 @@ if (segments[0].equals(".")) { documentLines.add(DocumentLine.TERMINATOR); } + else { + documentLines.add(new DocumentLine("i" + line, "", "", "")); + } } } System.out.println("Lines"); @@ -103,6 +165,11 @@ //line by itself, and closes the connection. //TODO: Properly implement detection of the trailing bytes, rather than simply //waiting up to a second for the end. + //NB: Around 5 AM, realized that parsing as DocumentLines + //as we go is perfect for detecting termination. It already + //figures out DocumentLine.TERMINATOR, so once that arrives, + //we know we are done; otherwise we know we aren't done. + //Thus the loop can be much simpler. final static byte[] terminatingBytes = new byte[5]; static {