79e9698e1b94 — QuintillusCFC 5 years ago
Loop through sub-directories and files, and handle (ASCII text) files properly.
M src/main/java/com/ajtjp/gopherarchiver/DocumentLine.java +26 -3
@@ 6,14 6,26 @@ package com.ajtjp.gopherarchiver;
  * @author Andrew
  */
 public class DocumentLine {
-    String type;    //todo: Enum
+    //Gopher item-type characters.  The constructor stores the first
+    //character of each raw line in the type field; compare it against these.
+    //They are final so that no caller can reassign them by accident and so
+    //that they can be used as switch labels.
+    public static final char TYPE_INFORMATIONAL = 'i';
+    public static final char TYPE_FILE = '0';
+    public static final char TYPE_DIRECTORY = '1';
+    public static final char TYPE_CSO_PHONE_BOOK_SERVER = '2';
+    public static final char TYPE_ERROR = '3';
+    public static final char TYPE_BIN_HEXED_MAC_FILE = '4';
+    public static final char TYPE_DOS_BINARY = '5';
+    public static final char TYPE_UNIX_UUENCODED_FILE = '6';
+    public static final char TYPE_INDEX_SEARCH_SERVER = '7';
+    public static final char TYPE_TEXT_BASED_TELNET_SESSION = '8';
+    public static final char TYPE_BINARY_FILE = '9';
+    
+    char type;    //todo: Enum
     String displayText;
     String selector;
     String host;
     int port;
     
     public DocumentLine(String typeAndText, String selector, String host, String port) {
-        type = typeAndText.substring(0, 1);
+        type = typeAndText.charAt(0);
         displayText = typeAndText.substring(1);
         this.selector = selector;
         this.host = host;

          
@@ 26,7 38,18 @@ public class DocumentLine {
     }
     
     private DocumentLine(String type) {
-        this.type = type;
+        this.type = type.charAt(0);
+    }
+    
+    /**
+     * Returns the literal raw syntax of this Gopher line: the type
+     * character and display text, followed by the selector, host and
+     * port, separated by tabs and terminated by CRLF.  This makes it
+     * ideal for exporting the contents to file.
+     * <p>
+     * Lines built by the private type-only constructor (such as
+     * {@link #TERMINATOR}) have no display text, selector or host.  For
+     * those only the type character and the CRLF are returned, instead
+     * of the text "null" for every missing field.
+     * @return the raw line, including the trailing CRLF
+     */
+    @Override
+    public String toString() {
+        if (displayText == null) {
+            //The public constructor always sets displayText, so a null here
+            //means this line carries nothing but its type character.
+            return type + "\r\n";
+        }
+        return type + displayText + '\t' + selector + '\t' + host + '\t' + port + "\r\n";
     }
     
     public static final DocumentLine TERMINATOR = new DocumentLine(".");

          
M src/main/java/com/ajtjp/gopherarchiver/Gopher.java +69 -2
@@ 1,6 1,8 @@ 
 
 package com.ajtjp.gopherarchiver;
 
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;

          
@@ 17,24 19,81 @@ public class Gopher {
     
     static int bytesRead = 0;
     
-    public static void main(String[]args) {
+    public static void main(String[]args) throws Exception {
         try {
             new Gopher();
         }
         catch(Exception ex) {
             System.err.println("Ex: " + ex.getMessage());
+            throw ex;
         }
     }
     
     public Gopher() throws IOException, InterruptedException {
+        String outputDir = "C:\\GopherTest";
+        burrow(ReferenceURLs.indianRecipes, outputDir);
+    }
+    
+    /**
+     * Archives the menu at gopherHole and then recurses into it.
+     * <p>
+     * The menu is downloaded and parsed, and its lines are written to
+     * outputDir + selector + ".gar".  Every line on the same host whose
+     * selector extends the current selector is then followed: directories
+     * are burrowed into recursively and files are saved via archiveFile.
+     * @param gopherHole the menu to download
+     * @param outputDir the directory prefix the archive paths are built from
+     * @throws IOException if the download fails or the archive file cannot be created
+     * @throws InterruptedException if the download is interrupted
+     */
+    public void burrow(GopherURL gopherHole, String outputDir) throws IOException, InterruptedException {
         //TODO: It probably is a good idea to parse content as we go.
         //That would be a good why of detecting the terminator; trade-off is it
         //might make it slightly harder to archive the raw content.
         //Postponing that decision till later.
-        String pageContents = downloadPageContents(ReferenceURLs.floodgap);
+        String pageContents = downloadPageContents(gopherHole);
         List<DocumentLine> parsedContents = new ArrayList<>();
         parseContents(pageContents, parsedContents);
         System.out.println("Read total of " + bytesRead + " bytes");
+        
+        //NOTE(review): the selector comes from the remote server and is used
+        //unchecked as a file path; consider rejecting ".." segments.
+        File archiveFile = new File(outputDir + gopherHole.selector + ".gar");
+        File parent = archiveFile.getParentFile();
+        if (parent != null) {
+            //getParentFile() is null when the path has no parent component.
+            parent.mkdirs();
+        }
+        archiveFile.createNewFile();
+        
+        //try-with-resources closes the writer even when a write fails.
+        try (FileWriter fw = new FileWriter(archiveFile)) {
+            for (DocumentLine line : parsedContents) {
+                fw.write(line.toString());
+            }
+        }
+        catch(IOException ex) {
+            //Writing the archive stays best-effort, but is no longer silent.
+            System.err.println("Could not write " + archiveFile + ": " + ex.getMessage());
+        }
+        
+        //Loop on items that have the same host, and whose selector starts
+        //with the current selector, and is not identical to the current selector.
+        for (DocumentLine line : parsedContents) {
+            //DocumentLine.TERMINATOR has no host (null); skip it instead of
+            //throwing a NullPointerException on it.
+            if (line.host == null || !line.host.equals(gopherHole.host)) {
+                continue;
+            }
+            //Only burrow deeper on the same site.
+            if (line.selector.startsWith(gopherHole.selector) && line.selector.length() > gopherHole.selector.length()) {
+                if (line.type == DocumentLine.TYPE_DIRECTORY) {
+                    GopherURL newURL = new GopherURL(line.host, line.port, line.selector);
+                    burrow(newURL, outputDir);
+                }
+                else if (line.type == DocumentLine.TYPE_FILE) {
+                    archiveFile(new GopherURL(line.host, line.port, line.selector), outputDir);
+                }
+            }
+        }
+    }
+    
+    /**
+     * Downloads the contents of gopherPage and writes them to the file at
+     * outputDir + selector, creating parent directories as needed.
+     * @param gopherPage the page to download
+     * @param outputDir the directory prefix the output path is built from
+     * @throws IOException if the download fails or the output file cannot be created
+     * @throws InterruptedException if the download is interrupted
+     */
+    private void archiveFile(GopherURL gopherPage, String outputDir) throws IOException, InterruptedException {
+        String pageContents = downloadPageContents(gopherPage);
+        //NOTE(review): the selector comes from the remote server and is used
+        //unchecked as a file path; consider rejecting ".." segments.
+        File archiveFile = new File(outputDir + gopherPage.selector);
+        File parent = archiveFile.getParentFile();
+        if (parent != null) {
+            //getParentFile() is null when the path has no parent component.
+            parent.mkdirs();
+        }
+        archiveFile.createNewFile();
+        
+        //try-with-resources closes the writer even when the write fails.
+        try (FileWriter fw = new FileWriter(archiveFile)) {
+            fw.write(pageContents);
+        }
+        catch(IOException ex) {
+            //Writing the file stays best-effort, but is no longer silent.
+            System.err.println("Could not write " + archiveFile + ": " + ex.getMessage());
+        }
+    }
     
     private String downloadPageContents(GopherURL url) throws IOException, InterruptedException {

          
@@ 94,6 153,9 @@ wait:       for (;;) {
                 if (segments[0].equals(".")) {
                     documentLines.add(DocumentLine.TERMINATOR);
                 }
+                else {
+                    documentLines.add(new DocumentLine("i" + line, "", "", ""));
+                }
             }
         }
         System.out.println("Lines");

          
@@ 103,6 165,11 @@ wait:       for (;;) {
     //line by itself, and closes the connection.
     //TODO: Properly implement detection of the trailing bytes, rather than simply
     //waiting up to a second for the end.
+    //NB: Around 5 AM, realized that parsing as DocumentLines
+    //as we go is perfect for detecting termination.  It already
+    //figures out DocumentLine.TERMINATOR, so once that arrives,
+    //we know we are done; otherwise we know we aren't done.
+    //Thus the loop can be much simpler.
     final static byte[] terminatingBytes = new byte[5];
     
     static {