@@ 0,0 1,33 @@
+
+package com.ajtjp.gopherarchiver;
+
+/**
+ * One parsed line of a Gopher menu: a one-character item type, the text shown
+ * to the user, and the selector/host/port that locate the item.
+ *
+ * <p>Fields are package-private and mutable. The shared {@link #TERMINATOR}
+ * instance only has its type set; its other fields keep their default values
+ * ({@code null} / {@code 0}).
+ *
+ * @author Andrew
+ */
+public class DocumentLine {
+    /** Port used when a line's port field is missing or is not a valid integer. */
+    private static final int DEFAULT_PORT = 70;
+
+    String type; //todo: Enum
+    String displayText;
+    String selector;
+    String host;
+    int port;
+
+    /**
+     * Builds a line from the four tab-separated fields of a menu entry.
+     *
+     * @param typeAndText first field: one type character followed by the display text
+     * @param selector    selector field of the entry
+     * @param host        host field of the entry
+     * @param port        port field as text; surrounding whitespace is ignored, and
+     *                    {@code null} or non-numeric text falls back to port 70
+     * @throws StringIndexOutOfBoundsException if {@code typeAndText} is empty
+     */
+    public DocumentLine(String typeAndText, String selector, String host, String port) {
+        type = typeAndText.substring(0, 1);
+        displayText = typeAndText.substring(1);
+        this.selector = selector;
+        this.host = host;
+        this.port = parsePort(port);
+    }
+
+    /** Creates a line that carries only a type; used for {@link #TERMINATOR}. */
+    private DocumentLine(String type) {
+        this.type = type;
+    }
+
+    /**
+     * Converts the textual port field to an int. The text is trimmed first so
+     * that padding or a stray carriage return does not turn an otherwise valid
+     * port into the default.
+     */
+    private static int parsePort(String port) {
+        if (port == null) {
+            return DEFAULT_PORT;
+        }
+        try {
+            return Integer.parseInt(port.trim());
+        }
+        catch (NumberFormatException ex) {
+            return DEFAULT_PORT;
+        }
+    }
+
+    /** Marker for the "." line that ends a menu listing. */
+    public static final DocumentLine TERMINATOR = new DocumentLine(".");
+}
@@ 5,7 5,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.Socket;
-import javax.net.SocketFactory;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
/**
*
@@ 25,12 27,18 @@ public class Gopher {
}
+ /**
+ * Downloads the page at ReferenceURLs.floodgap, parses the returned text into
+ * DocumentLine entries, and prints the total number of bytes read.
+ * The parsed list is a local variable and is not used after parsing in the
+ * code shown here.
+ *
+ * @throws IOException declared by downloadPageContents
+ * @throws InterruptedException declared by downloadPageContents
+ */
public Gopher() throws IOException, InterruptedException {
- downloadPageContents(ReferenceURLs.floodgap);
-
+ //TODO: It probably is a good idea to parse content as we go.
+ //That would be a good way of detecting the terminator; trade-off is it
+ //might make it slightly harder to archive the raw content.
+ //Postponing that decision till later.
+ String pageContents = downloadPageContents(ReferenceURLs.floodgap);
+ List<DocumentLine> parsedContents = new ArrayList<>();
+ parseContents(pageContents, parsedContents);
System.out.println("Read total of " + bytesRead + " bytes");
}
private String downloadPageContents(GopherURL url) throws IOException, InterruptedException {
+ StringBuilder sb = new StringBuilder();
Socket s = new Socket(url.host, url.port);
OutputStream os = s.getOutputStream();
os.write((url.selector + "\r\n").getBytes());
@@ 50,6 58,7 @@ fetch: for (;;) {
bytesRead+=actual;
String valueRead = new String(buffer, "Windows-1252");
System.out.println(valueRead);
+ sb.append(valueRead);
}
wait: for (;;) {
Thread.sleep(10);
@@ 63,6 72,44 @@ wait: for (;;) {
}
break;
}
- return "";
+ return sb.toString();
+ }
+
+    /**
+     * Splits a raw menu response into CRLF-separated lines and appends one
+     * {@link DocumentLine} per usable entry to {@code documentLines}.
+     *
+     * <p>An entry needs at least four tab-separated fields (type+text, selector,
+     * host, port); any further fields are ignored. Lines with fewer fields are
+     * dropped, except a lone "." which is recorded as
+     * {@link DocumentLine#TERMINATOR} and ends parsing, since it marks the end
+     * of the listing. Entries that cannot be built are reported on stderr and
+     * skipped so one bad line does not abort the whole page.
+     *
+     * @param pageContents  full text of the response
+     * @param documentLines list that receives the parsed lines, in order
+     */
+    private void parseContents(String pageContents, List<DocumentLine> documentLines) {
+        for (String line : pageContents.split("\r\n")) {
+            // Limit -1 keeps trailing empty fields; the default split would drop
+            // them and make entries with an empty port or host look too short.
+            String[] segments = line.split("\t", -1);
+            if (segments.length >= 4) {
+                try {
+                    documentLines.add(new DocumentLine(segments[0], segments[1], segments[2], segments[3]));
+                    System.out.println("Line segments");
+                }
+                catch (RuntimeException ex) {
+                    System.err.println("Skipping unparseable line \"" + line + "\": " + ex);
+                }
+            }
+            else if (segments.length == 1 && segments[0].equals(".")) {
+                documentLines.add(DocumentLine.TERMINATOR);
+                // Nothing after the terminator belongs to the listing.
+                break;
+            }
+        }
+        System.out.println("Lines");
+    }
+
+ //The server responds with a block of text terminated with a period on a
+ //line by itself, and closes the connection.
+ //TODO: Properly implement detection of the trailing bytes, rather than simply
+ //waiting up to a second for the end.
+ //The array below holds that terminator as raw bytes: CR LF '.' CR LF.
+ //It is filled in by the static initializer and is not referenced by any of
+ //the code visible here yet.
+ final static byte[] terminatingBytes = new byte[5];
+
+ static {
+ terminatingBytes[0] = 0x0d; //CR
+ terminatingBytes[1] = 0x0a; //LF
+ terminatingBytes[2] = 0x2e; //'.'
+ terminatingBytes[3] = 0x0d; //CR
+ terminatingBytes[4] = 0x0a; //LF
}
}