M src/main/java/com/civfanatics/storyarchiver/StoryArchiver.java +18 -2
@@ 1,6 1,8 @@
package com.civfanatics.storyarchiver;
import com.civfanatics.storyarchiver.DBObjects.Database;
+import com.civfanatics.storyarchiver.download.DownloadResource;
+import com.civfanatics.storyarchiver.download.DownloadRunnable;
import com.civfanatics.storyarchiver.metrics.ThreadStatus;
import com.civfanatics.storyarchiver.wordpress.WordpressConfig;
import com.civfanatics.storyarchiver.wordpress.WordpressPage;
@@ 15,6 17,7 @@ import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Queue;
+import java.util.Stack;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.FileHandler;
@@ 142,16 145,29 @@ public class StoryArchiver {
}
else if (Options.wordpress) {
try {
+ //Use a Stack because, like Vector, its individual operations are thread-safe,
+ //so the downloads could later be processed in parallel.
+ Stack<DownloadResource> stack = new Stack<>();
+
//Structure
String pageFolder = Options.destinationFolder + "/wordpress/mainPage/";
String resourcesFolder = Options.destinationFolder + "/resources";
Files.createDirectories(Paths.get(resourcesFolder));
- WordpressConfig wpConfig = new WordpressConfig(Options.destinationFolder, resourcesFolder, pageFolder, pageFolder + "index.html");
+ WordpressConfig wpConfig = new WordpressConfig(Options.destinationFolder, resourcesFolder,
+ pageFolder, pageFolder + "index.html", stack);
Writer fw = new OutputStreamWriter(new FileOutputStream("C:/temp/cfcArchive.html"), "UTF-8");
WordpressPage cfc = new WordpressPage("https://www.civfanatics.com", wpConfig);
- cfc.download();
+ stack.add(cfc);
+
+
+ //Run each download through a DownloadRunnable; the Stack tracks what remains to be downloaded.
+ while (!stack.empty()) {
+ DownloadResource resource = stack.pop();
+ DownloadRunnable runnable = new DownloadRunnable("", resource);
+ runnable.run();
+ }
}
catch(Exception ex) {
logger.log(Level.SEVERE, "Exception", ex);
M src/main/java/com/civfanatics/storyarchiver/download/DownloadRunnable.java +0 -3
@@ 5,7 5,6 @@ import com.civfanatics.storyarchiver.Sto
import com.civfanatics.storyarchiver.metrics.ThreadStatus;
import static com.civfanatics.storyarchiver.metrics.ThreadStatus.deadThreads;
import static com.civfanatics.storyarchiver.metrics.ThreadStatus.doneThreads;
-import java.io.FileWriter;
import java.io.IOException;
import java.time.Duration;
@@ 23,14 22,12 @@ public class DownloadRunnable<T> extends
private String fullURL;
private DownloadResource download;
- private FileWriter writer;
static Logger logger = Logger.getLogger(DownloadRunnable.class.getName());
+ /**
+ * Creates a runnable for a single download resource.
+ * NOTE(review): how run() uses these fields is outside this hunk - confirm there.
+ *
+ * @param fullURL value stored in the fullURL field
+ * @param resource resource stored in the download field
+ */
public DownloadRunnable(String fullURL, DownloadResource resource) {
this.fullURL = fullURL;
this.download = resource;
- this.writer = writer;
}
public void run() {
M src/main/java/com/civfanatics/storyarchiver/wordpress/WordpressConfig.java +12 -1
@@ 1,6 1,9 @@
package com.civfanatics.storyarchiver.wordpress;
+import com.civfanatics.storyarchiver.download.DownloadResource;
+import java.util.Stack;
+
/**
*
* @author Andrew
@@ 10,12 13,14 @@ public class WordpressConfig {
private final String resourcesFolder;
private final String pageFolder;
private final String pageName;
+ private final Stack<DownloadResource> stackRef;
+ /**
+ * Creates the configuration for one Wordpress page download.
+ *
+ * @param rootFolder root folder of the archive
+ * @param resourcesFolder folder that holds downloaded resources
+ * @param pageFolder folder the page is written into
+ * @param pageName path of the page file (callers pass the page folder plus the file name)
+ * @param stackRef stack of resources still to be downloaded; the same instance is handed
+ * on to child configurations, so it is typed rather than raw to match the field
+ */
- public WordpressConfig(String rootFolder, String resourcesFolder, String pageFolder, String pageName) {
+ public WordpressConfig(String rootFolder, String resourcesFolder, String pageFolder, String pageName, Stack<DownloadResource> stackRef) {
this.rootFolder = rootFolder;
this.resourcesFolder = resourcesFolder;
this.pageFolder = pageFolder;
this.pageName = pageName;
+ this.stackRef = stackRef;
}
public String getRootFolder() {
@@ 34,5 39,11 @@ public class WordpressConfig {
return pageName;
}
+ /**
+ * Puts a page on the stack of resources held by this configuration.
+ *
+ * @param page the page to append to the stack
+ */
+ public void addToStack(WordpressPage page) {
+ this.stackRef.push(page);
+ }
+ /**
+ * Builds the configuration for a child page. The child keeps this configuration's
+ * root folder, resources folder and stack reference, and gets its own page folder and name.
+ *
+ * @param childFolder page folder for the child configuration
+ * @param childName page name for the child configuration
+ * @return a new configuration for the child page
+ */
+ public WordpressConfig cloneToChild(String childFolder, String childName) {
+ WordpressConfig child = new WordpressConfig(this.rootFolder, this.resourcesFolder, childFolder, childName, this.stackRef);
+ return child;
+ }
}
M src/main/java/com/civfanatics/storyarchiver/wordpress/WordpressPage.java +42 -1
@@ 10,8 10,8 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.ConnectException;
import java.net.HttpURLConnection;
+import java.net.URL;
import java.nio.file.Files;
-import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.logging.Logger;
@@ 93,6 93,47 @@ public class WordpressPage implements Do
cssCount++;
}
+ //First priority checks - navigation
+ URL ourURL = new URL(url);
+ String ourAuthority = ourURL.getAuthority();
+
+ Elements navLinks = doc.select("#site-navigation a[href]");
+ for (Element navLink : navLinks) {
+ System.out.println("Nav Link: " + navLink.attr("href") + " (" + navLink.text() + ")");
+
+ String href = navLink.attr("href");
+ if (href.startsWith("#") || href.equals(ourURL.toString())) {
+ //internal link or self-reference, skip
+ continue;
+ }
+ URL linkURL = new URL(navLink.attr("href"));
+
+ String authority = linkURL.getAuthority();
+ if (ourAuthority.equals(authority)) {
+ //TODO: Caching
+
+ //Page name.
+ String childName = "index.html";
+ String childPath = linkURL.getPath();
+ if (childPath.isEmpty()) {
+ continue;
+ }
+
+ if (!linkURL.getPath().endsWith("/")) {
+ int lastSlash = linkURL.getPath().lastIndexOf("/");
+ childName = childPath.substring(lastSlash + 1);
+ childPath = childPath.substring(0, lastSlash);
+ }
+ childPath = wpConfig.getRootFolder() + "wordpress/" + childPath;
+ childName = childPath + "/" + childName;
+
+ //Scan it as well
+ WordpressConfig childConfig = wpConfig.cloneToChild(childPath, childName);
+ WordpressPage childPage = new WordpressPage(linkURL.toString(), childConfig);
+ wpConfig.addToStack(childPage);
+ }
+ }
+
fw.write(header.outerHtml());
fw.write(primary.outerHtml());
fw.close();