fa50e82e3ede — Andrew@Edwin8 6 years ago
Initial commit.  Traverses the downloads database, prints to a text file everything that is on AtomicGamer, 3DDownloads, or Telefragged.
A => .hgignore +5 -0
@@ 0,0 1,5 @@ 
+\.orig$
+\.orig\..*$
+\.chg\..*$
+\.rej$
+\.conflict\~$

          
A => nbactions.xml +17 -0
@@ 0,0 1,17 @@ 
+<?xml version="1.0" encoding="UTF-8"?>
+<actions>
+        <action>
+            <actionName>run</actionName>
+            <packagings>
+                <packaging>jar</packaging>
+            </packagings>
+            <goals>
+                <goal>process-classes</goal>
+                <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
+            </goals>
+            <properties>
+                <exec.args>-classpath %classpath com.civfanatics.downloadtraverser.Main</exec.args>
+                <exec.executable>java</exec.executable>
+            </properties>
+        </action>
+    </actions>

          
A => pom.xml +20 -0
@@ 0,0 1,20 @@ 
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.mycompany</groupId>
+    <artifactId>CFCTraverser</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <packaging>jar</packaging>
+    <dependencies>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.8.2</version>
+        </dependency>
+    </dependencies>
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <maven.compiler.source>1.7</maven.compiler.source>
+        <maven.compiler.target>1.7</maven.compiler.target>
+    </properties>
+</project>
  No newline at end of file

          
A => src/main/java/com/civfanatics/downloadtraverser/DownloadInfo.java +76 -0
@@ 0,0 1,76 @@ 
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package com.civfanatics.downloadtraverser;
+
+/**
+ * Mutable data holder describing one entry of the downloads database.
+ *
+ * <p>Every field starts out unset ({@code null}, or {@code 0} for the size)
+ * and is filled in through the setters. Apart from the entity decoding done by
+ * {@link #setName(String)}, values are stored exactly as given.
+ *
+ * @author Andrew
+ */
+public class DownloadInfo {
+
+    /** Display name of the uploader. */
+    private String authorName;
+    /** Link to the uploader's page; callers may store an empty string when there is none. */
+    private String authorPage;
+
+    /** Title of the download, with basic HTML entities decoded. */
+    private String name;
+    /** Size of the download; the unit is not defined by this class. */
+    private int size = 0;
+    /** Free-form description, stored as supplied (may contain HTML markup). */
+    private String description;
+
+    /** Link to the download's detail page, stored as supplied. */
+    private String url;
+
+    /** Creates an empty record; populate it through the setters. */
+    public DownloadInfo() {
+    }
+
+    /** @return the uploader's display name, or {@code null} if never set */
+    public String getAuthorName() {
+        return authorName;
+    }
+
+    /** @param authorName the uploader's display name */
+    public void setAuthorName(String authorName) {
+        this.authorName = authorName;
+    }
+
+    /** @return the link to the uploader's page, or {@code null} if never set */
+    public String getAuthorPage() {
+        return authorPage;
+    }
+
+    /** @param authorPage the link to the uploader's page */
+    public void setAuthorPage(String authorPage) {
+        this.authorPage = authorPage;
+    }
+
+    /** @return the decoded title of the download, or {@code null} if never set */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Stores the title of the download after decoding the three HTML entities
+     * {@code &gt;}, {@code &lt;} and {@code &amp;}.
+     *
+     * @param name the title as it appears in the page markup; {@code null}
+     *             clears the title instead of throwing, matching the other
+     *             setters
+     */
+    public void setName(String name) {
+        if (name == null) {
+            this.name = null;
+            return;
+        }
+        // "&amp;" is decoded last so that an escaped entity such as "&amp;lt;"
+        // becomes the literal text "&lt;" rather than being decoded twice.
+        this.name = name.replace("&gt;", ">")
+                .replace("&lt;", "<")
+                .replace("&amp;", "&");
+    }
+
+    /** @return the size of the download, {@code 0} if never set */
+    public int getSize() {
+        return size;
+    }
+
+    /** @param size the size of the download */
+    public void setSize(int size) {
+        this.size = size;
+    }
+
+    /** @return the description, or {@code null} if never set */
+    public String getDescription() {
+        return description;
+    }
+
+    /** @param description the description, stored unchanged */
+    public void setDescription(String description) {
+        this.description = description;
+    }
+
+    /** @return the link to the download's detail page, or {@code null} if never set */
+    public String getUrl() {
+        return url;
+    }
+
+    /** @param url the link to the download's detail page, stored unchanged */
+    public void setUrl(String url) {
+        this.url = url;
+    }
+}

          
A => src/main/java/com/civfanatics/downloadtraverser/Main.java +230 -0
@@ 0,0 1,230 @@ 
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package com.civfanatics.downloadtraverser;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+/**
+ * Crawls the downloads database below {@link #root} and records every download
+ * whose file is served from one of the hosts listed in {@link #sites}.
+ *
+ * <p>Starting from a category page, the crawler walks sub-categories and
+ * result pages, opens each download's detail page, requests the download link
+ * without following redirects and, when the redirect target contains one of
+ * the watched host names, appends a line to the output text file.
+ *
+ * @author Andrew
+ */
+public class Main {
+
+    /** Base URL that every relative link found on the site is appended to. */
+    static String root = "http://forums.civfanatics.com/";
+
+    final int function = 0; //check for links at particular sites
+    final String site = "atomicgamer.com";
+
+    /** Host names looked for in redirect targets; filled in by {@link #main(String[])}. */
+    final static List<String> sites = new ArrayList<>();
+
+    /** Report file; opened and closed by {@link #main(String[])}. */
+    static FileWriter fw = null;
+
+    /** Report location used when no path is given on the command line. */
+    private static final String DEFAULT_OUTPUT_PATH =
+            "D:\\My Documents\\!Documents\\Civilization\\Atomic Gamer\\Civ3GraphicsModpacks.txt";
+
+    /**
+     * Runs the crawl over the "Civilization III Graphics Modpacks" category.
+     *
+     * @param args optional; {@code args[0]} overrides the path of the report
+     *             file, otherwise the built-in default path is used
+     */
+    public static void main(String[]args) {
+        sites.add("atomicgamer.com");
+        sites.add("3ddownloads.com");
+        sites.add("telefragged.com");
+
+        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;
+
+        try {
+            fw = new FileWriter(outputPath);
+        }
+        catch(IOException ex) {
+            System.out.println("IOException");
+            System.out.println("Could not open " + outputPath + ": " + ex.getMessage());
+            System.exit(1);
+        }
+
+        try {
+            traverseFolder(root + "downloads.php?do=cat&id=15", "Civilization III Graphics Modpacks", 0, 1);
+            //traverseFolder(root + "downloads.php?do=cat&id=13", "Civilization III Downloads", 0, 1);
+        }
+        finally {
+            // Close the report even when the crawl dies on an unchecked
+            // exception, so that buffered output is not lost.
+            try {
+                fw.close();
+            }
+            catch(IOException ex) {
+                System.out.println("Couldn't close text file");
+            }
+        }
+    }
+
+    /**
+     * Fetches one category page and processes everything reachable from it:
+     * the downloads listed on the page, the following result page (if any) and
+     * all sub-categories.
+     *
+     * @param folderUrl   absolute URL of the category page
+     * @param folderName  name of the category, used only for console output
+     * @param levelsDeep  nesting depth of the category; controls the console
+     *                    indentation and grows by one per sub-category
+     * @param page        number of the result page, used only for console output
+     */
+    public static void traverseFolder(String folderUrl, String folderName, int levelsDeep, int page) {
+        String spaces = indent(levelsDeep);
+
+        System.out.println("\n" + spaces + "Exploring " + folderName + " page " + page);
+
+        Document doc;
+        try {
+            doc = Jsoup.connect(folderUrl).get();
+        }
+        catch(IOException ex) {
+            System.out.println("Invalid URL " + folderUrl + " (" + ex + ")");
+            return;
+        }
+
+        Elements potentialTables = doc.select("table[class=tborder] table[cellpadding=6]");
+
+        for (Element table : potentialTables) {
+            if (table.select("a:containsOwn(Rating)").size() > 0) {
+                // A table with a "Rating" link either is the downloads listing
+                // itself or wraps it together with the page navigation. The
+                // wrapper is used for paging, the plain one for the downloads.
+                Elements pageNav = table.select("div.pageNav");
+                if (pageNav.size() == 0) {
+                    System.out.println(spaces + "Found downloads");
+                    processDownloadRows(table);
+                }
+                else {
+                    System.out.println(spaces + "Found page nav");
+                    followNextPage(pageNav.first(), folderName, levelsDeep, page, spaces);
+                }
+            }
+
+            if (table.select("td:contains(Category)").size() == 1) {
+                System.out.println("Found categories");
+                traverseCategories(table, levelsDeep, spaces);
+            }
+        }
+    }
+
+    /** Builds the console indentation: one space per nesting level. */
+    private static String indent(int levelsDeep) {
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < levelsDeep; i++) {
+            sb.append(" ");
+        }
+        return sb.toString();
+    }
+
+    /** Checks every download listed in a downloads table, skipping the table's first row. */
+    private static void processDownloadRows(Element table) {
+        Elements rows = table.select("tr");
+        for (int i = 1; i < rows.size(); i++) {
+            DownloadInfo dlInfo = parseDownloadRow(rows.get(i));
+            if (dlInfo == null) {
+                // One odd row must not abort the whole crawl.
+                System.out.println("Skipping unrecognised download row " + i);
+                continue;
+            }
+
+            handleDownload(dlInfo);
+
+            System.out.println("URL : " + dlInfo.getUrl() + ", name: " + dlInfo.getName());
+        }
+    }
+
+    /**
+     * Extracts name, link, description and author from one row of a downloads
+     * table. The cells are expected in this order: image,
+     * name/link/description/rating, author, date, downloads.
+     *
+     * @return the parsed download, or {@code null} when the row does not have
+     *         the expected cells or link
+     */
+    private static DownloadInfo parseDownloadRow(Element row) {
+        Elements data = row.select("td");
+        if (data.size() < 3) {
+            return null;
+        }
+
+        Element nameLinkEtc = data.get(1);
+        Element ahref = nameLinkEtc.select("a[href]").first();
+        if (ahref == null) {
+            return null;
+        }
+        Element title = ahref.select("strong").first();
+        if (title == null) {
+            return null;
+        }
+
+        // The description is the markup between the first "<br>" and the last
+        // "&nbsp" of the cell. Either marker may be missing, so both indexes
+        // are checked before being used with substring.
+        String cellHtml = nameLinkEtc.html();
+        String description = "";
+        int brIndex = cellHtml.indexOf("<br>");
+        if (brIndex >= 0) {
+            String descSize = cellHtml.substring(brIndex + 4);
+            int nbsp = descSize.lastIndexOf("&nbsp");
+            description = (nbsp >= 0) ? descSize.substring(0, nbsp) : descSize;
+        }
+
+        //Now get author
+        Element author = data.get(2);
+        String authorLink = "";
+        String authorName;
+        Element authorHref = author.select("a[href]").first();
+        if (authorHref != null) {
+            authorLink = authorHref.attr("href");
+            authorName = authorHref.html();
+        }
+        else {
+            //A few downloads don't have links to the author, just the name.
+            //In these cases, grab the name only.
+            authorName = author.select("span").html();
+        }
+
+        DownloadInfo dlInfo = new DownloadInfo();
+        dlInfo.setAuthorName(authorName);
+        dlInfo.setAuthorPage(authorLink);
+        dlInfo.setName(title.html());
+        dlInfo.setDescription(description);
+        // The size shown on the page is not recorded; it stays at 0.
+        dlInfo.setSize(0);
+        dlInfo.setUrl(ahref.attr("href"));
+        return dlInfo;
+    }
+
+    /**
+     * Follows the link in the navigation cell right after the current page,
+     * if there is one. The current page is the cell containing a
+     * {@code <strong>} element.
+     */
+    private static void followNextPage(Element pageNav, String folderName, int levelsDeep, int page, String spaces) {
+        boolean breakNext = false;
+        for (Element navElement : pageNav.select("td")) {
+            if (breakNext) {
+                //Drill down into this one, then stop
+                Element ahref = navElement.select("a[href]").first();
+                if (ahref == null) {
+                    // Nothing to follow after the current page.
+                    break;
+                }
+                if (ahref.attr("title").contains("Prev Page")) {
+                    //don't go in an infinite loop
+                    break;
+                }
+                //Move to the next page.
+                traverseFolder(root + ahref.attr("href"), folderName, levelsDeep, page + 1);
+                break;
+            }
+            if (navElement.select("strong").size() > 0) {
+                //Found current page
+                System.out.println(spaces + "Found page ");
+                breakNext = true;
+            }
+        }
+    }
+
+    /** Descends into every category listed in a category table, skipping the table's first row. */
+    private static void traverseCategories(Element table, int levelsDeep, String spaces) {
+        Elements rows = table.select("tr");
+        for (int i = 1; i < rows.size(); i++) {
+            Element row = rows.get(i);
+            Element link = row.select("td").first();
+            Element ahref = row.select("a[href]").first();
+            if (link == null || ahref == null) {
+                // Not a category row (no cell or no link); nothing to descend into.
+                continue;
+            }
+            String categoryName = link.select("strong").html();
+            String href = ahref.attr("href");
+
+            System.out.println(spaces + "Found category " + categoryName + " at " + href);
+
+            traverseFolder(root + href, categoryName, levelsDeep + 1, 1);
+        }
+    }
+
+    /**
+     * Opens a download's detail page, requests its download link without
+     * following redirects and, if the answer is a 302 whose target contains
+     * one of the watched hosts, appends a line to the report file.
+     *
+     * @param download the download to check; its URL is relative to {@link #root}
+     */
+    private static void handleDownload(DownloadInfo download) {
+        HttpURLConnection connection = null;
+        try {
+            Document document = Jsoup.connect(root + download.getUrl()).get();
+            //Find the link that contains the name of the download (ex. Download Grain Mills)
+            Element ahref = findLinkByTitle(document, download.getName());
+            String href = (ahref == null) ? "" : ahref.attr("href");
+            if (href.isEmpty()) {
+                // Without a link there is nothing to probe; requesting the
+                // bare site root would only produce a meaningless result.
+                System.out.println("debug: no download link found on " + download.getUrl());
+                return;
+            }
+            // The first character of the href is dropped before appending it to root.
+            String link = href.substring(1);
+
+            URL url = new URL(root + link);
+            connection = (HttpURLConnection) url.openConnection();
+            // Must be set on this instance: the static setFollowRedirects only
+            // changes the default for connections created afterwards.
+            connection.setInstanceFollowRedirects(false);
+
+            connection.connect();
+
+            int responseCode = connection.getResponseCode();
+
+            if (responseCode == 302) {
+                String location = connection.getHeaderField("Location");
+                if (location != null && fw != null) {
+                    for (String site : sites) {
+                        if (location.contains(site)) {
+                            fw.write(download.getName() + " at " + download.getUrl() + " is at " + site + "\n");
+                            fw.flush();
+                            break;
+                        }
+                    }
+                }
+                System.out.println("Redirected");
+            }
+
+            System.out.println("Got download link");
+        }
+        catch(IOException ex) {
+            System.out.println("Invalid URL " + download.getUrl());
+        }
+        finally {
+            if (connection != null) {
+                connection.disconnect();
+            }
+        }
+    }
+
+    /**
+     * Returns the first anchor whose {@code title} attribute contains the given
+     * name, ignoring case, or {@code null} when there is none.
+     *
+     * <p>The comparison is done in Java rather than by splicing the name into
+     * a CSS selector, because download names may contain characters that have
+     * a meaning in selector syntax.
+     */
+    private static Element findLinkByTitle(Document document, String name) {
+        if (name == null) {
+            return null;
+        }
+        String needle = name.trim().toLowerCase();
+        if (needle.isEmpty()) {
+            return null;
+        }
+        for (Element anchor : document.select("a[title]")) {
+            if (anchor.attr("title").toLowerCase().contains(needle)) {
+                return anchor;
+            }
+        }
+        return null;
+    }
+}