Skip to content

Commit

Permalink
adding tests and improving code quality. rel #33
Browse files Browse the repository at this point in the history
  • Loading branch information
Gustavo Pinto committed May 23, 2013
1 parent de26663 commit 46ef0f5
Show file tree
Hide file tree
Showing 13 changed files with 156 additions and 131 deletions.
21 changes: 6 additions & 15 deletions src/java/main/br/ufpe/cin/groundhog/Project.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package br.ufpe.cin.groundhog;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

import br.ufpe.cin.groundhog.util.Dates;

/**
* Represents a software project in Groundhog
* @author fjsj, gustavopinto, rodrigoalvesvieira
Expand Down Expand Up @@ -330,14 +330,10 @@ public void setCreatedAt(Date createdAt) {
}

/**
*
* @param createdAtParam the String corresponding to the creation date of the project in question, e.g. 2012-04-28T15:40:35Z
* @throws java.text.ParseException
*/
public void setCreatedAt(String createdAtParam) throws java.text.ParseException {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date createAtDate = format.parse(createdAtParam.replace('T', ' ').replace("Z", ""));

public void setCreatedAt(String createdAtParam) {
    // Dates is a project helper; presumably format(...) turns the text into a Date
    // using the given pattern -- TODO confirm against br.ufpe.cin.groundhog.util.Dates.
    this.createdAt = new Dates("yyyy-MM-dd HH:mm:ss").format(createdAtParam);
}

Expand All @@ -359,16 +355,11 @@ public void setLastPushedAt(Date lastPushedAtParam) {
}

/**
*
* @param lastPushedAtParam the String corresponding to the date of the last push to the project
* in question, e.g. 2012-04-28T15:40:35Z
* @throws ParseException
* @throws java.text.ParseException
*/
public void setLastPushedAt(String lastPushedAtParam) throws ParseException, java.text.ParseException {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date lastPushDate = format.parse(lastPushedAtParam.replace('T', ' ').replace("Z", ""));

public void setLastPushedAt(String lastPushedAtParam) {
    // Dates is a project helper; presumably format(...) turns the text into a Date
    // using the given pattern -- TODO confirm against br.ufpe.cin.groundhog.util.Dates.
    this.lastPushedAt = new Dates("yyyy-MM-dd HH:mm:ss").format(lastPushedAtParam);
}

Expand Down
9 changes: 5 additions & 4 deletions src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
*/
public class CrawlGitHub extends ForgeCrawler {

private final static Logger logger = LoggerFactory
.getLogger(CrawlGitHub.class);
private final static Logger logger = LoggerFactory.getLogger(CrawlGitHub.class);

private final GitClient gitClient;
private final File destinationFolder;

@Inject
public CrawlGitHub(GitClient gitClient, File destinationFolder) {
super(destinationFolder);
super();
this.gitClient = gitClient;
this.destinationFolder = destinationFolder;
}

@Override
Expand All @@ -41,7 +42,7 @@ protected File downloadProject(Project project)

logger.info(String.format("Downloading %s project..", project.getName()));

gitClient.clone(cloneUrl, projectFolder);
this.gitClient.clone(cloneUrl, projectFolder);
return projectFolder;
}
}
27 changes: 3 additions & 24 deletions src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGoogleCode.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Future;

import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.api.errors.InvalidRemoteException;
Expand All @@ -16,10 +13,6 @@
import br.ufpe.cin.groundhog.Project;
import br.ufpe.cin.groundhog.SCM;
import br.ufpe.cin.groundhog.scmclient.GitClient;
import br.ufpe.cin.groundhog.scmclient.ScmModule;

import com.google.inject.Guice;
import com.google.inject.Injector;

/**
* A concrete class to crawl Google Code.
Expand All @@ -30,10 +23,12 @@ public class CrawlGoogleCode extends ForgeCrawler {
private static Logger logger = LoggerFactory.getLogger(CrawlGoogleCode.class);

private final GitClient gitClient;
private final File destinationFolder;

public CrawlGoogleCode(GitClient gitClient, File destinationFolder) {
super(destinationFolder);
super();
this.gitClient = gitClient;
this.destinationFolder = destinationFolder;
}

@Override
Expand Down Expand Up @@ -71,20 +66,4 @@ protected File downloadProject(Project project)
}
return projectFolder;
}

public static void main(String[] args) throws Exception {
    // Manual smoke run: downloads one hard-coded project and prints the elapsed seconds.
    final long startedAt = System.nanoTime();

    List<Project> sampleProjects = Arrays.asList(new Project("epubcheck", ""));
    File outputFolder = new File("C:\\Users\\fjsj\\Downloads\\EponaProjects\\");

    // The Git client is obtained from Guice using the SCM module bindings.
    Injector scmInjector = Guice.createInjector(new ScmModule());
    GitClient client = scmInjector.getInstance(GitClient.class);

    CrawlGoogleCode crawler = new CrawlGoogleCode(client, outputFolder);
    List<Future<File>> pendingDownloads = crawler.downloadProjects(sampleProjects);
    crawler.shutdown();

    // Wait for each download; get() rethrows whatever the task failed with.
    for (Future<File> pending : pendingDownloads) {
        pending.get();
    }

    double elapsedSeconds = (System.nanoTime() - startedAt) / 1000000000.0;
    System.out.printf("Elapsed: %.2f", elapsedSeconds);
}
}
38 changes: 3 additions & 35 deletions src/java/main/br/ufpe/cin/groundhog/crawler/CrawlSourceForge.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Enumeration;
import java.util.List;
import java.util.Stack;
import java.util.Vector;
Expand All @@ -24,12 +23,9 @@

import br.ufpe.cin.groundhog.Project;
import br.ufpe.cin.groundhog.extractor.Formats;
import br.ufpe.cin.groundhog.http.HttpModule;
import br.ufpe.cin.groundhog.http.Requests;

import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.ning.http.client.AsyncCompletionHandler;
import com.ning.http.client.ListenableFuture;
import com.ning.http.client.Response;
Expand All @@ -40,13 +36,15 @@ public class CrawlSourceForge extends ForgeCrawler {
private ConcurrentHashMap<String, Date> mapModifiedDate;
private SimpleDateFormat dateFormat;
private Requests requests;
private File destinationFolder;

@Inject
public CrawlSourceForge(Requests requests, File destinationFolder) {
super(destinationFolder);
super();
this.mapModifiedDate = new ConcurrentHashMap<String, Date>();
this.dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z");
this.requests = requests;
this.destinationFolder = destinationFolder;
}

private void parseURLsFromPage(String project, String html,
Expand Down Expand Up @@ -190,34 +188,4 @@ protected File downloadProject(Project project) throws IOException,
}
return new File(destinationFolder, projectName);
}

/**
 * Stamps downloaded files with the modification dates recorded for them.
 *
 * For every relative path stored in mapModifiedDate, the path is resolved against
 * destinationFolder and, if that file exists, its last-modified time is set to the
 * recorded date. Paths with no file on disk are skipped.
 */
private void setProjectsDirectoriesDates() {
    // Iterate the key set directly. An Enumeration loop that fetches with
    // nextElement() before testing hasMoreElements() throws NoSuchElementException
    // on an empty map and never processes the final key.
    for (String relativePath : mapModifiedDate.keySet()) {
        Date modifiedDate = mapModifiedDate.get(relativePath);
        File f = new File(destinationFolder, relativePath);
        if (f.exists()) {
            // Best effort: the boolean result of setLastModified is deliberately ignored.
            f.setLastModified(modifiedDate.getTime());
        }
    }
}

public static void main(String[] args) throws Exception {
    // Manual smoke run: downloads two hard-coded projects, restores their file
    // dates and prints the elapsed seconds.
    final long startedAt = System.nanoTime();

    List<Project> sampleProjects = Arrays.asList(
            new Project("geom-java", ""),
            new Project("im4java", ""));
    File outputFolder = new File("C:\\Users\\fjsj\\Downloads\\EponaProjects\\");

    // The HTTP helper is obtained from Guice using the HTTP module bindings.
    Injector httpInjector = Guice.createInjector(new HttpModule());
    Requests httpRequests = httpInjector.getInstance(Requests.class);

    CrawlSourceForge crawler = new CrawlSourceForge(httpRequests, outputFolder);
    List<Future<File>> pendingDownloads = crawler.downloadProjects(sampleProjects);
    crawler.shutdown();

    // Wait for each download; get() rethrows whatever the task failed with.
    for (Future<File> pending : pendingDownloads) {
        pending.get();
    }
    crawler.setProjectsDirectoriesDates();

    double elapsedSeconds = (System.nanoTime() - startedAt) / 1000000000.0;
    System.out.printf("Elapsed: %.2f", elapsedSeconds);
}
}
14 changes: 7 additions & 7 deletions src/java/main/br/ufpe/cin/groundhog/crawler/ForgeCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,14 @@
*
*/
public abstract class ForgeCrawler {

private ExecutorService ex;
protected File destinationFolder;

/**
* Constructs a new ForgeCrawler with a given destinationFolder.
*
* @param destinationFolder folder into which projects will be downloaded
*/
protected ForgeCrawler(File destinationFolder) {
ex = Executors.newFixedThreadPool(JsonInput.getMaxThreads());
this.destinationFolder = destinationFolder;
protected ForgeCrawler() {
this.ex = Executors.newFixedThreadPool(JsonInput.getMaxThreads());
}

/**
Expand Down Expand Up @@ -63,14 +60,17 @@ public File call() throws Exception {
});
fs.add(f);
}

shutdown();

return fs;
}

/**
* Guarantees downloads to be executed, but no new downloads will be accepted.
* Should be called after downloadProjects.
*/
public void shutdown() {
private void shutdown() {
    // Orderly shutdown: tasks already submitted to the executor keep running to
    // completion, while any new submission is rejected. shutdownNow() would instead
    // interrupt running downloads and discard the queued ones, which defeats the
    // purpose when this is invoked right after the downloads are submitted.
    ex.shutdown();
}

Expand Down
3 changes: 1 addition & 2 deletions src/java/main/br/ufpe/cin/groundhog/main/CmdMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,8 @@ public void run() {
}
}));
}
crawler.shutdown();

ex.shutdown();

for (int i = 0; i < analysisFutures.size(); i++) {
try {
analysisFutures.get(i).get();
Expand Down
3 changes: 0 additions & 3 deletions src/java/main/br/ufpe/cin/groundhog/main/TestMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ public static void gitHubExample(String term) throws Exception {
logger.info("2 - Download 1st result...");
ForgeCrawler crawler = new CrawlGitHub(injector.getInstance(GitClient.class), downloadFolder);
List<Future<File>> futures = crawler.downloadProjects(projects);
crawler.shutdown();
File repositoryFolder = null;
for (Future<File> f : futures) { // wait for download
repositoryFolder = f.get();
Expand Down Expand Up @@ -111,7 +110,6 @@ public static void sourceForgeExample() throws Exception {

ForgeCrawler crawler = new CrawlSourceForge(requests, downloadFolder);
List<Future<File>> futures = crawler.downloadProjects(projects);
crawler.shutdown();
File repositoryFolder = null;
for (Future<File> f : futures) { // wait for download
repositoryFolder = f.get();
Expand Down Expand Up @@ -153,7 +151,6 @@ public static void googleCodeExample(String term) throws Exception {
logger.info("2 - Download 1st result...");
ForgeCrawler crawler = new CrawlGoogleCode(injector.getInstance(GitClient.class), downloadFolder);
List<Future<File>> futures = crawler.downloadProjects(projects);
crawler.shutdown();
File repositoryFolder = null;
for (Future<File> f : futures) { // wait for download
repositoryFolder = f.get();
Expand Down
48 changes: 18 additions & 30 deletions src/java/main/br/ufpe/cin/groundhog/search/SearchGoogleCode.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package br.ufpe.cin.groundhog.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;
Expand All @@ -26,6 +25,7 @@
*
*/
public class SearchGoogleCode implements ForgeSearch {

private static String root = "http://code.google.com";
private final Requests requests;

Expand All @@ -38,17 +38,12 @@ public SearchGoogleCode(Requests requests) {
* Fetches and returns the checkout command String for the project
* @param html the HTML content of the page to be parsed
* @return the checkout command within the given HTML page
* @throws IOException
*/
private String parseCheckoutCommand(String html) throws IOException {
private String parseCheckoutCommand(String html) {
Document doc = Jsoup.parse(html);
Elements es = doc.select("#checkoutcmd");

if (es.isEmpty()) {
return "";
} else {
return es.first().text();
}
return es.isEmpty() ? "" : es.first().text();
}

/**
Expand All @@ -57,18 +52,15 @@ private String parseCheckoutCommand(String html) throws IOException {
* @param project the project to which the checkout must be applied
*/
private void setCheckoutCommandToProject(String command, Project project) {
String url = command.split(" ")[2];
project.setScmURL(url);

if (command.startsWith("svn")) {
String url = command.split(" ")[2];
project.setSCM(SCM.SVN);
project.setScmURL(url);
} else if (command.startsWith("git")) {
String url = command.split(" ")[2];
project.setSCM(SCM.GIT);
project.setScmURL(url);
} else if (command.startsWith("hg")) {
String url = command.split(" ")[2];
project.setSCM(SCM.HG);
project.setScmURL(url);
} else if (command.equals("")) {
project.setSCM(SCM.NONE);
} else {
Expand All @@ -79,36 +71,32 @@ private void setCheckoutCommandToProject(String command, Project project) {
public List<Project> getProjects(String term, int page) throws SearchException {
try {
List<Project> projects = new ArrayList<Project>();
String paramsStr =
new ParamBuilder().
add("q", term + " label:Java").
add("start", String.valueOf((page - 1) * 10)).
build();
String params = new ParamBuilder()
.add("q", term + " label:Java")
.add("start", String.valueOf((page - 1) * 10))
.build();

Document doc = Jsoup.parse(requests.get(root + "/hosting/search?" + paramsStr));
Document doc = Jsoup.parse(requests.get(root + "/hosting/search?" + params));
for (Element tr : doc.select("#serp table tbody tr")) {
Element el = tr.child(0).child(0);

// The span element within the main search result that contains the number
// of people watching the project on Google Code
Element span = tr.child(1).child(2).child(0);

String projectName, description, imgSrc, iconURL, sourceCodeUrl;
int stars;

projectName = el.attr("href").split("/")[2];
description = tr.child(1).ownText();
imgSrc = el.child(0).attr("src");
iconURL = imgSrc;
stars = Integer.parseInt(span.text());
String projectName = el.attr("href").split("/")[2];
String description = tr.child(1).ownText();
String iconURL = el.child(0).attr("src");

if (imgSrc.startsWith("/")) {
if (iconURL.startsWith("/")) {
iconURL = root + iconURL;
}

sourceCodeUrl = "https://code.google.com/p/" + projectName + "/source/browse/";
String sourceCodeUrl = "https://code.google.com/p/" + projectName + "/source/browse/";

Project forgeProject = new Project(projectName, description, iconURL, sourceCodeUrl);

int stars = Integer.parseInt(span.text());
forgeProject.setWatchersCount(stars);
forgeProject.setFollowersCount(stars);
projects.add(forgeProject);
Expand Down
Loading

0 comments on commit 46ef0f5

Please sign in to comment.