From ea3e82d065ad12fbf6cd6135a79e80b9b984cf7a Mon Sep 17 00:00:00 2001 From: Rodrigo Alves Date: Thu, 5 Sep 2013 17:37:04 -0300 Subject: [PATCH 1/5] Adding Gitective [1] as a dependency and creating the class that will be responsible for the local Git repository analysis within Groundhog. Starting with non-parameterized commit extraction and extraction of commits from a given user. Nothing finished just yet. Code in main package only present for current debugging and testing purposes, once the commit message extraction is done it will be moved to the groundhog-case-study [2] project. [1]: https://github.com/kevinsawicki/gitective [2]: github.com/gustavopinto/groundhog-case-study Related to #55 and #60 --- pom.xml | 9 +- .../main/br/ufpe/cin/groundhog/Project.java | 1 + .../cin/groundhog/crawler/CrawlGitHub.java | 2 - .../extractor/GitCommitExtractor.java | 84 +++++++++++++++++++ .../br/ufpe/cin/groundhog/main/TestMain.java | 20 +++++ 5 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java diff --git a/pom.xml b/pom.xml index a724a05..631e676 100644 --- a/pom.xml +++ b/pom.xml @@ -173,7 +173,12 @@ junit junit 4.11 - + + + org.gitective + gitective-core + 0.9.9 + @@ -270,4 +275,4 @@ - \ No newline at end of file + diff --git a/src/java/main/br/ufpe/cin/groundhog/Project.java b/src/java/main/br/ufpe/cin/groundhog/Project.java index e7e5110..efec53c 100644 --- a/src/java/main/br/ufpe/cin/groundhog/Project.java +++ b/src/java/main/br/ufpe/cin/groundhog/Project.java @@ -85,6 +85,7 @@ public Project(String name, String description) { public Project(User user, String name) { this.user = user; this.name = name; + this.scmURL = "https://github.com/" + user.getLogin() + "/" + name + ".git"; } /** diff --git a/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java b/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java index 1e5959c..983cf53 100644 --- 
a/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java +++ b/src/java/main/br/ufpe/cin/groundhog/crawler/CrawlGitHub.java @@ -16,7 +16,6 @@ * @author fjsj, gustavopinto */ public class CrawlGitHub extends ForgeCrawler { - private final static Logger logger = LoggerFactory.getLogger(CrawlGitHub.class); private final GitClient gitClient; @@ -35,7 +34,6 @@ public File downloadProject(Project project) throws DownloadException { File projectDestinationFolder = new File(destinationFolder, projectName); logger.info(String.format("Downloading %s project..", project.getName())); - try { this.gitClient.clone(projectUrl, projectDestinationFolder); diff --git a/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java new file mode 100644 index 0000000..35c9e3a --- /dev/null +++ b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java @@ -0,0 +1,84 @@ +package br.ufpe.cin.groundhog.extractor; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import br.ufpe.cin.groundhog.Commit; +import br.ufpe.cin.groundhog.User; + +import org.eclipse.jgit.lib.PersonIdent; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.storage.file.FileRepositoryBuilder; +import org.gitective.core.CommitFinder; +import org.gitective.core.filter.commit.AndCommitFilter; +import org.gitective.core.filter.commit.AuthorFilter; +import org.gitective.core.filter.commit.CommitCountFilter; +import org.gitective.core.filter.commit.CommitListFilter; + +/** + * Extract Commit data from Git repositories + * @author Rodrigo Alves + * + */ +public class GitCommitExtractor { + private CommitCountFilter commits; + + public GitCommitExtractor() { + this.commits = new CommitCountFilter(); + } + + /** + * + * @param project + * @return + * @throws IOException + */ + 
public List extractCommits(File project) throws IOException { + List commits = new ArrayList<>(); + String path = project.getAbsolutePath(); + + CommitFinder finder = new CommitFinder(path); + FileRepositoryBuilder builder = new FileRepositoryBuilder(); + Repository repository = builder.setGitDir(new File(path)).readEnvironment() + .findGitDir() + .build(); + + CommitListFilter list = new CommitListFilter(); + + for (RevCommit rev : list.getCommits()){ + System.out.println(rev.getName()); + System.out.println(rev.getAuthorIdent().getName()); + System.out.println(rev.getShortMessage()); + } + + return commits; + } + + /** + * Extracts only the commits from a given {@link User} + * TODO: implement this method + * @return a {@link List} of {@link Commit} objects + * @throws IOException + */ + public List extractCommitFromUser(User user, File project) throws IOException { + List commits = new ArrayList<>(); + + String path = project.getAbsolutePath(); + + CommitFinder finder = new CommitFinder(path); + FileRepositoryBuilder builder = new FileRepositoryBuilder(); + Repository repository = builder.setGitDir(new File(path)).readEnvironment() + .findGitDir() + .build(); + + CommitListFilter list = new CommitListFilter(); + AndCommitFilter filters = new AndCommitFilter(); + PersonIdent author = new PersonIdent(user.getName(), user.getEmail()); + + filters.add(new AuthorFilter(author)); + return commits; + } +} \ No newline at end of file diff --git a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java index 2fd6f29..be87fe5 100644 --- a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java +++ b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java @@ -14,6 +14,7 @@ import br.ufpe.cin.groundhog.Project; import br.ufpe.cin.groundhog.SCM; +import br.ufpe.cin.groundhog.User; import br.ufpe.cin.groundhog.codehistory.CodeHistoryModule; import br.ufpe.cin.groundhog.codehistory.GitCodeHistory; import 
br.ufpe.cin.groundhog.codehistory.SFCodeHistory; @@ -21,6 +22,7 @@ import br.ufpe.cin.groundhog.crawler.CrawlGoogleCode; import br.ufpe.cin.groundhog.crawler.CrawlSourceForge; import br.ufpe.cin.groundhog.crawler.ForgeCrawler; +import br.ufpe.cin.groundhog.extractor.GitCommitExtractor; import br.ufpe.cin.groundhog.http.HttpModule; import br.ufpe.cin.groundhog.http.Requests; import br.ufpe.cin.groundhog.parser.java.JavaParser; @@ -184,6 +186,24 @@ public static void googleCodeExample(String term) throws Exception { public static void main(String[] args) throws Exception { // gitHubExample("restfulie-java"); + GitClient gitClient = new GitClient(); + File folder = new File("/Users/rodrigovieira/Desktop"); + + CrawlGitHub crawler = new CrawlGitHub(gitClient, folder); + User u = new User("gustavopinto"); + + Project pr = new Project(u, "groundhog-case-study"); + + System.out.println("url e: " + pr.getScmURL()); + + crawler.downloadProject(pr); + + File project = new File("/Users/rodrigovieira/Desktop/groundhog-case-study"); + + GitCommitExtractor extractor = new GitCommitExtractor(); + extractor.extractCommits(project); + + System.out.println("Pronto!"); // sourceForgeExample(); // googleCodeExample("facebook-java-api"); // Google Code SVN // googleCodeExample("guava-libraries"); // Google Code Git From 35a9edf555b84df6ae305f83839c5092921ae63b Mon Sep 17 00:00:00 2001 From: Gustavo Pinto Date: Mon, 9 Sep 2013 20:28:08 -0300 Subject: [PATCH 2/5] searching commits with gitective --- .../extractor/GitCommitExtractor.java | 32 +++++++------------ .../br/ufpe/cin/groundhog/main/TestMain.java | 22 ++++++------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java index 35c9e3a..ca31240 100644 --- a/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java +++ 
b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java @@ -4,31 +4,27 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import br.ufpe.cin.groundhog.Commit; -import br.ufpe.cin.groundhog.User; import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; -import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.file.FileRepositoryBuilder; import org.gitective.core.CommitFinder; +import org.gitective.core.PathFilterUtils; import org.gitective.core.filter.commit.AndCommitFilter; import org.gitective.core.filter.commit.AuthorFilter; import org.gitective.core.filter.commit.CommitCountFilter; import org.gitective.core.filter.commit.CommitListFilter; +import br.ufpe.cin.groundhog.Commit; +import br.ufpe.cin.groundhog.User; + /** * Extract Commit data from Git repositories * @author Rodrigo Alves * */ public class GitCommitExtractor { - private CommitCountFilter commits; - - public GitCommitExtractor() { - this.commits = new CommitCountFilter(); - } /** * @@ -37,24 +33,20 @@ public GitCommitExtractor() { * @throws IOException */ public List extractCommits(File project) throws IOException { - List commits = new ArrayList<>(); - String path = project.getAbsolutePath(); + CommitListFilter list = new CommitListFilter(); + String path = project.getAbsolutePath() + "/.git"; CommitFinder finder = new CommitFinder(path); - FileRepositoryBuilder builder = new FileRepositoryBuilder(); - Repository repository = builder.setGitDir(new File(path)).readEnvironment() - .findGitDir() - .build(); - - CommitListFilter list = new CommitListFilter(); - for (RevCommit rev : list.getCommits()){ - System.out.println(rev.getName()); - System.out.println(rev.getAuthorIdent().getName()); + finder.setFilter(list).find(); + + for(RevCommit rev : list.getCommits()){ + System.out.print(rev.getName() + " "); + System.out.print(rev.getAuthorIdent().getName() + " "); 
System.out.println(rev.getShortMessage()); } - return commits; + return null; } /** diff --git a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java index be87fe5..bec24af 100644 --- a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java +++ b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java @@ -186,19 +186,15 @@ public static void googleCodeExample(String term) throws Exception { public static void main(String[] args) throws Exception { // gitHubExample("restfulie-java"); - GitClient gitClient = new GitClient(); - File folder = new File("/Users/rodrigovieira/Desktop"); - - CrawlGitHub crawler = new CrawlGitHub(gitClient, folder); - User u = new User("gustavopinto"); - - Project pr = new Project(u, "groundhog-case-study"); - - System.out.println("url e: " + pr.getScmURL()); - - crawler.downloadProject(pr); - - File project = new File("/Users/rodrigovieira/Desktop/groundhog-case-study"); +// File folder = new File("/tmp"); +// CrawlGitHub crawler = new CrawlGitHub(new GitClient(), folder); +// Project pr = new Project(new User("gustavopinto"), "groundhog-case-study"); +// +// System.out.println("url e: " + pr.getScmURL()); +// +// crawler.downloadProject(pr); + + File project = new File("/tmp/groundhog-case-study"); GitCommitExtractor extractor = new GitCommitExtractor(); extractor.extractCommits(project); From cd8c27f1fbd37a168a979f3d9ac2701c4759d95e Mon Sep 17 00:00:00 2001 From: Gustavo Pinto Date: Mon, 9 Sep 2013 20:31:05 -0300 Subject: [PATCH 3/5] filtering commits that have altered java code. 
r #60 --- .../extractor/GitCommitExtractor.java | 22 +++++++++++++------ .../br/ufpe/cin/groundhog/main/TestMain.java | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java index ca31240..9d9c654 100644 --- a/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java +++ b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java @@ -26,13 +26,7 @@ */ public class GitCommitExtractor { - /** - * - * @param project - * @return - * @throws IOException - */ - public List extractCommits(File project) throws IOException { + public List extractCommits(File project) { CommitListFilter list = new CommitListFilter(); String path = project.getAbsolutePath() + "/.git"; @@ -49,6 +43,20 @@ public List extractCommits(File project) throws IOException { return null; } + public int numberOfCommits(File project) { + CommitCountFilter commits = new CommitCountFilter(); + String path = project.getAbsolutePath() + "/.git"; + + CommitFinder finder = new CommitFinder(path); + finder.setFilter(PathFilterUtils.andSuffix(".java")); + finder.setMatcher(commits); + finder.find(); + + System.out.println(commits.getCount()); + + return (int) commits.getCount(); + } + /** * Extracts only the commits from a given {@link User} * TODO: implement this method diff --git a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java index bec24af..88acf80 100644 --- a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java +++ b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java @@ -14,7 +14,6 @@ import br.ufpe.cin.groundhog.Project; import br.ufpe.cin.groundhog.SCM; -import br.ufpe.cin.groundhog.User; import br.ufpe.cin.groundhog.codehistory.CodeHistoryModule; import br.ufpe.cin.groundhog.codehistory.GitCodeHistory; import br.ufpe.cin.groundhog.codehistory.SFCodeHistory; 
@@ -198,6 +197,7 @@ public static void main(String[] args) throws Exception { GitCommitExtractor extractor = new GitCommitExtractor(); extractor.extractCommits(project); + extractor.numberOfCommits(project); System.out.println("Pronto!"); // sourceForgeExample(); From 3d2e1ef4b3336e669547f7f5b901be2821d4341f Mon Sep 17 00:00:00 2001 From: Rodrigo Alves Date: Fri, 4 Oct 2013 22:35:38 -0300 Subject: [PATCH 4/5] Refactoring method for counting commits that include a given file extension --- .../extractor/GitCommitExtractor.java | 23 ++++++++++++------- .../br/ufpe/cin/groundhog/main/TestMain.java | 6 +++-- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java index 9d9c654..3e051fb 100644 --- a/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java +++ b/src/java/main/br/ufpe/cin/groundhog/extractor/GitCommitExtractor.java @@ -34,25 +34,32 @@ public List extractCommits(File project) { finder.setFilter(list).find(); - for(RevCommit rev : list.getCommits()){ - System.out.print(rev.getName() + " "); - System.out.print(rev.getAuthorIdent().getName() + " "); - System.out.println(rev.getShortMessage()); + for (RevCommit rev : list.getCommits()){ + System.out.println(rev.getName() + " " + rev.getAuthorIdent().getName() + " " + rev.getShortMessage()); } return null; } - public int numberOfCommits(File project) { + /** + * A method that returns the number of commits that contain files with a given file extension + * Example usage: + * + * File project = new File("/tmp/project"); + * numberOfCommitsWithExtension(project, "java") // Returns the number of commits in project that include Java files + * + * @param project A {@link File} object for the directory where the Git repository is located + * @param extension The extension to be searched within the commits, without the leading dot (e.g. "java") + * @return the number of commits that include 
files with the given extension + */ + public int numberOfCommitsWithExtension(File project, String extension) { CommitCountFilter commits = new CommitCountFilter(); String path = project.getAbsolutePath() + "/.git"; CommitFinder finder = new CommitFinder(path); - finder.setFilter(PathFilterUtils.andSuffix(".java")); + finder.setFilter(PathFilterUtils.andSuffix("." + extension)); finder.setMatcher(commits); finder.find(); - - System.out.println(commits.getCount()); return (int) commits.getCount(); } diff --git a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java index 88acf80..2b081f6 100644 --- a/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java +++ b/src/java/main/br/ufpe/cin/groundhog/main/TestMain.java @@ -193,11 +193,13 @@ public static void main(String[] args) throws Exception { // // crawler.downloadProject(pr); - File project = new File("/tmp/groundhog-case-study"); + File project = new File("/Users/rodrigovieira/Desktop/groundhog-case-study"); GitCommitExtractor extractor = new GitCommitExtractor(); extractor.extractCommits(project); - extractor.numberOfCommits(project); + + System.out.println(extractor.numberOfCommitsWithExtension(project, "md")); + System.out.println("Pronto!"); // sourceForgeExample(); From 33c4bec30f58fcdab87103dc49ee06c554ae9280 Mon Sep 17 00:00:00 2001 From: Rodrigo Alves Date: Sat, 5 Oct 2013 10:48:26 -0300 Subject: [PATCH 5/5] Adding some documentation --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 57c7c26..a90c5a7 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,19 @@ Project project = new Project(user, "rails"); // project github.com/rails/rails List contributors = searchGitHub.getAllProjectContributors(project); ``` +## Local Data Extraction + +In addition to the metadata extraction allowed via the GitHub API, Groundhog supports local data extraction from repositories via a Git interface. + +You can, for 
example, count the number of commits in a project that include a Java file, via a `GitCommitExtractor` object: + +```java +GitCommitExtractor extractor = new GitCommitExtractor(); +File project = new File("/tmp/elasticsearch"); + +extractor.numberOfCommitsWithExtension(project, "java"); +``` + ## Documentation Groundhog features a [Wiki], where you can browse for more information.