Skip to content

Commit

Permalink
Add Homework 02 assignment
Browse files Browse the repository at this point in the history
  • Loading branch information
100yo committed Dec 11, 2024
1 parent 25f5053 commit 95744ac
Show file tree
Hide file tree
Showing 16 changed files with 11,767 additions and 4 deletions.
2 changes: 1 addition & 1 deletion 09-threads/lab/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Sentiment analysis, или анализ на настроения, е техни
"I love java" = 3 (moderately positive)
"I hate bugs" = -2 (slightly negative)

За да има смисъл от паралелна обработка обаче, може да си представим, че се анализират големи много файлове, с много текст в тях, например цели книги.
За да има смисъл от паралелна обработка обаче, може да си представим, че се анализират много и големи файлове, с много текст в тях, например цели книги.

Ще работим с няколко текстови потока:
- поток със "стоп думи" - това са често срещани думи в даден език, които обикновено нямат особена семантична стойност или не допринасят за анализа на текста. Например "the", "is", "at", "which", "on" и т.н.
Expand Down
268 changes: 268 additions & 0 deletions homeworks/02-goodreads/README.md

Large diffs are not rendered by default.

11,037 changes: 11,037 additions & 0 deletions homeworks/02-goodreads/resources/goodreads_data.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package bg.sofia.uni.fmi.mjt.goodreads;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;
import com.opencsv.CSVReader;
import com.opencsv.exceptions.CsvException;

import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Loads {@link Book} instances from CSV data.
 */
public class BookLoader {

    /**
     * Parses CSV data into a set of books.
     *
     * <p>All rows are read eagerly, the first row is dropped (presumably the header line) and
     * every remaining row is converted with {@link Book#of(String[])}.
     *
     * @param reader the source of the CSV data; the {@code CSVReader} wrapping it is closed
     *               before this method returns
     * @return the set of books built from all rows except the first one
     * @throws IllegalArgumentException if reading or parsing the CSV data fails
     */
    public static Set<Book> load(Reader reader) {
        try (var parser = new CSVReader(reader)) {
            var rows = parser.readAll();

            return rows.stream()
                .skip(1)
                .map(Book::of)
                .collect(Collectors.toSet());
        } catch (IOException | CsvException cause) {
            throw new IllegalArgumentException("Could not load dataset", cause);
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package bg.sofia.uni.fmi.mjt.goodreads.book;

import java.util.ArrayList;
import java.util.List;

/**
 * Data carrier for a single book entry.
 *
 * @param ID          identifier of the book
 * @param title       title of the book
 * @param author      author of the book
 * @param description textual description of the book
 * @param genres      genres the book belongs to
 * @param rating      rating of the book
 * @param ratingCount number of ratings the book has received
 * @param URL         URL associated with the book
 */
public record Book(
    String ID,
    String title,
    String author,
    String description,
    List<String> genres,
    double rating,
    int ratingCount,
    String URL
) {

    private static final String NOT_IMPLEMENTED = "Not yet implemented";

    /**
     * Factory intended to build a book from the tokens of one dataset row.
     * It is still a stub and always throws.
     *
     * @param tokens the tokens of a single row
     * @return never returns normally in the current implementation
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    public static Book of(String[] tokens) {
        throw new UnsupportedOperationException(NOT_IMPLEMENTED);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package bg.sofia.uni.fmi.mjt.goodreads.finder;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;
import bg.sofia.uni.fmi.mjt.goodreads.sanitizer.StringTokenHandler;

import java.util.List;
import java.util.Set;

/**
 * Skeleton implementation of {@link BookFinderAPI}. Every query method is still a stub that
 * throws {@link UnsupportedOperationException}.
 */
public class BookFinder implements BookFinderAPI {

    /**
     * Creates a finder. Both arguments are currently ignored because the body is empty.
     *
     * <p>NOTE(review): the only class visible in the {@code sanitizer} package is
     * {@code TextTokenizer}; confirm that {@code StringTokenHandler} actually exists, otherwise
     * this parameter type (and its import) has to be changed.
     *
     * @param books     the books to search in
     * @param tokenizer the tokenizer to use for text processing
     */
    public BookFinder(Set<Book> books, StringTokenHandler tokenizer) {}

    /**
     * {@inheritDoc}
     *
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    @Override // was missing although the method is declared in BookFinderAPI
    public Set<Book> allBooks() {
        throw new UnsupportedOperationException("Not yet implemented");
    }

    /**
     * {@inheritDoc}
     *
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    @Override
    public List<Book> searchByAuthor(String authorName) {
        throw new UnsupportedOperationException("Not yet implemented");
    }

    /**
     * {@inheritDoc}
     *
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    @Override
    public Set<String> allGenres() {
        throw new UnsupportedOperationException("Not yet implemented");
    }

    /**
     * {@inheritDoc}
     *
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    @Override
    public List<Book> searchByGenres(Set<String> genres, MatchOption option) {
        throw new UnsupportedOperationException("Not yet implemented");
    }

    /**
     * {@inheritDoc}
     *
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    @Override
    public List<Book> searchByKeywords(Set<String> keywords, MatchOption option) {
        throw new UnsupportedOperationException("Not yet implemented");
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package bg.sofia.uni.fmi.mjt.goodreads.finder;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;

import java.util.List;
import java.util.Set;

/**
* Query operations over a collection of books: listing all books and genres,
* and searching by author, genres or keywords.
*/
public interface BookFinderAPI {

/**
* Retrieves all books.
*
* @return a Set of all books.
*/
Set<Book> allBooks();

/**
* Retrieves all genres.
*
* @return a Set of all genres.
*/
Set<String> allGenres();

/**
* Searches for books by the specified author name.
*
* @param authorName the name of the author to search for.
* @return a List of books written by the specified author.
* Returns an empty list if no books are found.
* @throws IllegalArgumentException if the author name is null or empty
*/
List<Book> searchByAuthor(String authorName);

/**
* Searches for books that belong to the specified genres.
* The search can be based on different match options (all or any genres).
*
* @param genres a Set of genres to search for.
* @param option the {@code MatchOption} that defines the search criteria
* (either {@link MatchOption#MATCH_ALL} or {@link MatchOption#MATCH_ANY}).
* @return a List of books that match the given genres according to the MatchOption.
* Returns an empty list if no books are found.
* @throws IllegalArgumentException if {@code genres} is null
*/
List<Book> searchByGenres(Set<String> genres, MatchOption option);

/**
* Searches for books that match the specified keywords.
* The search can be based on different match options (all or any keywords).
*
* @param keywords a {@code Set} of keywords to search for.
* @param option the {@code MatchOption} that defines the search criteria
* (either {@link MatchOption#MATCH_ALL} or {@link MatchOption#MATCH_ANY}).
* @return a List of books that match the given keywords according to the MatchOption.
* Returns an empty list if no books are found.
*/
List<Book> searchByKeywords(Set<String> keywords, MatchOption option);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package bg.sofia.uni.fmi.mjt.goodreads.finder;

/**
* Search criteria used by the genre and keyword searches of {@code BookFinderAPI},
* whose documentation describes the two modes as matching "all or any" of the given values.
*/
public enum MatchOption {
// A book has to match all of the requested values.
MATCH_ALL,
// A book has to match any of the requested values.
MATCH_ANY
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package bg.sofia.uni.fmi.mjt.goodreads.recommender;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;

import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class BookRecommender implements BookRecommenderAPI {

public BookRecommender(Set<Book> initialBooks, SimilarityCalculator calculator) {}


@Override
public Map<Book, Double> recommendBooks(Book origin, int maxN) {
throw new UnsupportedOperationException("Not yet implemented");
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package bg.sofia.uni.fmi.mjt.goodreads.recommender;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;

import java.util.Map;

/**
* Recommends books that are similar to a given book.
*/
public interface BookRecommenderAPI {

/**
* Searches for books that are similar to the provided one.
*
* @param originBook the book we should calculate similarity with.
* @param maxN the maximum number of entries returned
* @return a {@code Map<Book, Double>} representing the top maxN closest books with their
* similarity to originBook, ordered by their similarity score
* @throws IllegalArgumentException if the originBook is null.
* @throws IllegalArgumentException if maxN is smaller or equal to 0.
*/
Map<Book, Double> recommendBooks(Book originBook, int maxN);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package bg.sofia.uni.fmi.mjt.goodreads.recommender.similarityCalculator.composite;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;
import bg.sofia.uni.fmi.mjt.goodreads.recommender.similarityCalculator.SimilarityCalculator;

import java.util.Map;

/**
 * Skeleton of a {@link SimilarityCalculator} that is configured with a map from calculators to
 * {@code Double} values (presumably weights — confirm against the assignment text).
 * The calculation itself is still a stub.
 */
public class CompositeSimilarityCalculator implements SimilarityCalculator {

    private static final String NOT_IMPLEMENTED = "Not yet implemented";

    /**
     * Creates the calculator. The argument is currently ignored because the body is empty.
     *
     * @param similarityCalculatorMap the calculators to combine, each mapped to a number
     */
    public CompositeSimilarityCalculator(Map<SimilarityCalculator, Double> similarityCalculatorMap) {
    }

    /**
     * Intended to compute the similarity of two books; not implemented yet.
     *
     * @param first  the first book
     * @param second the second book
     * @return never returns normally in the current implementation
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    @Override
    public double calculateSimilarity(Book first, Book second) {
        throw new UnsupportedOperationException(NOT_IMPLEMENTED);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package bg.sofia.uni.fmi.mjt.goodreads.recommender.similarityCalculator.descriptions;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;
import bg.sofia.uni.fmi.mjt.goodreads.recommender.similarityCalculator.SimilarityCalculator;
import bg.sofia.uni.fmi.mjt.goodreads.sanitizer.StringTokenHandler;

import java.util.*;
import java.util.stream.Collectors;

public class TFIDF implements SimilarityCalculator {

public TFIDF(Set<Book> books, TextTokenizer tokenizer) {}

@Override
public double descriptionSimilarity(Book first, Book second) {
throw new UnsupportedOperationException("Not yet implemented");
}

public Map<String, Double> computeTFIDF(Book book) {
throw new UnsupportedOperationException("Not yet implemented");
}

public Map<String, Double> computeTF(Book book) {
throw new UnsupportedOperationException("Not yet implemented");
}

public Map<String, Double> computeIDF(Book book) {
throw new UnsupportedOperationException("Not yet implemented");
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package bg.sofia.uni.fmi.mjt.goodreads.recommender.similarityCalculator.genres;

import bg.sofia.uni.fmi.mjt.goodreads.book.Book;
import bg.sofia.uni.fmi.mjt.goodreads.recommender.similarityCalculator.SimilarityCalculator;

/**
 * Skeleton of a genre-based {@link SimilarityCalculator}. The computation is still a stub that
 * throws {@link UnsupportedOperationException}.
 */
public class GenresOverlapSimilarityCalculator implements SimilarityCalculator {

    /**
     * Entry point required by {@link SimilarityCalculator}; the sibling
     * {@code CompositeSimilarityCalculator} overrides the interface method under this name.
     * Delegates to {@link #genresSimilarity(Book, Book)}.
     *
     * @param first  the first book
     * @param second the second book
     * @return the value produced by {@link #genresSimilarity(Book, Book)}
     * @throws UnsupportedOperationException while the delegate is not implemented
     */
    @Override
    public double calculateSimilarity(Book first, Book second) {
        return genresSimilarity(first, second);
    }

    /**
     * Intended to compute the similarity of the genres of two books; not implemented yet.
     * Kept as a public method (without {@code @Override}) so existing callers keep working.
     *
     * @param first  the first book
     * @param second the second book
     * @return never returns normally in the current implementation
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    public double genresSimilarity(Book first, Book second) {
        throw new UnsupportedOperationException("Not yet implemented");
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package bg.sofia.uni.fmi.mjt.goodreads.sanitizer;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Holds a set of stop words loaded from a reader; the tokenization itself is still a stub.
 */
public class TextTokenizer {

    private final Set<String> stopwords;

    /**
     * Loads the stop words, one per line. Lines are stored exactly as read: no trimming or
     * case normalization is applied.
     *
     * @param stopwordsReader the source of the stop words; it is wrapped in a
     *                        {@link BufferedReader} that is closed before the constructor returns
     * @throws IllegalArgumentException if reading from or closing the reader fails
     */
    public TextTokenizer(Reader stopwordsReader) {
        try (var br = new BufferedReader(stopwordsReader)) {
            stopwords = br.lines().collect(Collectors.toSet());
        } catch (IOException | java.io.UncheckedIOException ex) {
            // BufferedReader.lines() reports read failures as UncheckedIOException, so both
            // types have to be caught for read errors to be wrapped consistently.
            throw new IllegalArgumentException("Could not load dataset", ex);
        }
    }

    /**
     * Intended to split the input into tokens; not implemented yet.
     *
     * @param input the text to tokenize
     * @return never returns normally in the current implementation
     * @throws UnsupportedOperationException always, until the method is implemented
     */
    public List<String> tokenize(String input) {
        throw new UnsupportedOperationException("Not yet implemented");
    }

    /**
     * Returns the loaded stop words.
     *
     * @return the internal set of stop words (not a copy)
     */
    public Set<String> stopwords() {
        return stopwords;
    }

}
Loading

0 comments on commit 95744ac

Please sign in to comment.