Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Class version #126

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions csvy/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from itertools import zip_longest
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -270,3 +272,95 @@ def read_to_list(
data.append(row)

return data, header


class ReaderBase(ABC):
    """Base class for CSVY readers.

    This class is meant to be subclassed by other classes that implement the
    read_data method. It provides a common interface for reading CSVY files: the
    header is read by read_header and cached on the instance, and the data is
    read by the subclass-specific read_data.
    """

    def __init__(self, filename: Path | str, marker: str = "---"):
        """Initializes the ReaderBase object.

        No file access happens here; the header related attributes stay None until
        read_header is called.

        Args:
            filename: Name of the file to read.
            marker: The marker characters that indicate the yaml header.
        """
        self._filename: Path | str = filename
        self._marker: str = marker
        # The three attributes below are filled in by read_header.
        self._nlines: int | None = None
        self._comment: str | None = None
        self._header: dict[str, Any] | None = None

    def read(
        self,
        csv_options: dict[str, Any] | None = None,
        yaml_options: dict[str, Any] | None = None,
    ) -> tuple[Any, dict[str, Any]]:
        """Reads the file and returns the data.

        If a header has already been read it is reused and yaml_options is ignored.

        Args:
            csv_options: Options to pass to the read_data method. None means no
                options.
            yaml_options: Options to pass to the read_header method. None means no
                options.

        Returns:
            Tuple containing: The data and the header.
        """
        header = self._header
        # Compare against None rather than relying on truthiness: an empty header
        # is a valid cached result and must not trigger another read of the file.
        if header is None:
            header = self.read_header(**(yaml_options or {}))
        data = self.read_data(**(csv_options or {}))
        return data, header

    def read_header(self, **kwargs) -> dict[str, Any]:
        """Reads the header from the file.

        The header, the second and the third value returned by the module-level
        read_header function are stored in the _header, _nlines and _comment
        attributes, respectively.

        Args:
            **kwargs: Arguments to pass to the module-level read_header function.

        Returns:
            The header as a dictionary.
        """
        # Inside a method body the bare name resolves to the module-level
        # read_header function, not to this method, so this is not a recursive call.
        self._header, self._nlines, self._comment = read_header(
            self._filename, self._marker, **kwargs
        )
        return self._header

    @abstractmethod
    def read_data(self, **kwargs) -> Any:
        """Reads the data from the file.

        Args:
            **kwargs: Reader specific options.

        Returns:
            The data, in whatever container the concrete reader produces.
        """


class ListReader(ReaderBase):
    """Reader class for reading CSVY files into a list of lists."""

    def read_data(
        self, in_columns: bool = False, fillvalue: str = "", **kwargs
    ) -> list[list]:
        """Reads the data from the file.

        The header is read first if that has not been done yet, so that the number
        of lines to skip is known.

        Args:
            in_columns: Whether to read the data in columns.
            fillvalue: Value to use for missing data when reading in columns.
            **kwargs: Arguments to pass to the csv.reader function.

        Returns:
            The data as a list of lists. Each inner list is a row, or a column if
            in_columns is True.
        """
        import csv

        if self._nlines is None:
            self.read_header()

        with open(self._filename, newline="") as csvfile:
            # Skip the header on the file object itself rather than through the
            # csv reader: the header is not CSV, and a quote character at the start
            # of a header field would make the csv reader merge several physical
            # lines into one record (or raise with strict=True), so the wrong
            # number of lines would be skipped.
            # NOTE(review): assumes _nlines counts physical lines - confirm against
            # the module-level read_header.
            for _ in range(self._nlines):
                csvfile.readline()

            data: list[list[str]] = list(csv.reader(csvfile, **kwargs))

        if in_columns:
            # Transpose rows into columns, padding short rows with fillvalue.
            data = [list(column) for column in zip_longest(*data, fillvalue=fillvalue)]

        return data
10 changes: 10 additions & 0 deletions tests/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,13 @@ def test_read_to_list(array_data_path):
assert len(data[0]) == 4
assert isinstance(header, dict)
assert len(header) > 0


@pytest.mark.parametrize("in_columns,num", [(False, 15), (True, 4)])
def test_ListReader_read_data(in_columns, num, array_data_path):
    """Test the read_data method of the ListReader class.

    The length of the returned list is checked against the expected number for
    each value of in_columns.
    """
    from csvy.readers import ListReader

    reader = ListReader(array_data_path)
    result = reader.read_data(in_columns=in_columns, delimiter=",")

    assert isinstance(result, list)
    assert len(result) == num
Loading