Skip to content

Commit

Permalink
feat(diff): diff improved. Parameter --num-lines introduced when --on…
Browse files Browse the repository at this point in the history
…ly-deltas is set to True. Diff styles improved
  • Loading branch information
marcosschroh committed Jan 27, 2025
1 parent 25019b5 commit e5bba9e
Show file tree
Hide file tree
Showing 10 changed files with 164 additions and 87 deletions.
137 changes: 74 additions & 63 deletions dc_avro/_diff.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,29 @@
import difflib
from typing import Sequence
from dataclasses import dataclass
from typing import Dict, NamedTuple, Optional, Sequence

from rich.style import Style
from rich.table import Table
from rich.text import Text

DELETE_COLOR = "red"
ADD_COLOR = "green"


@dataclass
class Content:
    """One side of a diff row: a line number paired with that line's text."""

    # Line number rendered in front of the text.
    # NOTE(review): annotated as int, but the tests in this commit expect a
    # bare " " for a missing side, which suggests "" may be passed - confirm.
    line_number: int
    # Raw text of the line; may still contain tabs and newlines.
    sequence: str

    @property
    def diff_format(self) -> str:
        """Return "<line_number> <sequence>" with tabs and newlines removed."""
        rendered = f"{self.line_number} {self.sequence}"
        # Drop every tab and newline in a single pass over the rendered text.
        return rendered.translate(str.maketrans("", "", "\t\n"))


class DiffLine(NamedTuple):
    """A single row of a side-by-side diff."""

    # Line rendered in the first (source) column of the table.
    content_one: Content
    # Line rendered in the second (target) column of the table.
    content_two: Content
    # True when the row is rendered with the delete/add colors.
    has_diff: bool


class TableDiff:
Expand All @@ -24,73 +46,49 @@ def __init__(
whole resource in the diff result. Default to False.
"""
self.table = Table(title=title, highlight=True)
self.table.add_column(source_name, style="cyan")
self.table.add_column(target_name, style="magenta")
self.content: list[str] = []
self.table.add_column(source_name)
self.table.add_column(target_name)
self.only_deltas = only_deltas
self.line_number = 1

def add_content(self, content: str) -> None:
def add_content(self, diff_line: DiffLine) -> None:
"""
If the line starts with a "-" character, it is added to the left column.
If the line starts with a "+" character, it is added to the right column.
If the line starts with a space character, it is added to both columns.
If the line starts with a "?" character, it is ignored.
Args:
content (str): new content to be added to the table
If diff_line has differences, add a new row to the table with
the difference styles, otherwise add a new row without styles
"""
if content.startswith("?"):
# Ignore the line
self.line_number += 1
return
elif content.startswith(" "):
if self.only_deltas:
self.line_number += 1
return
content = f"{self.line_number} {content}"
self.add_row(source_colum=content, target_column=content)

if diff_line.has_diff:
self.add_row(
source_colum=self.build_text(
text=diff_line.content_one.diff_format,
style={"color": DELETE_COLOR},
),
target_column=self.build_text(
text=diff_line.content_two.diff_format,
style={"color": ADD_COLOR},
),
)
else:
# add the content to check later
self.content.append(content)

if len(self.content) == 2:
# here we are in the situation where we have a complete row
# the combination can be: ("-", "+"), ("-", "-") or ("+", "+")
content_one, content_two = self.content

if content_one.startswith("-") and content_two.startswith("+"):
self.add_row(
source_colum=f"{self.line_number} {content_one}",
target_column=f"{self.line_number} {content_two}",
)
elif content_one.startswith("-") and content_two.startswith("-"):
self.add_row(
source_colum=f"{self.line_number} {content_one}",
target_column=f"{self.line_number}",
)
self.add_row(
source_colum=f"{self.line_number} {content_two}",
target_column=f"{self.line_number}",
)
else:
self.add_row(
source_colum=f"{self.line_number}",
target_column=f"{self.line_number} {content_one}",
)
self.add_row(
source_colum=f"{self.line_number}",
target_column=f"{self.line_number} {content_two}",
)

self.content = []

def add_row(self, *, source_colum: str, target_column: str) -> None:
self.add_row(
source_colum=self.build_text(
text=diff_line.content_one.diff_format,
),
target_column=self.build_text(
text=diff_line.content_two.diff_format,
),
)

@staticmethod
def build_text(text: str, style: Optional[Dict[str, str]] = None) -> Text:
    """
    Wrap `text` in a rich `Text` object.

    Args:
        text (str): content to render
        style (Optional[Dict[str, str]]): keyword arguments forwarded to
            `Style`; when omitted or empty a default `Style()` is used.
    """
    style_kwargs = style if style else {}
    return Text(text=text, style=Style(**style_kwargs))  # type: ignore

def add_row(
self, *, source_colum: Text, target_column: Text, style: Optional[str] = None
) -> None:
"""
Add new row to table.
"""
self.table.add_row(source_colum, target_column)
self.line_number += 1
self.table.add_row(source_colum, target_column, style=style)


def diff_resources(
Expand All @@ -100,15 +98,28 @@ def diff_resources(
source_name: str,
target_name: str,
only_deltas: bool = False,
num_lines: int = 5,
) -> Table:
table_diff = TableDiff(
title="Schema Diff",
source_name=source_name,
target_name=target_name,
only_deltas=only_deltas,
)
diff = difflib.ndiff(source_resource, target_resource)

if only_deltas and num_lines >= 0:
context_lines = num_lines
else:
context_lines = None

diff = difflib._mdiff(source_resource, target_resource, context=context_lines) # type: ignore

for line in diff:
table_diff.add_content(line.replace("\n", "").replace("\t", ""))
if None not in line:
content_one, content_two, has_diff = line
diff_line = DiffLine(
content_one=Content(content_one[0], sequence=content_one[1]),
content_two=Content(content_two[0], sequence=content_two[1]),
has_diff=has_diff,
)
table_diff.add_content(diff_line=diff_line)
return table_diff.table
4 changes: 4 additions & 0 deletions dc_avro/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ def schema_diff(
),
default=False,
),
num_lines: int = typer.Option(
5, help="Number of lines to show in the diff when context is set to True"
),
) -> None:
source_resource = get_raw_resource(
path=source_path,
Expand All @@ -119,6 +122,7 @@ def schema_diff(
target_resource=target_resource,
target_name=target_path or target_url,
only_deltas=only_deltas,
num_lines=num_lines,
)
)

Expand Down
10 changes: 5 additions & 5 deletions docs/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -622,15 +622,15 @@ If we run the `schema-diff` command we have the following result:
dc-avro schema-diff --source-path ./tests/schemas/example.avsc --target-path ./tests/schemas/example_v2.avsc
```

![type:video](statics/schema_diff.mp4)
![type:video](statics/schema_diff.mov)

By default the whole files are shown. You can provide the option `--only-deltas` to see only the lines that have changed:
By default the whole files are shown. You can provide the option `--only-deltas` to see only the lines that have changed. In that mode the command shows `5` lines of context by default. To show more or less context, use the option `--num-lines`:

```bash
dc-avro schema-diff --source-path ./tests/schemas/example.avsc --target-path ./tests/schemas/example_v2.avsc --only-deltas
dc-avro schema-diff --source-path ./tests/schemas/example.avsc --target-path ./tests/schemas/example_v2.avsc --only-deltas --num-lines 3
```

![type:video](statics/schema_diff_deltas.mp4)
![type:video](statics/schema_diff_deltas.mov)

## Generate fake data from schema

Expand Down Expand Up @@ -664,4 +664,4 @@ Usage: dc-avro generate-data [OPTIONS] [RESOURCE]
│ --count INTEGER Number of data to generate, more than one prints a list [default: 1] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
```
Binary file added docs/statics/schema_diff.mov
Binary file not shown.
Binary file removed docs/statics/schema_diff.mp4
Binary file not shown.
Binary file added docs/statics/schema_diff_deltas.mov
Binary file not shown.
Binary file removed docs/statics/schema_diff_deltas.mp4
Binary file not shown.
3 changes: 2 additions & 1 deletion tests/schemas/example_v2.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
"name": "address",
"type": [
"null",
"string"
"string",
"int"
],
"default": null
}
Expand Down
9 changes: 7 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,12 @@ def test_generate_model_from_url(
assert expected_output == result.stdout


@pytest.mark.parametrize("only_deltas, total_output_len", ((True, 1134), (False, 6075)))
@pytest.mark.parametrize(
"only_deltas, num_lines, total_output_len",
((True, 5, 2535), (True, 10, 2940), (False, 0, 6018)),
)
def test_schema_diff_from_path(
only_deltas: bool, total_output_len: int, schema_dir: str
only_deltas: bool, num_lines: int, total_output_len: int, schema_dir: str
):
result = runner.invoke(
app,
Expand All @@ -115,6 +118,8 @@ def test_schema_diff_from_path(
"--target-path",
os.path.join(schema_dir, "example_v2.avsc"),
"--only-deltas" if only_deltas else "--no-only-deltas",
"--num-lines",
str(num_lines),
],
)
assert result.exit_code == 0
Expand Down
88 changes: 72 additions & 16 deletions tests/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from rich.table import Table

from dc_avro._diff import diff_resources
from dc_avro._diff import ADD_COLOR, DELETE_COLOR, TableDiff, diff_resources


@pytest.mark.parametrize(
Expand All @@ -19,50 +19,106 @@
["First line", "Second line"],
[],
(
["1 - First line", "1"],
["2 - Second line", "2"],
[
TableDiff.build_text(
text="1 \x00-First line\x01", style={"color": DELETE_COLOR}
),
TableDiff.build_text(text=" ", style={"color": ADD_COLOR}),
],
[
TableDiff.build_text(
text="2 \x00-Second line\x01", style={"color": DELETE_COLOR}
),
TableDiff.build_text(text=" ", style={"color": ADD_COLOR}),
],
), # only source resource
),
(
[],
["First line", "Second line"],
(
["1", "1 + First line"],
["2", "2 + Second line"],
[
TableDiff.build_text(text=" ", style={"color": DELETE_COLOR}),
TableDiff.build_text(
text="1 \x00+First line\x01", style={"color": ADD_COLOR}
),
],
[
TableDiff.build_text(text=" ", style={"color": DELETE_COLOR}),
TableDiff.build_text(
text="2 \x00+Second line\x01", style={"color": ADD_COLOR}
),
],
), # only target resource
),
(
["First line", "Second line"],
["First line", "Second line"],
(
["1 First line", "1 First line"],
["2 Second line", "2 Second line"],
[
TableDiff.build_text(text="1 First line"),
TableDiff.build_text(text="1 First line"),
], # noqa: E501
[
TableDiff.build_text(text="2 Second line"),
TableDiff.build_text(text="2 Second line"),
],
), # same resources
),
(
["First line", "Second line"],
["First line", ""],
(
["1 First line", "1 First line"],
["2 - Second line", "2 + "],
[
TableDiff.build_text(text="1 First line"),
TableDiff.build_text(text="1 First line"),
],
[
TableDiff.build_text(
text="2 \x00-Second line\x01", style={"color": DELETE_COLOR}
),
TableDiff.build_text(
text="2 \x00+ \x01", style={"color": ADD_COLOR}
),
],
), # different resources
),
(
["First line", ""],
["First line", "Second line"],
(
["1 First line", "1 First line"],
["2 - ", "2 + Second line"],
[
TableDiff.build_text(text="1 First line"),
TableDiff.build_text(text="1 First line"),
],
[
TableDiff.build_text(text="2 \x00- \x01"),
TableDiff.build_text(
text="2 \x00+Second line\x01", style={"color": ADD_COLOR}
),
],
), # different resources
),
(
["First line", "Second line"],
["Not related", "No clue line"],
(
["1 - First line", "1"],
["2 - Second line", "2"],
["3", "3 + Not related"],
["4", "4 + No clue line"],
[
TableDiff.build_text(
text="1 \x00-First line\x01", style={"color": DELETE_COLOR}
),
TableDiff.build_text(
text="1 \x00+Not related\x01", style={"color": ADD_COLOR}
),
],
[
TableDiff.build_text(
text="2 \x00-Second line\x01", style={"color": DELETE_COLOR}
),
TableDiff.build_text(
text="2 \x00+No clue line\x01", style={"color": ADD_COLOR}
),
],
), # completely different resources
),
),
Expand Down Expand Up @@ -93,7 +149,7 @@ def test_diff(
assert result.columns == table.columns


@pytest.mark.parametrize("only_deltas, total_rows", ((True, 9), (False, 68)))
@pytest.mark.parametrize("only_deltas, total_rows", ((True, 26), (False, 69)))
def test_diff_with_and_without_deltas(
only_deltas: bool, total_rows: int, schema_dir: str
) -> None:
Expand Down

0 comments on commit e5bba9e

Please sign in to comment.