Skip to content

Commit

Permalink
Github Child Team Membership (#1397)
Browse files Browse the repository at this point in the history
### NOTE: all the below still holds by the relationships between the
nodes has been changed from 'CHILD_TEAM' to 'MEMBER_OF_TEAM'

### Summary

This PR adds to the Github graph, adding child team members of teams.

This is very similar to, and can be considered a follow-up to the prior
PR #1395, which added 'immediate' user members of teams. Now this PR is
adding child-teams. Between these two, the graph can now answer the
question of who is a member of a team, either directly or via child
teams.

We think this is a valuable addition to the graph because our broad
intent is to understand all access a user has. Since access is
frequently granted to teams, we need the complete picture of team
membership.

#### Illustration of the intention
![Cartography AMPS Team Child Team
Membership](https://github.com/user-attachments/assets/9620f50b-310a-43d1-a15b-28fe8480bbc4)

#### Screencaps

**EXAMPLE ALL TEAMS LOOKUP**

BEFORE
(nothing is returned because the relationship does not exist)
![Screenshot 2024-12-06 at 11 23
54 AM](https://github.com/user-attachments/assets/cd2310cd-d2ff-4faf-876a-00e798cfe1e9)

AFTER
(details not visible here but hopefully this related a sense of the
relationships)
![Screenshot 2024-12-06 at 11 53
11 AM](https://github.com/user-attachments/assets/5dfdd96f-526c-4299-9c1e-464f84d6e9d9)

**EXAMPLE SINGLE TEAM LOOKUP**

BEFORE
(nothing is returned because the relationship does not exist)
![Screenshot 2024-12-06 at 12 44
55 PM](https://github.com/user-attachments/assets/5dc0419b-ac4d-4038-9005-e2b41703563a)

AFTER
![Screenshot 2024-12-06 at 12 44
32 PM](https://github.com/user-attachments/assets/926776e2-c497-4c10-a80a-05cf54de532b)

#### Note on loops / cyclic graphs

GitHub does not seem to allow the creation of loops, eg `Team A <-- Team
B <-- Team A`, so this PR has no handling or guarding against that sort
of thing.



### Related issues or links

None.


### Checklist

Provide proof that this works (this makes reviews move faster). Please
perform one or more of the following:
- [x] Update/add unit or integration tests.
- [x] Include a screenshot showing what the graph looked like before and
after your changes.
- [ ] Include console log trace showing what happened before and after
your changes.

If you are changing a node or relationship:
- [x] Update the
[schema](https://github.com/lyft/cartography/tree/master/docs/root/modules)
and
[readme](https://github.com/lyft/cartography/blob/master/docs/schema/README.md).

**Not applicable** If you are implementing a new intel module:
- [ ] Use the NodeSchema [data
model](https://cartography-cncf.github.io/cartography/dev/writing-intel-modules.html#defining-a-node).

---------

Signed-off-by: Daniel Brauer <[email protected]>
  • Loading branch information
danbrauer authored Dec 20, 2024
1 parent e257b9f commit 4fa4b82
Show file tree
Hide file tree
Showing 6 changed files with 405 additions and 32 deletions.
132 changes: 115 additions & 17 deletions cartography/intel/github/teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
RepoPermission = namedtuple('RepoPermission', ['repo_url', 'permission'])
# A team member's role: https://docs.github.com/en/graphql/reference/enums#teammemberrole
UserRole = namedtuple('UserRole', ['user_url', 'role'])
# Unlike the other tuples here, there is no qualification (like 'role' or 'permission') to the relationship.
# A child team is just a child team: https://docs.github.com/en/graphql/reference/objects#teamconnection
ChildTeam = namedtuple('ChildTeam', ['team_url'])


def backoff_handler(details: Dict) -> None:
Expand Down Expand Up @@ -53,6 +56,9 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
members(membership: IMMEDIATE) {
totalCount
}
childTeams {
totalCount
}
}
pageInfo{
endCursor
Expand Down Expand Up @@ -166,6 +172,21 @@ def _get_team_repos(org: str, api_url: str, token: str, team: str) -> PaginatedG
return team_repos


def _get_teams_users_inner_func(
org: str, api_url: str, token: str, team_name: str,
user_urls: List[str], user_roles: List[str],
) -> None:
logger.info(f"Loading team users for {team_name}.")
team_users = _get_team_users(org, api_url, token, team_name)
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teammemberconnection
for user in team_users.nodes or []:
user_urls.append(user['url'])
# The `or []` is because `.edges` can be None.
for edge in team_users.edges or []:
user_roles.append(edge['role'])


def _get_team_users_for_multiple_teams(
team_raw_data: list[dict[str, Any]],
org: str,
Expand All @@ -185,21 +206,7 @@ def _get_team_users_for_multiple_teams(
user_urls: List[str] = []
user_roles: List[str] = []

def get_teams_users_inner_func(
org: str, api_url: str, token: str, team_name: str,
user_urls: List[str], user_roles: List[str],
) -> None:
logger.info(f"Loading team users for {team_name}.")
team_users = _get_team_users(org, api_url, token, team_name)
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teammemberconnection
for user in team_users.nodes or []:
user_urls.append(user['url'])
# The `or []` is because `.edges` can be None.
for edge in team_users.edges or []:
user_roles.append(edge['role'])

retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)(
retries_with_backoff(_get_teams_users_inner_func, TypeError, 5, backoff_handler)(
org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles,
)

Expand Down Expand Up @@ -252,11 +259,90 @@ def _get_team_users(org: str, api_url: str, token: str, team: str) -> PaginatedG
return team_users


def _get_child_teams_inner_func(
org: str, api_url: str, token: str, team_name: str, team_urls: List[str],
) -> None:
logger.info(f"Loading child teams for {team_name}.")
child_teams = _get_child_teams(org, api_url, token, team_name)
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teammemberconnection
for cteam in child_teams.nodes or []:
team_urls.append(cteam['url'])
# No edges to process here, the GitHub response for child teams has no relevant info in edges.


def _get_child_teams_for_multiple_teams(
team_raw_data: list[dict[str, Any]],
org: str,
api_url: str,
token: str,
) -> dict[str, list[ChildTeam]]:
result: dict[str, list[ChildTeam]] = {}
for team in team_raw_data:
team_name = team['slug']
team_count = team['childTeams']['totalCount']

if team_count == 0:
# This team has no child teams so let's move on
result[team_name] = []
continue

team_urls: List[str] = []

retries_with_backoff(_get_child_teams_inner_func, TypeError, 5, backoff_handler)(
org=org, api_url=api_url, token=token, team_name=team_name, team_urls=team_urls,
)

result[team_name] = [ChildTeam(url) for url in team_urls]
return result


def _get_child_teams(org: str, api_url: str, token: str, team: str) -> PaginatedGraphqlData:
team_users_gql = """
query($login: String!, $team: String!, $cursor: String) {
organization(login: $login) {
url
login
team(slug: $team) {
slug
childTeams(first: 100, after: $cursor) {
totalCount
nodes {
url
}
pageInfo {
endCursor
hasNextPage
}
}
}
}
rateLimit {
limit
cost
remaining
resetAt
}
}
"""
team_users, _ = fetch_all(
token,
api_url,
org,
team_users_gql,
'team',
resource_inner_type='childTeams',
team=team,
)
return team_users


def transform_teams(
team_paginated_data: PaginatedGraphqlData,
org_data: Dict[str, Any],
team_repo_data: dict[str, list[RepoPermission]],
team_user_data: dict[str, list[UserRole]],
team_child_team_data: dict[str, list[ChildTeam]],
) -> list[dict[str, Any]]:
result = []
for team in team_paginated_data.nodes:
Expand All @@ -267,13 +353,15 @@ def transform_teams(
'description': team['description'],
'repo_count': team['repositories']['totalCount'],
'member_count': team['members']['totalCount'],
'child_team_count': team['childTeams']['totalCount'],
'org_url': org_data['url'],
'org_login': org_data['login'],
}
repo_permissions = team_repo_data[team_name]
user_roles = team_user_data[team_name]
child_teams = team_child_team_data[team_name]

if not repo_permissions and not user_roles:
if not repo_permissions and not user_roles and not child_teams:
result.append(repo_info)
continue

Expand All @@ -289,6 +377,15 @@ def transform_teams(
repo_info_copy = repo_info.copy()
repo_info_copy[role] = user_url
result.append(repo_info_copy)
if child_teams:
for (team_url,) in child_teams:
repo_info_copy = repo_info.copy()
# GitHub speaks of team-team relationships as 'child teams', as in the graphql query
# or here: https://docs.github.com/en/graphql/reference/enums#teammembershiptype
# We label the relationship as 'MEMBER_OF_TEAM' here because it is in line with
# other similar relationships in Cartography.
repo_info_copy['MEMBER_OF_TEAM'] = team_url
result.append(repo_info_copy)
return result


Expand Down Expand Up @@ -325,7 +422,8 @@ def sync_github_teams(
teams_paginated, org_data = get_teams(organization, github_url, github_api_key)
team_repos = _get_team_repos_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
team_users = _get_team_users_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
processed_data = transform_teams(teams_paginated, org_data, team_repos, team_users)
team_children = _get_child_teams_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
processed_data = transform_teams(teams_paginated, org_data, team_repos, team_users, team_children)
load_team_repos(neo4j_session, processed_data, common_job_parameters['UPDATE_TAG'], org_data['url'])
common_job_parameters['org_url'] = org_data['url']
cleanup(neo4j_session, common_job_parameters)
17 changes: 17 additions & 0 deletions cartography/models/github/teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,22 @@ class GitHubTeamToOrganizationRel(CartographyRelSchema):
properties: GitHubTeamToOrganizationRelProperties = GitHubTeamToOrganizationRelProperties()


@dataclass(frozen=True)
class GitHubTeamToChildTeamRelProperties(CartographyRelProperties):
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class GitHubTeamChildTeamRel(CartographyRelSchema):
target_node_label: str = 'GitHubTeam'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'id': PropertyRef('MEMBER_OF_TEAM')},
)
direction: LinkDirection = LinkDirection.INWARD
rel_label: str = "MEMBER_OF_TEAM"
properties: GitHubTeamToChildTeamRelProperties = GitHubTeamToChildTeamRelProperties()


@dataclass(frozen=True)
class GitHubTeamSchema(CartographyNodeSchema):
label: str = 'GitHubTeam'
Expand All @@ -136,6 +152,7 @@ class GitHubTeamSchema(CartographyNodeSchema):
GitHubTeamWriteRepoRel(),
GitHubTeamMaintainerUserRel(),
GitHubTeamMemberUserRel(),
GitHubTeamChildTeamRel(),
],
)
sub_resource_relationship: GitHubTeamToOrganizationRel = GitHubTeamToOrganizationRel()
13 changes: 13 additions & 0 deletions docs/root/modules/github/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,19 @@ A GitHubTeam [organization object](https://docs.github.com/en/graphql/reference/
(GitHubOrganization)-[RESOURCE]->(GitHubTeam)
```
- GitHubTeams may be children of other teams:
```
(GitHubTeam)-[MEMBER_OF_TEAM]->(GitHubTeam)
```
- GitHubUsers may be ['immediate'](https://docs.github.com/en/graphql/reference/enums#teammembershiptype) members of a team (as opposed to being members via membership in a child team), with their membership [role](https://docs.github.com/en/graphql/reference/enums#teammemberrole) being MEMBER or MAINTAINER.
```
(GitHubUser)-[MEMBER|MAINTAINER]->(GitHubTeam)
```
### GitHubUser
Representation of a single GitHubUser [user object](https://developer.github.com/v4/object/user/). This node contains minimal data for the GitHub User.
Expand Down Expand Up @@ -205,6 +212,12 @@ WRITE, MAINTAIN, TRIAGE, and READ ([Reference](https://docs.github.com/en/graphq
(GitHubUser)-[UNAFFILIATED]->(GitHubOrganization)
```
- GitHubTeams may be children of other teams:
```
(GitHubTeam)-[MEMBER_OF_TEAM]->(GitHubTeam)
```
- GitHubUsers may be ['immediate'](https://docs.github.com/en/graphql/reference/enums#teammembershiptype) members of a team (as opposed to being members via membership in a child team), with their membership [role](https://docs.github.com/en/graphql/reference/enums#teammemberrole) being MEMBER or MAINTAINER.
```
Expand Down
13 changes: 13 additions & 0 deletions tests/data/github/teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,39 @@
'description': None,
'repositories': {'totalCount': 0},
'members': {'totalCount': 0},
'childTeams': {'totalCount': 0},
},
{
'slug': 'team-b',
'url': 'https://github.com/orgs/example_org/teams/team-b',
'description': None,
'repositories': {'totalCount': 3},
'members': {'totalCount': 0},
'childTeams': {'totalCount': 0},
},
{
'slug': 'team-c',
'url': 'https://github.com/orgs/example_org/teams/team-c',
'description': None,
'repositories': {'totalCount': 0},
'members': {'totalCount': 3},
'childTeams': {'totalCount': 0},
},
{
'slug': 'team-d',
'url': 'https://github.com/orgs/example_org/teams/team-d',
'description': 'Team D',
'repositories': {'totalCount': 0},
'members': {'totalCount': 0},
'childTeams': {'totalCount': 2},
},
{
'slug': 'team-e',
'url': 'https://github.com/orgs/example_org/teams/team-e',
'description': 'some description here',
'repositories': {'totalCount': 0},
'members': {'totalCount': 0},
'childTeams': {'totalCount': 0},
},
],
edges=[],
Expand Down Expand Up @@ -71,3 +76,11 @@
{'role': 'MAINTAINER'},
],
)

GH_TEAM_CHILD_TEAM = PaginatedGraphqlData(
nodes=[
{'url': 'https://github.com/orgs/example_org/teams/team-a'},
{'url': 'https://github.com/orgs/example_org/teams/team-b'},
],
edges=[],
)
14 changes: 13 additions & 1 deletion tests/integration/cartography/intel/github/test_teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import cartography.intel.github.teams
from cartography.intel.github.teams import sync_github_teams
from tests.data.github.teams import GH_TEAM_CHILD_TEAM
from tests.data.github.teams import GH_TEAM_DATA
from tests.data.github.teams import GH_TEAM_REPOS
from tests.data.github.teams import GH_TEAM_USERS
Expand All @@ -16,10 +17,11 @@
FAKE_API_KEY = 'asdf'


@patch.object(cartography.intel.github.teams, '_get_child_teams', return_value=GH_TEAM_CHILD_TEAM)
@patch.object(cartography.intel.github.teams, '_get_team_users', return_value=GH_TEAM_USERS)
@patch.object(cartography.intel.github.teams, '_get_team_repos', return_value=GH_TEAM_REPOS)
@patch.object(cartography.intel.github.teams, 'get_teams', return_value=GH_TEAM_DATA)
def test_sync_github_teams(mock_teams, mock_team_repos, mock_team_users, neo4j_session):
def test_sync_github_teams(mock_teams, mock_team_repos, mock_team_users, mock_child_teams, neo4j_session):
# Arrange
test_repos._ensure_local_neo4j_has_test_data(neo4j_session)
test_users._ensure_local_neo4j_has_test_data(neo4j_session)
Expand Down Expand Up @@ -139,3 +141,13 @@ def test_sync_github_teams(mock_teams, mock_team_repos, mock_team_users, neo4j_s
('https://github.com/orgs/example_org/teams/team-c', 'https://example.com/lmsimpson'),
('https://github.com/orgs/example_org/teams/team-c', 'https://example.com/mbsimpson'),
}
assert check_rels(
neo4j_session,
'GitHubTeam', 'id',
'GitHubTeam', 'id',
'MEMBER_OF_TEAM',
rel_direction_right=False,
) == {
('https://github.com/orgs/example_org/teams/team-d', 'https://github.com/orgs/example_org/teams/team-a'),
('https://github.com/orgs/example_org/teams/team-d', 'https://github.com/orgs/example_org/teams/team-b'),
}
Loading

0 comments on commit 4fa4b82

Please sign in to comment.