Skip to content

Commit 36ca8a8

Browse files
Merge pull request #28 from jitsecurity/bigquery
Bigquery
2 parents df69853 + 671eff3 commit 36ca8a8

9 files changed

Lines changed: 201 additions & 107 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ venv*/*
1919
.ipynb_checkpoints
2020
.DS_*
2121
node_modules/
22+
teams*

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
pydantic==2.1.1
22
PyGithub==1.59.1
33
python-dotenv==1.0.0
4+
pytz==2024.1
5+
google-cloud-bigquery==3.19.0
46
requests==2.31.0
57
loguru==0.7.0

src/scripts/sync_teams/sync_teams.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,31 @@
2121
logger.add(sys.stderr, format=logger_format)
2222

2323

24-
def parse_input_file() -> Organization:
24+
def parse_input_file() -> Tuple[Organization, bool, bool]:
2525
"""
26-
Parse the input JSON file and return an Organization object.
26+
Parse the input JSON file and return an Organization object, the skip_no_resources flag,
27+
and the verify_github_membership flag.
2728
2829
Returns:
29-
Organization: The parsed organization object.
30+
Tuple[Organization, bool, bool]: The parsed organization object, the skip_no_resources flag,
31+
and the verify_github_membership flag.
3032
"""
31-
# Create the argument parser
3233
parser = argparse.ArgumentParser(description="Retrieve teams and assets")
33-
34-
# Add the file argument
3534
parser.add_argument("file", help="Path to a JSON file")
3635

37-
# Parse the command line arguments
36+
# Default behavior is True, override with --no-skip-no-resources and --no-verify-github-membership
37+
parser.add_argument("--skip-no-resources", dest='skip_no_resources', action="store_true",
38+
help="Skip teams with no active resources", default=True)
39+
parser.add_argument("--no-skip-no-resources", dest='skip_no_resources', action="store_false",
40+
help="Do not skip teams with no active resources")
41+
42+
parser.add_argument("--verify-github-membership", dest='verify_github_membership', action="store_true",
43+
help="Verify GitHub membership when setting team members", default=True)
44+
parser.add_argument("--no-verify-github-membership", dest='verify_github_membership', action="store_false",
45+
help="Do not verify GitHub membership when setting team members")
46+
3847
args = parser.parse_args()
3948

40-
# Check if the file exists and is a JSON file
4149
if not os.path.isfile(args.file):
4250
logger.error("Error: File does not exist.")
4351
sys.exit(1)
@@ -46,14 +54,13 @@ def parse_input_file() -> Organization:
4654
sys.exit(1)
4755
logger.info(f"Reading file: {args.file}")
4856

49-
# Read the JSON file
5057
with open(args.file, "r") as file:
5158
json_data = file.read()
52-
53-
# Parse the JSON data
59+
logger.info(f"JSON data: {json_data}")
5460
try:
5561
data = json.loads(json_data)
56-
return Organization(teams=[TeamStructure(**team) for team in data["teams"]])
62+
return Organization(teams=[TeamStructure(**team) for team in data["teams"]]), \
63+
args.skip_no_resources, args.verify_github_membership
5764
except (ValidationError, KeyError) as e:
5865
logger.error(f"Failed to validate input file: {e}")
5966
sys.exit(1)
@@ -125,14 +132,15 @@ def get_teams_to_delete(topic_names: List[str], existing_team_names: List[str])
125132
return get_different_items_in_lists(existing_team_names, topic_names)
126133

127134

128-
def get_desired_teams(assets: List[Asset], organization: Organization) -> List[str]:
135+
def get_desired_teams(assets: List[Asset], organization: Organization, skip_no_resources: bool) -> List[str]:
129136
"""
130137
Get the desired teams based on the assets and organization.
131138
Also filter out teams that match the TEAM_WILDCARD_TO_EXCLUDE environment variable.
132139
133140
Args:
134141
assets (List[Asset]): The list of assets.
135142
organization (Organization): The organization object.
143+
skip_no_resources (bool): Whether to skip teams with no active resources.
136144
137145
Returns:
138146
List[str]: The names of the desired teams.
@@ -143,7 +151,7 @@ def get_desired_teams(assets: List[Asset], organization: Organization) -> List[s
143151
for resource in team.resources:
144152
if resource.type == ResourceType.GithubRepo and resource.name in [asset.asset_name for asset in assets]:
145153
team_resources.append(resource.name)
146-
if team_resources:
154+
if team_resources or not skip_no_resources:
147155
desired_teams.append(team.name)
148156
else:
149157
logger.info(
@@ -165,7 +173,8 @@ def get_desired_teams(assets: List[Asset], organization: Organization) -> List[s
165173

166174

167175
def process_teams(token, organization, assets: List[Asset],
168-
existing_teams: List[TeamAttributes]) -> Tuple[List[str], List[TeamAttributes]]:
176+
existing_teams: List[TeamAttributes],
177+
skip_no_resources: bool) -> Tuple[List[str], List[TeamAttributes]]:
169178
"""
170179
Process the teams in the organization and create or delete teams as necessary.
171180
We will delete the teams at a later stage to avoid possible synchronization issues.
@@ -174,13 +183,13 @@ def process_teams(token, organization, assets: List[Asset],
174183
token (str): The JWT token.
175184
organization (Organization): The organization object.
176185
existing_teams (List[TeamAttributes]): The existing teams.
177-
186+
skip_no_resources (bool): Whether to skip teams with no active resources.
178187
Returns:
179188
Tuple[List[str], List[TeamAttributes]]: The names of the teams to delete and the created teams.
180189
"""
181190
logger.info("Determining required changes in teams.")
182191

183-
desired_teams = get_desired_teams(assets, organization)
192+
desired_teams = get_desired_teams(assets, organization, skip_no_resources)
184193
existing_team_names = [team.name for team in existing_teams]
185194
teams_to_create = get_teams_to_create(desired_teams, existing_team_names)
186195
teams_to_delete = get_teams_to_delete(desired_teams, existing_team_names)
@@ -193,7 +202,7 @@ def process_teams(token, organization, assets: List[Asset],
193202

194203

195204
def process_members(token: str, organization: Organization, existing_teams: List[TeamAttributes],
196-
desired_teams: List[str]) -> None:
205+
desired_teams: List[str], verify_github_membership: bool) -> None:
197206
logger.info("Processing team members.")
198207
for team_structure in organization.teams:
199208
try:
@@ -210,7 +219,7 @@ def process_members(token: str, organization: Organization, existing_teams: List
210219
f"Only the first {MAX_MEMBERS_PER_TEAM} members will be set.")
211220
team_members = team_members[:MAX_MEMBERS_PER_TEAM]
212221
set_manual_team_members(
213-
token, team_id, team_members, team_name)
222+
token, team_id, team_members, team_name, verify_github_membership)
214223
else:
215224
logger.warning(
216225
f"Team '{team_name}' not found in existing teams. Skipping member processing.")
@@ -248,19 +257,24 @@ def main():
248257
logger.error("Failed to retrieve JWT token. Exiting...")
249258
return
250259

251-
organization: Organization = parse_input_file()
260+
organization, skip_no_resources, verify_github_membership = parse_input_file()
261+
logger.info(
262+
f"Running with {skip_no_resources=}, {verify_github_membership=}")
252263
if not organization:
253264
logger.error("Failed to parse input file. Exiting...")
254265
return
255266

256267
assets: List[Asset] = list_assets(jit_token)
257268

258269
existing_teams = get_existing_teams(jit_token)
270+
logger.info(
271+
f"Found {len(existing_teams)} existing team(s) in the organization: {[team.name for team in existing_teams]}")
259272
teams_to_delete, created_teams = process_teams(
260-
jit_token, organization, assets, existing_teams)
273+
jit_token, organization, assets, existing_teams, skip_no_resources)
261274
existing_teams: List[TeamAttributes] = existing_teams + created_teams
262-
desired_teams = get_desired_teams(assets, organization)
263-
process_members(jit_token, organization, existing_teams, desired_teams)
275+
desired_teams = get_desired_teams(assets, organization, skip_no_resources)
276+
process_members(jit_token, organization, existing_teams,
277+
desired_teams, verify_github_membership)
264278
update_assets(jit_token, assets, organization, existing_teams)
265279

266280
if teams_to_delete:

src/shared/clients/google.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import os
2+
3+
from google.cloud import bigquery
4+
from google.oauth2 import service_account
5+
from loguru import logger
6+
7+
from src.shared.models import TeamStructure, Resource, Organization, ResourceType
8+
9+
bigquery_view_name = os.getenv('BIGQUERY_VIEW_NAME')
10+
credentials_path = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
11+
12+
13+
def get_teams_from_bigquery_view() -> Organization:
14+
try:
15+
num_repos = 0
16+
logger.info(
17+
f"Retrieving teams from BigQuery View: {os.getenv('BIGQUERY_VIEW_NAME')}")
18+
19+
# Explicitly use service account credentials by specifying the private key file.
20+
credentials = service_account.Credentials.from_service_account_file(
21+
credentials_path)
22+
client = bigquery.Client(credentials=credentials)
23+
24+
teams = {}
25+
fields = [
26+
"ownership_team_name",
27+
"repos",
28+
"managers",
29+
"manager_usernames",
30+
"members",
31+
"member_usernames",
32+
"slack_alerting_channel"
33+
]
34+
35+
# Perform a query
36+
query = "SELECT {fields} FROM `{view}` WHERE NOT CONTAINS_SUBSTR(ownership_team_name, 'Kaluza')".format(
37+
fields=", ".join(fields), view=bigquery_view_name)
38+
query_job = client.query(query) # API request
39+
rows = query_job.result() # Waits for query to finish
40+
41+
for row in rows:
42+
resources = [Resource(type=ResourceType.GithubRepo, name=repo)
43+
for repo in row.repos]
44+
num_repos += len(resources)
45+
members = list(dict.fromkeys(
46+
row.managers + row.members)) # Remove duplicates & keep the same member order
47+
teams[row.ownership_team_name] = TeamStructure(
48+
name=row.ownership_team_name, members=members, resources=resources,
49+
slack_channel=row.slack_alerting_channel)
50+
logger.info(
51+
f"Retrieved({len(teams.keys())}) teams {list(teams.keys())} with {num_repos} repos"
52+
"from Google BigQuery successfully.")
53+
return Organization(teams=list(teams.values()))
54+
except Exception as e:
55+
logger.error(f"Failed to retrieve teams from GitHub: {str(e)}")
56+
return Organization(teams=[])

src/shared/clients/jit.py

Lines changed: 33 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -53,42 +53,38 @@ def list_assets(token: str) -> List[Asset]:
5353

5454

5555
def get_existing_teams(token: str) -> List[TeamAttributes]:
56-
def _handle_resoponse(response, existing_teams):
57-
response = response.json()
58-
data = response['data']
59-
existing_teams.extend(data)
60-
after = response['metadata']['after']
61-
return after
56+
headers = {
57+
'authorization': f'Bearer {token}',
58+
}
6259

63-
try:
64-
# Make a GET request to the asset API
65-
url = f"{get_jit_endpoint_base_url()}/teams?limit=100"
60+
params = {
61+
'limit': '50',
62+
'sort_by': 'score',
63+
'sort_order': 'desc',
64+
'include_members': 'true',
65+
}
6666

67-
headers = get_request_headers(token)
68-
response = requests.get(url, headers=headers)
69-
existing_teams = []
70-
# Check if the request was successful
67+
all_teams = []
68+
logger.info("Retrieving teams from with pagination.")
69+
while True:
70+
response = requests.get(
71+
f'{get_jit_endpoint_base_url()}/teams', params=params, headers=headers)
7172
if response.status_code == 200:
72-
after = _handle_resoponse(response, existing_teams)
73-
while after:
74-
response = requests.get(
75-
f"{url}&after={after}", headers=headers)
76-
if response.status_code == 200:
77-
after = _handle_resoponse(response, existing_teams)
78-
else:
79-
logger.error(
80-
f"Failed to retrieve teams. Status code: {response.status_code}, {response.text}")
81-
return []
73+
response_data = response.json()
74+
teams = response_data.get('data', [])
75+
all_teams.extend(teams)
76+
after = response_data.get('metadata', {}).get('after')
77+
logger.info(f"Retrieved {len(teams)} teams in page.")
78+
if not after:
79+
break
8280

83-
logger.info("Retrieved existing teams successfully.")
84-
return [TeamAttributes(**team) for team in existing_teams]
81+
params['after'] = after
8582
else:
8683
logger.error(
8784
f"Failed to retrieve teams. Status code: {response.status_code}, {response.text}")
88-
return []
89-
except Exception as e:
90-
logger.error(f"Failed to retrieve teams: {str(e)}")
91-
return []
85+
break
86+
87+
return [TeamAttributes(**team) for team in all_teams]
9288

9389

9490
def delete_teams(token, team_names):
@@ -170,12 +166,14 @@ def add_teams_to_asset(token, asset: Asset, teams: List[str]):
170166

171167

172168
def _perform_set_manual_team_members(token: str, team_id: str,
173-
members: List[str], team_name: str) -> Optional[List[str]]:
169+
members: List[str], team_name: str,
170+
verify_github_membership: bool) -> Optional[List[str]]:
174171
try:
175172
url = f"{get_jit_endpoint_base_url()}/teams/{team_id}/members"
176173
headers = get_request_headers(token)
177174
payload = {
178-
"members": members
175+
"members": members,
176+
"verify_github_membership": verify_github_membership
179177
}
180178
response = requests.put(url, json=payload, headers=headers)
181179
if response.status_code == 200:
@@ -197,13 +195,14 @@ def _perform_set_manual_team_members(token: str, team_id: str,
197195
return None
198196

199197

200-
def set_manual_team_members(token: str, team_id: str, members: List[str], team_name: str) -> None:
198+
def set_manual_team_members(token: str, team_id: str, members: List[str],
199+
team_name: str, verify_github_membership: bool) -> None:
201200
retry_count = 0
202201
failed_members = _perform_set_manual_team_members(
203-
token, team_id, members, team_name)
202+
token, team_id, members, team_name, verify_github_membership)
204203
while retry_count <= MAX_RETRIES and failed_members:
205204
failed_members = _perform_set_manual_team_members(
206-
token, team_id, members, team_name)
205+
token, team_id, members, team_name, verify_github_membership)
207206
# We send all members, not just the failed ones. Otherwise it would set the list
208207
# to only the failed members
209208
retry_count += 1

src/shared/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class TeamStructure(BaseModel):
3131
name: str
3232
members: List[str] = []
3333
resources: List[Resource] = []
34+
slack_channel: Optional[str] = None
3435

3536

3637
class Tag(BaseModel):
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import sys
2+
3+
from dotenv import load_dotenv
4+
from loguru import logger
5+
6+
from src.shared.clients.google import get_teams_from_bigquery_view
7+
8+
# Load environment variables from .env file.
9+
load_dotenv()
10+
logger.remove() # Remove default handler
11+
logger_format = "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <2}</level> | {message}"
12+
logger.add(sys.stderr, format=logger_format)
13+
14+
15+
def generate_teams_json_file_from_bigquery():
16+
teams = get_teams_from_bigquery_view()
17+
with open("teams.json", "w") as file:
18+
file.write(teams.model_dump_json(indent=2))
19+
20+
21+
if __name__ == '__main__':
22+
generate_teams_json_file_from_bigquery()

0 commit comments

Comments
 (0)