Skip to content

Commit 3541300

Browse files
committed
updated regex for raw and api style urls
1 parent e19b243 commit 3541300

4 files changed

Lines changed: 78 additions & 19 deletions

File tree

package/cloudshell/iac/terraform/downloaders/gitlab_downloader.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,16 @@ class GitLabApiUrlData(CommonGitLabUrlData):
2626
api_version: str
2727
project_id: int
2828
api_endpoint: str
29+
sha: str
2930

3031

3132
def extract_data_from_raw_url(url) -> GitLabRawUrlData:
3233
"""
3334
Take raw (browser style) url and extract data
34-
Sample Raw Browser url: "http://192.168.85.26/quali_natti/terraformstuff/-/tree/test-branch/rds"
35+
Sample Raw Browser url: "http://192.168.85.26/quali_natti/terraformstuff/-/tree/test-branch/rds/project1"
3536
"""
36-
37-
pattern = (r'^(?P<protocol>https?)://(?P<domain>[^/]+)/(?P<user>[^/]+)/(?P<project>[^/]+)/'
38-
r'(-/tree/(?P<branch>[^/]+))?(?P<path>/.*)?$')
37+
pattern = (r'^(?P<protocol>https?)://(?P<domain>[^/]+)/(?P<user>[^/]+)/(?P<project>[^/]+)/-/tree/'
38+
r'(?P<branch>[^/]+)/(?P<path>.*)?$')
3939

4040
match = re.match(pattern, url)
4141
if not match:
@@ -54,23 +54,47 @@ def extract_data_from_raw_url(url) -> GitLabRawUrlData:
5454
def extract_data_from_api_url(url) -> GitLabApiUrlData:
5555
"""
5656
Take api style url and extract data
57+
supports url-encoded style paths as well
5758
Sample Api url: "http://192.168.85.26/api/v4/projects/2/repository/archive.zip?path=rds"
5859
"""
59-
6060
pattern = (r'^(?P<protocol>https?)://(?P<domain>[^/]+)(?P<api_version>/api/v\d+)?'
61-
r'(?P<api_endpoint>/projects/(?P<project_id>\d+)/repository/archive\.zip)(\?path=(?P<path>[^&]+))?$')
61+
r'(?P<api_endpoint>/projects/(?P<project_id>\d+)/repository/archive\.zip)'
62+
r'(?P<params>\?([^&]+=[^&]+&)*[^&]+=[^&]+$)')
6263

6364
match = re.match(pattern, url)
6465
if not match:
6566
raise ValueError(f"No GitLab url data found in API url '{url}'")
6667

6768
groups = match.groupdict()
69+
query_params = groups['params']
70+
71+
# remove the leading '?'
72+
query_params = query_params.split("?")[-1]
73+
74+
# split into 2D list [[k1,v1],[k2,v2]]
75+
params_list = [x.split("=") for x in query_params.split("&")]
76+
77+
# search for target params
78+
path_param_search = [x for x in params_list if x[0] == "path"]
79+
sha_param_search = [x for x in params_list if x[0] == "sha"]
80+
ref_param_search = [x for x in params_list if x[0] == "ref"]
81+
sha = sha_param_search[0][1] if sha_param_search else ""
82+
path = path_param_search[0][1] if path_param_search else ""
83+
ref = ref_param_search[0][1] if ref_param_search else ""
84+
85+
# take sha param if passed, otherwise use the ref
86+
sha = sha if sha else ref
87+
88+
# url encoded path not necessary for archive api
89+
path = path.replace("%2F", "/").replace("%2D", "-")
90+
sha = sha.replace("%2D", "-")
6891
return GitLabApiUrlData(protocol=groups['protocol'],
6992
domain=groups['domain'],
7093
api_version=groups['api_version'],
7194
project_id=groups['project_id'],
7295
api_endpoint=groups['api_endpoint'],
73-
path=groups['path'],
96+
path=path,
97+
sha=sha,
7498
full_url=url)
7599

76100

package/cloudshell/iac/terraform/services/gitlab_api_handler.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,30 +55,41 @@ def get_project_directory_info(self, project_id: int, path: str, branch: str = "
5555
raise ValueError(f"No data found at repo path '{path}' for branch '{branch}'")
5656
return directory_info
5757

58-
def get_directory_zip_bytes(self, project_id: int, path: str, sha: str = "main") -> bytes:
58+
def get_directory_zip_bytes(self, project_id: int, path="", sha="") -> bytes:
5959
"""
60-
sha can be a branch or commit id
60+
nested path does not have to be url encoded
61+
"parent-dir/rds" is fine
62+
63+
sha can be a branch or commit id - an empty sha resolves to the default branch
6164
https://docs.gitlab.com/ee/api/repositories.html#get-file-archive
65+
66+
empty path resolves to entire project directory
6267
"""
6368
url = f"{self.base_url}/projects/{project_id}/repository/archive.zip"
69+
params = {}
70+
if path:
71+
params["path"] = path
72+
if sha:
73+
params["sha"] = sha
6474
with self.session as session:
65-
response = session.get(url=url, params={"path": path, "sha": sha})
75+
response = session.get(url=url, params=params)
6676
self._validate_response(response)
6777
archive_bytes = response.content
6878
if not archive_bytes:
69-
raise ValueError(f"No data found at repo path '{path}' for sha '{sha}'")
79+
raise ValueError(f"No archive data found. Project ID: {project_id}. Path: '{path}'. SHA: '{sha}'")
7080
return archive_bytes
7181

72-
def download_zip(self, project_id: int, path: str, output_file_path: str, sha: str = "main"):
82+
def download_zip(self, project_id: int, path: str, output_file_path: str, sha: str):
7383
"""
7484
get bytes in response and dump to file
85+
nested path slashes do not have to be url encoded. "parent-dir/rds" is okay
7586
Output file has structure <output_file_path>.zip/<gitlab-generated-name>/git-parent-folder/folder2/file.txt
7687
"""
7788
binary_data = self.get_directory_zip_bytes(project_id, path, sha)
7889
with open(output_file_path, "wb+") as file:
7990
file.write(binary_data)
8091

81-
def download_archive_to_temp_dir(self, project_id: int, path: str, sha="main", zip_name="repo.zip", repo_dir_name="REPO"):
92+
def download_archive_to_temp_dir(self, project_id: int, path: str, sha: str, zip_name="repo.zip", repo_dir_name="REPO"):
8293
binary_data = self.get_directory_zip_bytes(project_id, path, sha)
8394
working_dir = self._prepare_working_dir(repo_zip_file_name=zip_name,
8495
path_in_repo=path,
@@ -112,3 +123,13 @@ def _prepare_working_dir(repo_zip_file_name: str, path_in_repo: str, zip_bytes:
112123
for path_dir in path_in_repo.split("/"):
113124
working_dir_path = os.path.join(working_dir_path, path_dir)
114125
return working_dir_path
126+
127+
128+
if __name__ == "__main__":
129+
api = GitlabApiHandler(host="192.168.85.26",
130+
token="glpat-Kx6s8n2maL34CMc-AZ6s",
131+
is_https=False)
132+
res = api.download_archive_to_temp_dir(project_id=2,
133+
path="parent-dir/hello-world",
134+
sha="test-branch")
135+
pass

package/tests/unit_tests/test_gitlab_url_extractor.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,36 @@
33

44

55
class TestGitlabUrlExtractor(unittest.TestCase):
6-
RAW_URL = "http://192.168.85.26/quali_natti/terraformstuff/-/tree/test-branch/rds"
7-
API_URL = "http://192.168.85.26/api/v4/projects/2/repository/archive.zip?path=rds"
6+
"""
7+
raw url as seen in browser has format
8+
http://<domain>/<user>/<projectname>/-/tree/<branch>/<folderpath>
9+
10+
api style url:
11+
http://<domain>/api/v4/projects/<project_id>/repository/archive.zip?path=<path>&sha=<branch>
12+
"""
13+
RAW_URL = "http://192.168.85.26/quali_natti/terraformstuff/-/tree/test-branch/parent-dir/hello-world"
14+
API_URL = "http://192.168.85.26/api/v4/projects/2/repository/archive.zip?path=parent%2Ddir%2Fhello%2Dworld&sha=test%2Dbranch"
815

916
def test_raw_url(self):
1017
url_data = gitlab_downloader.extract_data_from_raw_url(self.RAW_URL)
1118
assert url_data
1219

13-
def test_api_url_extract(self):
20+
def test_api_url(self):
1421
url_data = gitlab_downloader.extract_data_from_api_url(self.API_URL)
1522
assert url_data
1623

17-
def test_api_validate(self):
24+
def test_api_url_validate(self):
1825
assert gitlab_downloader.is_gitlab_api_url(self.API_URL)
1926
assert not gitlab_downloader.is_gitlab_api_url(self.RAW_URL)
2027

28+
def test_raw_vs_url(self):
29+
raw_data = gitlab_downloader.extract_data_from_raw_url(self.RAW_URL)
30+
api_data = gitlab_downloader.extract_data_from_api_url(self.API_URL)
31+
self.assertEqual(raw_data.domain, api_data.domain)
32+
self.assertEqual(raw_data.path, api_data.path)
33+
self.assertEqual(raw_data.branch, api_data.sha)
34+
2135
def test_raises(self):
22-
url_arg = ""
36+
url_arg = "http://www.google.com"
2337
self.assertRaises(ValueError, gitlab_downloader.extract_data_from_api_url, url_arg)
2438
self.assertRaises(ValueError, gitlab_downloader.extract_data_from_raw_url, url_arg)

shells/generic_terraform_service/shell-definition.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ node_types:
2121
type: string
2222
tags: [ ]
2323
Git Terraform Module URL:
24-
description: "Git url to the Terraform module. Supports the same URL format from a browser. The entire repo will be downloaded. Url to a folder: https://github.com/ACCOUNT/REPO/tree/BRANCH/PATH_TO_FOLDER or url to a TF file: https://github.com/ACCOUNT/REPO/blob/BRANCH/PATH/filename.tf"
24+
description: "Git url to the Terraform module. Supports the same URL format from a browser. For Github, the entire repo will be downloaded. Url to a folder: https://github.com/ACCOUNT/REPO/tree/BRANCH/PATH_TO_FOLDER or url to a TF file: https://github.com/ACCOUNT/REPO/blob/BRANCH/PATH/filename.tf. Gitlab: 'http://<GITLAB_DOMAIN>/<USER>/<PROJECT_NAME>/-/tree/<BRANCH>/<FOLDER_PATH>'"
2525
type: string
2626
tags: [ user_input ]
2727
Terraform Version:

0 commit comments

Comments
 (0)