From 5ff988b519217ecd3890369d617ae1ae1c8106c4 Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Thu, 13 Feb 2025 21:09:08 -0500 Subject: [PATCH 01/11] chore: better HTTP debugging --- github2gitlab/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 1b2cc39..0f5d532 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -21,6 +21,7 @@ import git import gitdb import hashlib +from http.client import HTTPConnection import json import logging import os @@ -81,9 +82,11 @@ def __init__(self, args): if self.args.verbose: level = logging.DEBUG + HTTPConnection.debuglevel = 1 else: level = logging.INFO + logging.getLogger("urllib3").setLevel(level) logging.getLogger('github2gitlab').setLevel(level) self.tmpdir = "/tmp" From cc8348ab8b45a20bacd076924053f8de4091f853 Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Thu, 13 Feb 2025 21:09:27 -0500 Subject: [PATCH 02/11] fix: https://github.com/ubc/github2gitlab/issues/3 --- github2gitlab/main.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 0f5d532..0744913 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -58,8 +58,11 @@ def __init__(self, args): if not self.args.gitlab_repo: self.args.gitlab_repo = self.args.github_repo - (self.args.gitlab_namespace, - self.args.gitlab_name) = self.args.gitlab_repo.split('/') + + repo_parts = self.args.gitlab_repo.split('/') + self.args.gitlab_name = repo_parts.pop() + self.args.gitlab_namespace = '/'.join(repo_parts) + self.args.gitlab_repo = parse.quote_plus(self.args.gitlab_repo) self.github = { @@ -75,6 +78,7 @@ def __init__(self, args): 'host': self.args.gitlab_url, 'name': self.args.gitlab_name, 'namespace': self.args.gitlab_namespace, + 'group': None, 'url': self.args.gitlab_url + "/api/v4", 'repo': self.args.gitlab_repo, 'token': self.args.gitlab_token, @@ -89,6 +93,14 @@ def __init__(self, 
args): logging.getLogger("urllib3").setLevel(level) logging.getLogger('github2gitlab').setLevel(level) + g = self.gitlab + url = g['url'] + "/groups" + query = {'private_token': g['token'], 'all_available': 'true', 'per_page': 10000} + groups = requests.get(url, params=query).json() + matches = list(filter(lambda group: group['full_path'] == g['namespace'].lower(), groups)) + if any(matches): + g['group_id'] = matches[0]['id'] + self.tmpdir = "/tmp" @staticmethod @@ -130,6 +142,9 @@ def get_parser(): parser.add_argument('--clean', action='store_const', const=True, help='Remove the repo after sync') + parser.add_argument('--visibility', + help='Visbility of created repos (public, internal, private)', + default='public') return parser @staticmethod @@ -288,15 +303,21 @@ def add_project(self): g = self.gitlab url = g['url'] + "/projects/" + g['repo'] query = {'private_token': g['token']} + if g['group_id']: + query['group_id'] = g['group_id'] + if (requests.get(url, params=query).status_code == requests.codes.ok): log.debug("project " + url + " already exists") return None else: - log.info("add project " + g['repo']) url = g['url'] + "/projects" - query['public'] = 'true' - query['namespace'] = g['namespace'] + query['visibility'] = self.args.visibility query['name'] = g['name'] + if g['group_id']: + query['namespace_id'] = g['group_id'] + else: + query['namespace'] = g['namespace'] + log.info("add project " + g['repo']) result = requests.post(url, params=query) if result.status_code != requests.codes.created: raise ValueError(result.text) From 783f283cab48c68d96862d5a4bec5826a4d7e917 Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Thu, 13 Feb 2025 21:09:54 -0500 Subject: [PATCH 03/11] fix: https://github.com/ubc/github2gitlab/issues/2 --- README.rst | 4 ++-- github2gitlab/main.py | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index c500cf7..c5fab28 100644 --- a/README.rst +++ b/README.rst @@ -38,7 +38,7 
@@ were never merged. --gitlab-url http://workbench.dachary.org \ --gitlab-token sxQJ67SQKihMrGWVf \ --gitlab-repo ceph/ceph-backports \ - --github-token 64933d355fda9844aadd4e224d \ + --github-auth githubusername:64933d355fda9844aadd4e224d \ --github-repo ceph/ceph \ --ignore-closed @@ -90,7 +90,7 @@ Hacking --gitlab-url http://workbench.dachary.org \ --gitlab-token XXXXXXXXX \ --gitlab-repo dachary/testrepo2 \ - --github-token XXXXXXXXX \ + --github-auth githubusername:XXXXXXXXX \ --github-repo dachary/testrepo \ --ssh-public-key ~/.ssh/id_rsa.pub \ --verbose diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 0744913..92ae5eb 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -69,7 +69,7 @@ def __init__(self, args): 'url': "https://api.github.com", 'git': "https://github.com", 'repo': self.args.github_repo, - 'token': self.args.github_token, + 'auth': self.args.github_auth or None } if self.args.branches: self.github['branches'] = self.args.branches.split(',') @@ -116,8 +116,8 @@ def get_parser(): required=True) parser.add_argument('--gitlab-repo', help='Gitlab repo (for instance ceph/ceph)') - parser.add_argument('--github-token', - help='GitHub authentication token') + parser.add_argument('--github-auth', + help='GitHub auth credentials, in the form username:token') parser.add_argument('--github-repo', help='GitHub repo (for instance ceph/ceph)', required=True) @@ -197,9 +197,15 @@ def gitlab_create_remote(self, repo): def git_mirror(self): name = self.gitlab['name'] + url = self.github['git'] + + if(self.github['auth']): + url = self.github['git'].replace('https://', 'https://{}@'.format(self.github['auth'])) + if not os.path.exists(name): - self.sh("git clone --bare " + self.github['git'] + + self.sh("git clone --bare " + url + "/" + self.github['repo'] + " " + name) + repo = git.Repo(name) os.chdir(name) if not hasattr(repo.remotes, 'gitlab'): @@ -508,8 +514,8 @@ def get_pull_requests(self): 
"https://developer.github.com/v3/pulls/#list-pull-requests" g = self.github query = {'state': 'all'} - if self.args.github_token: - query['access_token'] = g['token'] + if g['auth']: + query['access_token'] = g['auth'].split(':')[1] def f(pull): if self.args.ignore_closed: From c6bda68ddb9bc5604607ef0a2a44743650169bb7 Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Thu, 13 Feb 2025 21:10:57 -0500 Subject: [PATCH 04/11] fix: regex contained incorrect escaped sequence --- github2gitlab/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 92ae5eb..0fef6c8 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -243,7 +243,7 @@ def git_mirror(self): def git_mirror_optimize(self, repo): self.sh("git fetch origin +refs/pull/*:refs/remotes/origin/pull/*") for head in repo.refs: - pr = re.search('^origin/pull/(\d+)/head$', head.name) + pr = re.search('^origin/pull/(\\d+)/head$', head.name) if not pr: continue pr = pr.group(1) From 8a14defcd33cc32e56e83532b343b9406e60c3d1 Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Fri, 14 Feb 2025 11:35:03 -0500 Subject: [PATCH 05/11] fix: RuntimeWarning: line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used --- github2gitlab/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 0fef6c8..657fc9c 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -171,8 +171,7 @@ def sh(self, command): args=command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - shell=True, - bufsize=1) + shell=True) lines = [] with proc.stdout: for line in iter(proc.stdout.readline, b''): From de9f68b2e6512f923fa2de2bb311b1fbb0d1820d Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Fri, 14 Feb 2025 11:35:23 -0500 Subject: [PATCH 06/11] fix: create_merge_request and update_merge_request contains sensitive info; change logging to debug --- 
github2gitlab/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 657fc9c..0061269 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -540,7 +540,7 @@ def create_merge_request(self, query): g = self.gitlab query['private_token'] = g['token'] url = g['url'] + "/projects/" + g['repo'] + "/merge_requests" - log.info('create_merge_request: ' + str(query)) + log.debug('create_merge_request: ' + str(query)) result = requests.post(url, params=query) if result.status_code != requests.codes.created: raise ValueError(result.text) @@ -576,7 +576,7 @@ def put_merge_request(self, merge_request, updates): updates['private_token'] = g['token'] url = (g['url'] + "/projects/" + g['repo'] + "/merge_requests/" + str(merge_request['iid'])) - log.info('update_merge_request: ' + url + ' <= ' + str(updates)) + log.debug('update_merge_request: ' + url + ' <= ' + str(updates)) return requests.put(url, params=updates).json() def verify_merge_update(self, updates, result): From ff8d05062748ec0c3bdc64544f975b6611a05297 Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Fri, 14 Feb 2025 11:38:09 -0500 Subject: [PATCH 07/11] feat: allow user not to add SSH key to Gitlab --- github2gitlab/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 0061269..8552126 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -130,6 +130,9 @@ def get_parser(): parser.add_argument('--ignore-closed', action='store_const', const=True, help='ignore pull requests closed and not merged') + parser.add_argument('--skip-add-key', action='store_const', + const=True, + help='do not attempt to add local SSH key to Gitlab') parser.add_argument('--skip-pull-requests', action='store_const', const=True, help='do not mirror PR to MR') @@ -152,7 +155,8 @@ def factory(argv): return GitHub2GitLab(GitHub2GitLab.get_parser().parse_args(argv)) def 
run(self): - self.add_key() + if not self.args.skip_add_key: + self.add_key() if self.add_project(): self.unprotect_branches() self.git_mirror() From 9202ae4e82a285f026fc3c95d12bf4ac519d4730 Mon Sep 17 00:00:00 2001 From: Jose Date: Mon, 16 Mar 2026 15:44:41 +0800 Subject: [PATCH 08/11] Improve error reporting Enhance error reporting by capturing and logging subprocess output on failure. This ensures that the specific reasons for Git command failures (such as GitLab pre-receive hook rejections) are visible in the logs. Before this change, the script would only report that a command failed with "exit status 1," leaving the actual cause a mystery. Now, it provides the full context needed for debugging. --- github2gitlab/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 8552126..aa12cee 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -183,9 +183,12 @@ def sh(self, command): lines.append(line) log.debug(str(line.strip())) if proc.wait() != 0: + output = "".join(lines) + log.error("Command failed: " + command + "\n" + output) raise subprocess.CalledProcessError( returncode=proc.returncode, - cmd=command + cmd=command, + output=output ) return "".join(lines) From ef64531b176472757d3839b31085ad632a6d4fc8 Mon Sep 17 00:00:00 2001 From: Jose Date: Mon, 16 Mar 2026 16:03:06 +0800 Subject: [PATCH 09/11] Fix KeyError: 'group_id' Rename the `group` key of the self.gitlab dict to `group_id` to avoid the exception below: Traceback (most recent call last): File ".../venv/github2gitlab/bin/github2gitlab", line 25, in sys.exit(GitHub2GitLab.factory(sys.argv[1:]).run()) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File ".../venv/github2gitlab/github2gitlab/main.py", line 163, in run if self.add_project(): ^^^^^^^^^^^^^^^^^^ File ".../venv/github2gitlab/github2gitlab/main.py", line 321, in add_project if g['group_id']: ~^^^^^^^^^^^^ KeyError: 'group_id' --- github2gitlab/main.py | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index aa12cee..03b9435 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -78,7 +78,7 @@ def __init__(self, args): 'host': self.args.gitlab_url, 'name': self.args.gitlab_name, 'namespace': self.args.gitlab_namespace, - 'group': None, + 'group_id': None, 'url': self.args.gitlab_url + "/api/v4", 'repo': self.args.gitlab_repo, 'token': self.args.gitlab_token, From 444fa1e1e0c3d902d8726b93f7e9041b6150b2a0 Mon Sep 17 00:00:00 2001 From: Jose Date: Mon, 16 Mar 2026 16:19:49 +0800 Subject: [PATCH 10/11] Truncate Merge Request titles Truncate Merge Request titles to 255 characters to comply with GitLab API limits. This prevents crashes when syncing GitHub Pull Requests with very long titles and ensures stable comparisons during updates. 1. Creation Logic (sync): Truncates the GitHub pull request title to 255 characters when creating a new Merge Request on GitLab. This prevents the ValueError that create_merge_request raises when GitLab rejects an over-long title. 2. Update Logic (field_update): Ensures that if a title is updated later (e.g., someone renames the PR on GitHub), the new title sent to GitLab is also truncated to 255 characters. 3. Comparison Logic (field_equal): Truncates both the GitHub and GitLab titles before comparing them. This is crucial because it prevents the script from thinking the titles are different just because one is 300 characters (on GitHub) and the other is 255 characters (on GitLab), which would otherwise cause an infinite loop of unnecessary update attempts. 
--- github2gitlab/main.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index 03b9435..df279f1 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -34,6 +34,7 @@ import shutil DESCRIPTION_MAX = 1024 +TITLE_MAX = 255 logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s') @@ -380,6 +381,13 @@ def field_equal(pull, pull_field, pull_value, merge_value = merge_value.replace(GitHub2GitLab.TAG_MERGED, '') return (pull_value[:DESCRIPTION_MAX] == merge_value[:DESCRIPTION_MAX]) + elif pull_field == 'title': + if merge_value is None: + merge_value = '' + if pull_value is None: + pull_value = '' + return (pull_value[:TITLE_MAX] == + merge_value[:TITLE_MAX]) else: return pull_value == merge_value @@ -399,6 +407,8 @@ def field_update(pull, pull_field, pull_value, return ('state_event', value) elif pull_field == 'body': return (merge_field, pull_value[:DESCRIPTION_MAX]) + elif pull_field == 'title': + return (merge_field, pull_value[:TITLE_MAX]) else: return (merge_field, pull_value) @@ -418,7 +428,7 @@ def sync(self): target_branch = pull['base']['ref'] if (self.rev_parse(pull, source_branch) and self.rev_parse(pull, target_branch)): - data = {'title': pull['title'], + data = {'title': pull['title'][:TITLE_MAX], 'source_branch': source_branch, 'target_branch': target_branch} if pull['body']: From d1334063a062c7b214d7199ee4ef40cc3be8b0ea Mon Sep 17 00:00:00 2001 From: Nick Marden Date: Mon, 30 Mar 2026 22:55:43 -0400 Subject: [PATCH 11/11] Fix group lookup failing on large GitLab instances The groups API query used per_page=10000, which GitLab silently caps at 100. On instances with more than 100 groups, the target namespace may not appear in results, causing group_id to stay None and project creation to fall back to the user's personal namespace. Fix: add search= parameter using the leaf component of the namespace so the API filters server-side, making pagination irrelevant. 
--- github2gitlab/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github2gitlab/main.py b/github2gitlab/main.py index df279f1..d8f3ccc 100644 --- a/github2gitlab/main.py +++ b/github2gitlab/main.py @@ -96,7 +96,7 @@ def __init__(self, args): g = self.gitlab url = g['url'] + "/groups" - query = {'private_token': g['token'], 'all_available': 'true', 'per_page': 10000} + query = {'private_token': g['token'], 'all_available': 'true', 'per_page': 100, 'search': g['namespace'].split('/')[-1]} groups = requests.get(url, params=query).json() matches = list(filter(lambda group: group['full_path'] == g['namespace'].lower(), groups)) if any(matches):