You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

405 lines
15 KiB

#!/usr/bin/env python3
"""
Automated GitHub to Gitea Repository Migration Tool
This script automatically discovers all repositories from a GitHub user
and migrates them to a Gitea instance, preserving all branches, tags, and history.
"""
import subprocess
import requests
import json
import os
import shutil
from pathlib import Path
from typing import List, Dict, Optional
import time
class RepoMigrator:
    """
    Discover a GitHub user's repositories and push mirror copies to Gitea.

    Each repository is created on Gitea through its REST API, cloned from
    GitHub with ``git clone --mirror`` into a scratch directory, and pushed
    with ``git push --mirror``.
    """

    # Seconds to wait on any single HTTP API call; without a timeout a
    # stalled connection would block the whole migration indefinitely.
    REQUEST_TIMEOUT = 30

    # Prefix that identifies branch refs in `git ls-remote` output.
    _BRANCH_PREFIX = 'refs/heads/'

    def __init__(self, github_username: str, gitea_base_url: str, gitea_username: str):
        """
        Initialize the repository migrator.

        Args:
            github_username: Your GitHub username (source)
            gitea_base_url: Base URL of your Gitea instance (e.g., 'https://git.edfast.se')
            gitea_username: Your username on Gitea (destination)
        """
        self.github_username = github_username
        # Trailing slashes are dropped so URLs can be built with plain '/' joins.
        self.gitea_base_url = gitea_base_url.rstrip('/')
        self.gitea_username = gitea_username
        self.temp_dir = Path('/tmp/repo_migration')
        # Create temp directory for cloning repos
        self.temp_dir.mkdir(parents=True, exist_ok=True)

    def get_github_repos(self, github_token: Optional[str] = None) -> List[Dict]:
        """
        Fetch all repositories from GitHub user.

        Pages through the GitHub API 100 repositories at a time until an
        empty page is returned.

        NOTE(review): GitHub documents the ``/users/{username}/repos``
        endpoint as listing public repositories; confirm whether private
        repositories require ``GET /user/repos`` instead.

        Args:
            github_token: Optional GitHub personal access token for private repos

        Returns:
            List of repository dictionaries with name, clone_url, etc.

        Raises:
            RuntimeError: If GitHub answers with a non-200 status code.
        """
        headers = {}
        if github_token:
            headers['Authorization'] = f'token {github_token}'

        # GitHub API endpoint for user repositories
        url = f'https://api.github.com/users/{self.github_username}/repos'
        repos = []
        page = 1
        print(f"🔍 Discovering repositories for {self.github_username}...")
        while True:
            params = {'page': page, 'per_page': 100, 'type': 'all'}
            response = requests.get(
                url, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                raise RuntimeError(
                    f"Failed to fetch GitHub repos: {response.status_code} - {response.text}"
                )
            page_repos = response.json()
            if not page_repos:
                break
            repos.extend(page_repos)
            print(f" Found {len(page_repos)} repos on page {page}")
            page += 1
        print(f"✅ Total repositories found: {len(repos)}")
        return repos

    def check_gitea_repo_exists(self, repo_name: str, gitea_token: Optional[str] = None) -> bool:
        """
        Check if repository already exists on Gitea.

        Args:
            repo_name: Name of the repository
            gitea_token: Optional Gitea access token

        Returns:
            True if the Gitea API answers 200 for the repository, False for
            any other status code.
        """
        headers = {}
        if gitea_token:
            headers['Authorization'] = f'token {gitea_token}'
        url = f'{self.gitea_base_url}/api/v1/repos/{self.gitea_username}/{repo_name}'
        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        return response.status_code == 200

    def create_gitea_repo(self, repo_name: str, description: str, private: bool, gitea_token: str) -> bool:
        """
        Create a new repository on Gitea.

        Args:
            repo_name: Name for the new repository
            description: Repository description
            private: Whether the repository should be private
            gitea_token: Gitea access token

        Returns:
            True if successful (HTTP 201), False otherwise
        """
        headers = {
            'Authorization': f'token {gitea_token}',
            'Content-Type': 'application/json',
        }
        data = {
            'name': repo_name,
            # GitHub reports a missing description as null; send a string.
            'description': description or '',
            'private': private,
            'auto_init': False,  # Don't initialize with README since we're migrating
        }
        url = f'{self.gitea_base_url}/api/v1/user/repos'
        response = requests.post(
            url, headers=headers, json=data, timeout=self.REQUEST_TIMEOUT
        )
        if response.status_code == 201:
            print(f" ✅ Created repository '{repo_name}' on Gitea")
            return True
        print(f" ❌ Failed to create repository '{repo_name}': {response.status_code} - {response.text}")
        return False

    def run_command(self, command: List[str], cwd: Optional[Path] = None) -> tuple[bool, str]:
        """
        Execute a command (without a shell) and return success status and output.

        Args:
            command: List of command parts
            cwd: Working directory for the command

        Returns:
            Tuple of (success: bool, output: str). On success the output is
            the command's stdout; on failure it is an error message built
            from stderr (or from the OS error when the program could not be
            started at all).
        """
        try:
            result = subprocess.run(
                command,
                cwd=cwd,
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            return False, f"Command failed: {e.stderr}"
        except OSError as e:
            # e.g. the executable is not installed or cwd does not exist
            return False, f"Command failed: {e}"
        return True, result.stdout

    def get_authenticated_clone_url(self, repo: Dict, github_token: Optional[str] = None) -> str:
        """
        Get the appropriate clone URL based on available authentication.

        Args:
            repo: Repository dictionary from GitHub API
            github_token: Optional GitHub personal access token

        Returns:
            Clone URL with authentication if available
        """
        if github_token:
            # Use HTTPS with token authentication. Prefer the repository's
            # own 'owner/name' path: the listing may include repositories
            # owned by someone other than github_username.
            repo_path = repo.get('full_name') or f"{self.github_username}/{repo['name']}"
            return f"https://{github_token}@github.com/{repo_path}.git"
        if repo.get('ssh_url'):
            # Use SSH URL if available (works with VS Code authentication)
            return repo['ssh_url']
        # Fall back to HTTPS clone URL
        return repo['clone_url']

    @staticmethod
    def _redact(text: str, secret: Optional[str]) -> str:
        """Return *text* with every occurrence of *secret* masked."""
        if secret:
            return text.replace(secret, '***')
        return text

    @classmethod
    def _branch_names(cls, remote_refs: List[str]) -> List[str]:
        """Extract branch names from `git ls-remote` lines ('<sha>\\t<ref>')."""
        names = []
        for line in remote_refs:
            ref = line.partition('\t')[2]
            if ref.startswith(cls._BRANCH_PREFIX):
                names.append(ref[len(cls._BRANCH_PREFIX):])
        return names

    def _report_clone_contents(self, repo_path: Path) -> None:
        """Print ref count, commit count and a file listing for the local clone."""
        success, refs_output = self.run_command(['git', 'show-ref'], cwd=repo_path)
        if not (success and refs_output.strip()):
            print(f" No refs found - this might be an empty repository")
            return
        ref_lines = refs_output.strip().split('\n')
        print(f" 📋 Found {len(ref_lines)} references (branches/tags)")

        # Check total commit count across all branches
        success, log_output = self.run_command(
            ['git', 'rev-list', '--count', '--all'], cwd=repo_path
        )
        if not (success and log_output.strip()):
            print(f" No commits found - this is likely an empty repository")
            return
        commit_count = int(log_output.strip())
        print(f" 📊 Total commits across all branches: {commit_count}")

        # Show file structure of the repository at HEAD
        success, ls_output = self.run_command(
            ['git', 'ls-tree', '-r', '--name-only', 'HEAD'], cwd=repo_path
        )
        if not (success and ls_output.strip()):
            print(f" No files found in HEAD - repository might be empty or have issues")
            return
        files = ls_output.strip().split('\n')
        print(f" 📁 Files in repository: {len(files)} total")
        # Show first few files to verify content
        for file_name in files[:10]:
            print(f" - {file_name}")
        if len(files) > 10:
            print(f" ... and {len(files) - 10} more files")

    def _verify_remote_refs(self, gitea_url: str) -> None:
        """List the refs now present on Gitea and report the branches found."""
        print(f" 🔍 Verifying push to Gitea...")
        success, branch_output = self.run_command(['git', 'ls-remote', gitea_url])
        if not (success and branch_output.strip()):
            print(f" Could not verify remote refs, but push appeared successful")
            return
        remote_refs = branch_output.strip().split('\n')
        print(f" 📋 Confirmed {len(remote_refs)} references pushed to Gitea")
        # Compare exact branch names; a substring test would also match
        # names such as 'maintenance' or 'master-backup'.
        branches = self._branch_names(remote_refs)
        if 'main' in branches:
            print(f" 🌿 Default branch 'main' is available")
        elif 'master' in branches:
            print(f" 🌿 Default branch 'master' is available")
        elif branches:
            print(f" 🌿 Available branches: {', '.join(branches)}")

    def _clone_and_push(self, repo_name: str, clone_url: str, repo_path: Path,
                        github_token: Optional[str] = None) -> bool:
        """Mirror-clone *clone_url* into *repo_path* and mirror-push it to Gitea."""
        print(f" 📥 Cloning {repo_name} from GitHub with full history...")
        # Use --mirror to get complete repository content including all refs
        success, output = self.run_command(
            ['git', 'clone', '--mirror', clone_url, str(repo_path)]
        )
        if not success:
            # git may echo the clone URL, which can embed the GitHub token.
            print(f" ❌ Failed to clone repository: {self._redact(output, github_token)}")
            return False

        # Informational only: nothing below depends on the report.
        self._report_clone_contents(repo_path)

        # Plain Gitea URL: no credentials are embedded here, so git relies on
        # whatever credential mechanism is configured on this machine.
        gitea_url = f'{self.gitea_base_url}/{self.gitea_username}/{repo_name}.git'
        print(f" 📤 Pushing complete repository to Gitea...")
        # For a mirrored clone, we need to set the push URL and push everything
        success, output = self.run_command(
            ['git', 'remote', 'set-url', '--push', 'origin', gitea_url], cwd=repo_path
        )
        if not success:
            print(f" ❌ Failed to set Gitea remote URL: {self._redact(output, github_token)}")
            return False

        # The --mirror flag pushes all refs including branches and tags
        success, output = self.run_command(['git', 'push', '--mirror'], cwd=repo_path)
        if not success:
            print(f" ❌ Failed to push to Gitea: {self._redact(output, github_token)}")
            return False

        self._verify_remote_refs(gitea_url)
        print(f" ✅ Successfully migrated {repo_name}")
        return True

    def migrate_repository(self, repo: Dict, gitea_token: str, github_token: Optional[str] = None) -> bool:
        """
        Migrate a single repository from GitHub to Gitea.

        A repository that already exists on Gitea is skipped and reported
        as a success. The temporary clone is removed whether or not the
        migration succeeds.

        Args:
            repo: Repository dictionary from GitHub API
            gitea_token: Gitea access token for authentication
            github_token: Optional GitHub personal access token

        Returns:
            True if migration successful, False otherwise
        """
        repo_name = repo['name']
        # Use authenticated clone URL if token is available
        clone_url = self.get_authenticated_clone_url(repo, github_token)
        # 'description' is present but null for repos without one.
        description = repo.get('description') or ''
        private = repo['private']
        print(f"\n📦 Migrating repository: {repo_name}")

        # Check if repo already exists on Gitea
        if self.check_gitea_repo_exists(repo_name, gitea_token):
            print(f" Repository '{repo_name}' already exists on Gitea. Skipping...")
            return True

        # Create repository on Gitea first
        if not self.create_gitea_repo(repo_name, description, private, gitea_token):
            return False

        # The scratch directory may have been removed by an earlier run.
        self.temp_dir.mkdir(parents=True, exist_ok=True)
        repo_path = self.temp_dir / f"{repo_name}.git"
        # Remove existing clone if it exists
        if repo_path.exists():
            shutil.rmtree(repo_path)

        try:
            return self._clone_and_push(repo_name, clone_url, repo_path, github_token)
        finally:
            # Clean up temporary clone on every path; its git config holds
            # the clone URL, which may contain the GitHub token.
            shutil.rmtree(repo_path, ignore_errors=True)

    def migrate_all_repositories(self, github_token: Optional[str] = None, gitea_token: Optional[str] = None):
        """
        Main method to migrate all repositories from GitHub to Gitea.

        Prints a summary of successful and failed migrations and removes
        the temporary directory when done.

        Args:
            github_token: Optional GitHub personal access token
            gitea_token: Optional Gitea access token (required for creating repos)

        Raises:
            ValueError: If no Gitea token is supplied.
        """
        if not gitea_token:
            raise ValueError("Gitea token is required for creating repositories")
        print(f"🚀 Starting migration from GitHub ({self.github_username}) to Gitea ({self.gitea_base_url}/{self.gitea_username})")
        print("=" * 80)

        # Get all repositories from GitHub
        try:
            repos = self.get_github_repos(github_token)
        except Exception as e:
            print(f"❌ Failed to fetch repositories from GitHub: {e}")
            return
        if not repos:
            print("No repositories found to migrate.")
            return

        # Migrate each repository; one failure must not stop the others.
        successful_migrations = 0
        failed_migrations = 0
        for repo in repos:
            try:
                if self.migrate_repository(repo, gitea_token, github_token):
                    successful_migrations += 1
                else:
                    failed_migrations += 1
                # Small delay between migrations to be respectful
                time.sleep(1)
            except Exception as e:
                print(f" ❌ Unexpected error migrating {repo.get('name', '<unknown>')}: {e}")
                failed_migrations += 1

        # Summary
        print("\n" + "=" * 80)
        print(f"🎉 Migration completed!")
        print(f"✅ Successfully migrated: {successful_migrations} repositories")
        if failed_migrations > 0:
            print(f"❌ Failed migrations: {failed_migrations} repositories")

        # Clean up temp directory
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)
def main():
    """
    Main function to run the migration script.

    Set your configuration here and run the script. The access tokens are
    read from the GITHUB_TOKEN and GITEA_TOKEN environment variables; the
    function prints setup instructions and returns without migrating
    anything when GITEA_TOKEN is not set.
    """
    # Configuration - Update these values
    GITHUB_USERNAME = 'lasseedfast'
    GITEA_BASE_URL = 'https://git.edfast.se'
    GITEA_USERNAME = 'lasse'

    # Optional: Set these environment variables or hardcode them
    # For security, it's better to use environment variables
    GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')  # Optional, for private repos
    GITEA_TOKEN = os.getenv('GITEA_TOKEN')  # Required for creating repos

    if not GITEA_TOKEN:
        print("❌ GITEA_TOKEN environment variable is required!")
        print(f" You can get a token from: {GITEA_BASE_URL}/user/settings/applications")
        print(" Then run: export GITEA_TOKEN='your_token_here'")
        return

    # Create migrator and run migration
    migrator = RepoMigrator(GITHUB_USERNAME, GITEA_BASE_URL, GITEA_USERNAME)
    migrator.migrate_all_repositories(GITHUB_TOKEN, GITEA_TOKEN)


if __name__ == '__main__':
    main()