|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Git Repository Cleaner |
| 4 | +
|
| 5 | +This script provides various cleanup operations for git repositories and GitHub issues. |
| 6 | +
|
| 7 | +Usage: |
| 8 | + python cleaner.py tags -n 3 [--dry-run] [--confirm] |
| 9 | + python cleaner.py issues --max-issues 100 [--dry-run] |
| 10 | +""" |
| 11 | + |
import argparse
import os
import re
import shlex
import subprocess
import sys
from collections import defaultdict
from typing import Dict, List, Optional, Tuple

import requests
| 19 | + |
| 20 | + |
def run_command(command: List[str], dry_run: bool = False) -> str:
    """Run an external command, or only print it when ``dry_run`` is set.

    Args:
        command: Program and arguments as a list; executed without a shell.
        dry_run: When True, print the command and do not execute it.

    Returns:
        The command's stripped stdout. An empty string is returned in
        dry-run mode and whenever the command fails or cannot be started.
    """
    # Quote each argument so the printed command can be pasted into a shell
    # unchanged even when an argument contains spaces or metacharacters.
    cmd_str = ' '.join(shlex.quote(part) for part in command)

    if dry_run:
        print(f"[DRY RUN] Would execute: {cmd_str}")
        return ""

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {cmd_str}", file=sys.stderr)
        print(f"Error: {e.stderr}", file=sys.stderr)
        return ""
    except OSError as e:
        # Raised when the executable is missing or cannot be started; report
        # it the same way as a failing command instead of crashing.
        print(f"Error executing command: {cmd_str}", file=sys.stderr)
        print(f"Error: {e}", file=sys.stderr)
        return ""

    return result.stdout.strip()
| 36 | + |
| 37 | + |
def get_all_tags() -> List[str]:
    """Return every tag name printed by ``git tag -l``.

    An empty list is returned when git prints nothing, or when git exits
    with a non-zero status (its stderr is printed in that case).
    """
    list_cmd = ['git', 'tag', '-l']
    try:
        completed = subprocess.run(list_cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        print(f"Error getting tags: {err.stderr}")
        return []

    listing = completed.stdout.strip()
    if not listing:
        return []
    return listing.split('\n')
| 46 | + |
| 47 | + |
def parse_tag(tag: str) -> Tuple[str, str]:
    """Split a ``stack-name@version`` tag into its two components.

    Only the first ``@`` separates the parts, so the version part may itself
    contain ``@``.

    Raises:
        ValueError: If the tag has no ``@`` or either side of it is empty.
    """
    stack_name, separator, version = tag.partition('@')

    if not separator:
        raise ValueError(f"Invalid tag format: {tag}. Expected format: stack-name@version")
    if not (stack_name and version):
        raise ValueError(f"Invalid tag format: {tag}")

    return stack_name, version
| 58 | + |
| 59 | + |
def _version_sort_key(version: str) -> Tuple:
    """Build a sort key that compares the numeric runs of a version as integers."""
    # re.split with a capturing group keeps the digit runs, so the pieces
    # always alternate text, digits, text, ... starting with text. Elements at
    # the same position are therefore the same type and safe to compare.
    pieces = re.split(r'([0-9]+)', version)
    return tuple(int(piece) if index % 2 else piece for index, piece in enumerate(pieces))


def group_tags_by_stack(tags: List[str]) -> Dict[str, List[Tuple[str, str]]]:
    """Group tags by stack name and order each stack's versions newest first.

    Tags that ``parse_tag`` rejects are reported on stderr and skipped.

    Versions are ordered numerically per component, so ``1.10.0`` ranks above
    ``1.9.0``. A plain string sort would rank it below and cause the newest
    tags to be selected for deletion. Pre-release suffixes such as ``-rc1``
    are not given semver precedence; they are compared as ordinary text.

    Args:
        tags: Raw tag names, expected in ``stack-name@version`` form.

    Returns:
        Mapping of stack name to a list of ``(tag, version)`` tuples, sorted
        with the highest version first.
    """
    stacks = defaultdict(list)

    for tag in tags:
        try:
            stack_name, version = parse_tag(tag)
        except ValueError as e:
            print(f"Warning: {e}", file=sys.stderr)
            continue
        stacks[stack_name].append((tag, version))

    # Sort versions for each stack (newest first)
    for entries in stacks.values():
        entries.sort(key=lambda entry: _version_sort_key(entry[1]), reverse=True)

    # Hand back a plain dict so later lookups cannot silently create stacks.
    return dict(stacks)
| 77 | + |
| 78 | + |
def get_tags_to_delete(stacks: Dict[str, List[Tuple[str, str]]], keep_count: int) -> List[str]:
    """Collect the tag names lying beyond the first ``keep_count`` entries of each stack.

    Each stack's list is taken in the order given: the first ``keep_count``
    entries are kept and the tag name (first tuple element) of every later
    entry is returned. Stacks with ``keep_count`` or fewer entries contribute
    nothing.
    """
    return [
        entry[0]
        for versions in stacks.values()
        if len(versions) > keep_count
        for entry in versions[keep_count:]
    ]
| 88 | + |
| 89 | + |
def delete_tag(tag: str, dry_run: bool = False) -> bool:
    """Delete a tag locally and on the ``origin`` remote.

    Both steps are always attempted, even if the first one fails.

    Args:
        tag: Name of the tag to delete.
        dry_run: When True, only print the git commands that would be run.

    Returns:
        True when every step succeeded (always True in dry-run mode); False
        when a git command exited non-zero or could not be started.
    """
    print(f"Processing tag: {tag}")

    steps = (
        ("local", ['git', 'tag', '-d', tag]),
        ("remote", ['git', 'push', 'origin', f':refs/tags/{tag}']),
    )

    succeeded = True
    for label, command in steps:
        cmd_str = ' '.join(command)
        output = ""

        if dry_run:
            print(f"[DRY RUN] Would execute: {cmd_str}")
        else:
            # Run git directly: the outcome of each step must be known so a
            # failed deletion is not reported to the caller as a success.
            try:
                completed = subprocess.run(command, capture_output=True, text=True, check=True)
                output = completed.stdout.strip()
            except subprocess.CalledProcessError as e:
                print(f"Error executing command: {cmd_str}", file=sys.stderr)
                print(f"Error: {e.stderr}", file=sys.stderr)
                succeeded = False
            except OSError as e:
                # git is missing or cannot be started
                print(f"Error executing command: {cmd_str}", file=sys.stderr)
                print(f"Error: {e}", file=sys.stderr)
                succeeded = False

        print(f"Delete {label} tag: {output}")

    return succeeded
| 101 | + |
| 102 | + |
def clean_tags(args):
    """Delete all but the first ``args.keep_count`` tags of every stack.

    Lists the repository's tags, groups them per stack, shows what would be
    removed, asks for confirmation (unless ``--confirm`` or ``--dry-run`` was
    given) and then deletes the selected tags.

    Args:
        args: Parsed CLI namespace providing ``keep_count`` (int),
            ``dry_run`` (bool) and ``confirm`` (bool).

    Exits with status 1 when ``keep_count`` is below 1.
    """
    if args.keep_count < 1:
        print("Error: keep-count must be at least 1", file=sys.stderr)
        sys.exit(1)

    print(f"Git Tag Cleaner - Keeping {args.keep_count} latest versions per stack")

    # Get all tags
    all_tags = get_all_tags()
    if not all_tags:
        print("No tags found in repository")
        return

    print(f"Found {len(all_tags)} total tags")

    # Group tags by stack
    stacks = group_tags_by_stack(all_tags)
    print(f"Found {len(stacks)} stacks")

    # Show tag distribution
    for stack_name, tags in sorted(stacks.items()):
        print(f" {stack_name}: {len(tags)} versions")

    # Get tags to delete
    tags_to_delete = get_tags_to_delete(stacks, args.keep_count)

    if not tags_to_delete:
        print(f"No tags to delete - all stacks have <= {args.keep_count} versions")
        return

    print(f"\nFound {len(tags_to_delete)} tags to delete:")
    for tag in sorted(tags_to_delete):
        print(f" - {tag}")

    # Confirm deletion
    if not args.dry_run and not args.confirm:
        response = input(f"\nDelete these {len(tags_to_delete)} tags? (y/N): ")
        # Tolerate stray whitespace around the answer
        if response.strip().lower() != 'y':
            print("Aborted by user")
            return

    # Delete tags, counting only those delete_tag reports as successful
    success_count = 0
    failed_tags = []
    for tag in tags_to_delete:
        try:
            deleted = delete_tag(tag, args.dry_run)
        except Exception as e:
            print(f"Error deleting tag {tag}: {e}", file=sys.stderr)
            deleted = False

        if deleted:
            success_count += 1
        else:
            failed_tags.append(tag)

    print(f"\nCompleted: {success_count}/{len(tags_to_delete)} tags processed")
    if failed_tags:
        print(f"Failed to delete: {', '.join(failed_tags)}", file=sys.stderr)
| 155 | + |
| 156 | + |
def clean_github_issues(args):
    """Delete GitHub issues beyond the first ``args.max_issues`` fetched.

    Issues are fetched via ``_fetch_all_issues`` (which requests them sorted
    by creation date, newest first); every issue after the first
    ``max_issues`` is handed to ``_process_issue_with_graphql``.

    Args:
        args: Parsed CLI namespace providing ``max_issues`` (int) and
            ``dry_run`` (bool).

    Exits with status 1 when ``max_issues`` is negative. Returns without
    doing anything when the ``GITHUB_TOKEN`` environment variable is unset.
    """
    max_issues = args.max_issues
    if max_issues < 0:
        # A negative value would slice from the end of the list and select
        # the wrong issues, so reject it up front.
        print("Error: max-issues must not be negative", file=sys.stderr)
        sys.exit(1)

    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        print("Error: GITHUB_TOKEN environment variable is required", file=sys.stderr)
        return

    github_headers = {'Authorization': f'token {token}'}
    all_issues = _fetch_all_issues(github_headers)

    if len(all_issues) <= max_issues:
        print(f"No issues to clean. Found {len(all_issues)} total issues, keeping {max_issues}")
        return

    issues_to_process = all_issues[max_issues:]
    print(f"Found {len(all_issues)} total issues")
    print(f"Will keep {max_issues} most recent issues")
    print(f"Will close and delete {len(issues_to_process)} oldest issues")

    for issue in issues_to_process:
        _process_issue_with_graphql(issue, args.dry_run, github_headers)
| 179 | + |
| 180 | + |
def _fetch_all_issues(headers: Dict[str, str]) -> List[Dict]:
    """Fetch the issues of ``drycc/stacks``, following pagination.

    Issues of any state are requested sorted by creation date, descending.
    Pull requests, which the REST issues endpoint lists alongside real
    issues, are dropped so they are never counted or cleaned up as issues.

    Args:
        headers: HTTP headers (including authorization) sent with each request.

    Returns:
        The issue objects collected so far. Fetching stops at the first empty
        page, after 50 pages, or at the first request error (which is
        printed), so the list may be incomplete.
    """
    issues_url = "https://api.github.com/repos/drycc/stacks/issues"
    per_page = 100
    max_pages = 50  # Max 5000 issues
    all_issues = []

    for page in range(1, max_pages + 1):
        params = {
            'state': 'all',
            'sort': 'created',
            'direction': 'desc',
            'per_page': per_page,
            'page': page
        }

        try:
            # Without a timeout a stalled connection would hang the script.
            response = requests.get(issues_url, headers=headers, params=params, timeout=30)
            response.raise_for_status()
            page_items = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching issues on page {page}: {e}")
            break

        # Test the raw page, not the filtered one: a page made up only of
        # pull requests must not end pagination early.
        if not page_items:
            break

        # Pull requests carry a 'pull_request' key; keep real issues only.
        all_issues.extend(item for item in page_items if 'pull_request' not in item)

    return all_issues
| 216 | + |
| 217 | + |
def _process_issue_with_graphql(issue: Dict, dry_run: bool, headers: Dict[str, str]) -> None:
    """Delete one issue through the GraphQL API, closing it if deletion fails.

    Args:
        issue: Issue object as returned by the REST API; ``number`` and
            ``title`` are required, ``node_id`` is used when present.
        dry_run: When True, only print what would be deleted.
        headers: HTTP headers (including authorization) sent with each request.
    """
    import time

    issue_number = issue['number']
    issue_title = issue['title']

    # The REST issues endpoint also returns pull requests; never delete or
    # close those as part of issue cleanup.
    if 'pull_request' in issue:
        print(f"Skipping #{issue_number}: {issue_title} (pull request, not an issue)")
        return

    if dry_run:
        print(f"[DRY RUN] Would delete issue #{issue_number}: {issue_title}")
        return

    # Prefer the node_id already present in the payload and only fall back
    # to an extra lookup request when it is missing.
    node_id = issue.get('node_id') or _get_issue_node_id(issue_number, headers)
    if not node_id:
        print(f"Could not get node_id for issue #{issue_number}, attempting to close instead")
        _close_issue(issue_number, issue_title, headers)
        return

    # Use GraphQL to delete the issue. The id is passed as a variable rather
    # than being interpolated into the query text.
    graphql_url = "https://api.github.com/graphql"
    graphql_query = {
        "query": "mutation($issueId: ID!) { deleteIssue(input: {issueId: $issueId}) { clientMutationId } }",
        "variables": {"issueId": node_id},
    }

    try:
        response = requests.post(graphql_url, headers=headers, json=graphql_query, timeout=30)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error deleting issue #{issue_number} via GraphQL: {e}")
        print(f"Attempting to close issue #{issue_number} instead")
        _close_issue(issue_number, issue_title, headers)
    else:
        # GraphQL reports failures in the response body, not via HTTP status.
        errors = result.get('errors')
        if errors:
            error_message = errors[0].get('message', 'Unknown error')
            print(f"GraphQL error deleting issue #{issue_number}: {error_message}")
            print(f"Attempting to close issue #{issue_number} instead")
            _close_issue(issue_number, issue_title, headers)
        else:
            print(f"Deleted issue #{issue_number}: {issue_title}")

    time.sleep(0.2)  # Rate limiting
| 260 | + |
| 261 | + |
def _get_issue_node_id(issue_number: int, headers: Dict[str, str]) -> Optional[str]:
    """Look up the ``node_id`` of an issue in ``drycc/stacks``.

    Args:
        issue_number: Number of the issue to look up.
        headers: HTTP headers (including authorization) sent with the request.

    Returns:
        The ``node_id`` value from the response, or None when the request
        fails (the error is printed) or the response has no ``node_id``.
    """
    issue_url = f"https://api.github.com/repos/drycc/stacks/issues/{issue_number}"

    try:
        # Without a timeout a stalled connection would hang the script.
        response = requests.get(issue_url, headers=headers, timeout=30)
        response.raise_for_status()
        issue_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching node_id for issue #{issue_number}: {e}")
        return None

    return issue_data.get('node_id')
| 274 | + |
| 275 | + |
def _close_issue(issue_number: int, issue_title: str, headers: Dict[str, str]) -> None:
    """Close an issue in ``drycc/stacks``; used when it could not be deleted.

    Args:
        issue_number: Number of the issue to close.
        issue_title: Title of the issue, used only in the printed message.
        headers: HTTP headers (including authorization) sent with the request.

    Request errors are printed, not raised.
    """
    close_url = f"https://api.github.com/repos/drycc/stacks/issues/{issue_number}"
    close_data = {'state': 'closed'}

    try:
        # Without a timeout a stalled connection would hang the script.
        close_response = requests.patch(close_url, headers=headers, json=close_data, timeout=30)
        close_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error closing issue #{issue_number}: {e}")
    else:
        print(f"Closed issue #{issue_number}: {issue_title} (could not delete)")
| 287 | + |
| 288 | + |
def main():
    """Parse the command line and run the selected cleanup action.

    Prints the help text and exits with status 1 when no action is given.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Git repository cleaner with various cleanup operations',
        epilog="""
Examples:
 python cleaner.py tags -n 3 # Keep 3 latest versions per stack
 python cleaner.py tags -n 5 --dry-run # Preview tag cleanup
 python cleaner.py tags -n 2 --confirm # Skip confirmation prompt
 python cleaner.py issues --max-issues 50 # Keep 50 most recent issues
 python cleaner.py issues --dry-run # Preview issue cleanup

Note: Issues cleanup requires admin permissions to delete issues.
If deletion fails, issues will be closed instead.
 """
    )

    # Both sub-commands accept the same --dry-run switch.
    dry_run_spec = {
        'action': 'store_true',
        'help': 'Only print commands without executing them',
    }

    subparsers = parser.add_subparsers(dest='action', help='Cleanup action to perform')

    # "tags" sub-command
    tags_parser = subparsers.add_parser('tags', help='Clean up old git tags')
    tags_parser.add_argument(
        '-n', '--keep-count',
        type=int,
        required=True,
        help='Number of latest versions to keep for each stack',
    )
    tags_parser.add_argument('--dry-run', **dry_run_spec)
    tags_parser.add_argument(
        '--confirm',
        action='store_true',
        help='Skip confirmation prompt and execute directly',
    )

    # "issues" sub-command
    issues_parser = subparsers.add_parser('issues', help='Clean up GitHub issues')
    issues_parser.add_argument(
        '--max-issues',
        type=int,
        default=100,
        help='Maximum number of issues to keep (default: 100)',
    )
    issues_parser.add_argument('--dry-run', **dry_run_spec)

    options = parser.parse_args()
    action = options.action

    if not action:
        parser.print_help()
        sys.exit(1)

    # Map each action name to the function implementing it.
    handlers = {
        'tags': clean_tags,
        'issues': clean_github_issues,
    }
    handler = handlers.get(action)
    if handler is None:
        parser.error(f"Unknown action: {action}")

    handler(options)
| 336 | + |
| 337 | + |
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
0 commit comments