Skip to content

Commit b3367e7

Browse files
committed
chore(stacks): add cleaner.py scripts
1 parent c949a35 commit b3367e7

4 files changed

Lines changed: 344 additions & 9 deletions

File tree

Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,12 @@ RUN install-packages \
6767

6868
ENV PATH "/usr/local/python/bin:${PATH}"
6969

70+
ADD requirements.txt /tmp/requirements.txt
71+
7072
RUN UPX_VERSION=4.1.0; \
7173
OS_ARCH=$(dpkg --print-architecture); \
7274
wget https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-${OS_ARCH}_linux.tar.xz; \
7375
tar -Jxvf upx-${UPX_VERSION}-${OS_ARCH}_linux.tar.xz; \
7476
cp upx-${UPX_VERSION}-${OS_ARCH}_linux/upx /usr/local/bin; \
7577
rm -rf upx-*; \
76-
pip install oss2 packaging;
78+
pip install -r /tmp/requirements.txt;

build.sh

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -119,14 +119,6 @@ function patch() {
119119
return 0
120120
}
121121

122-
function clean-tags() {
123-
for tag in $(git tag --sort creatordate|head -n "$1")
124-
do
125-
git tag -d ${tag}
126-
git push origin :refs/tags/${tag}
127-
done
128-
}
129-
130122
function all() {
131123
STACK_NAME=$(echo "${1}" | cut -d '@' -f 1)
132124
STACK_VERSION=$(echo "${1}" | cut -d '@' -f 2)

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
oss2>=2.19.1
2+
packaging>=21.0

scripts/cleaner.py

Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Git Repository Cleaner
4+
5+
This script provides various cleanup operations for git repositories and GitHub issues.
6+
7+
Usage:
8+
python cleaner.py tags -n 3 [--dry-run] [--confirm]
9+
python cleaner.py issues --max-issues 100 [--dry-run]
10+
"""
11+
12+
import argparse
13+
import subprocess
14+
import sys
15+
import os
16+
import requests
17+
from collections import defaultdict
18+
from typing import List, Dict, Tuple
19+
20+
21+
def run_command(command: List[str], dry_run: bool = False) -> str:
    """Run an external command and return its trimmed standard output.

    Args:
        command: Program name followed by its arguments. The list is handed
            to ``subprocess.run`` as-is, so no shell is involved.
        dry_run: When true, print the command instead of executing it.

    Returns:
        The command's stdout with surrounding whitespace removed. An empty
        string is returned in dry-run mode, when the command exits with a
        non-zero status, and when the command cannot be started at all.
    """
    # Local import: shlex is only needed to render the command for humans.
    import shlex

    # Quote every argument so the printed command stays unambiguous when an
    # argument contains spaces or shell metacharacters.
    cmd_str = ' '.join(shlex.quote(part) for part in command)

    if dry_run:
        print(f"[DRY RUN] Would execute: {cmd_str}")
        return ""

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # Diagnostics go to stderr so they never mix with regular output.
        print(f"Error executing command: {cmd_str}", file=sys.stderr)
        print(f"Error: {e.stderr}", file=sys.stderr)
        return ""
    except OSError as e:
        # Raised when the executable is missing or not runnable; treat it
        # like any other failed command instead of crashing the caller.
        print(f"Error executing command: {cmd_str}", file=sys.stderr)
        print(f"Error: {e}", file=sys.stderr)
        return ""

    return result.stdout.strip()
36+
37+
38+
def get_all_tags() -> List[str]:
    """List the tags known to the local git repository.

    Runs ``git tag -l`` in the current working directory.

    Returns:
        One entry per non-empty output line. An empty list is returned when
        there are no tags, when git exits with an error, or when git cannot
        be started.
    """
    try:
        result = subprocess.run(['git', 'tag', '-l'], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error getting tags: {e.stderr}", file=sys.stderr)
        return []
    except OSError as e:
        # git is not installed or not executable.
        print(f"Error getting tags: {e}", file=sys.stderr)
        return []

    # splitlines() copes with any line ending; blank lines are dropped so
    # callers never see an empty tag name.
    return [line.strip() for line in result.stdout.splitlines() if line.strip()]
46+
47+
48+
def parse_tag(tag: str) -> Tuple[str, str]:
    """Split a ``stack-name@version`` tag into ``(stack_name, version)``.

    Only the first ``@`` separates the two parts; any later ``@`` stays in
    the version.

    Raises:
        ValueError: If the tag has no ``@`` or either side of it is empty.
    """
    stack_name, separator, version = tag.partition('@')
    if not separator:
        raise ValueError(f"Invalid tag format: {tag}. Expected format: stack-name@version")
    if not stack_name or not version:
        raise ValueError(f"Invalid tag format: {tag}")

    return stack_name, version


def _version_sort_key(version: str) -> list:
    """Build a sort key that compares digit runs in *version* numerically."""
    # Local import: re is only needed for this key.
    import re

    # Splitting on a captured group always yields text at even indexes and
    # digit runs at odd indexes, so two keys never compare an int with a str.
    chunks = re.split(r'([0-9]+)', version)
    return [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]


def group_tags_by_stack(tags: List[str]) -> Dict[str, List[Tuple[str, str]]]:
    """Group tags by stack name, newest version first within each stack.

    Tags that do not parse as ``stack-name@version`` are reported on stderr
    and skipped.

    Versions are ordered numerically per digit run, so ``1.10.0`` ranks above
    ``1.9.0``; a plain string sort would rank it below and cause the wrong
    tags to be kept. Pre-release suffixes get no special treatment:
    ``1.0-rc1`` sorts above ``1.0``.

    Returns:
        A mapping of stack name to a list of ``(tag, version)`` pairs.
    """
    stacks = defaultdict(list)

    for tag in tags:
        try:
            stack_name, version = parse_tag(tag)
        except ValueError as e:
            print(f"Warning: {e}", file=sys.stderr)
            continue
        stacks[stack_name].append((tag, version))

    # Sort versions for each stack (newest first)
    for entries in stacks.values():
        entries.sort(key=lambda entry: _version_sort_key(entry[1]), reverse=True)

    return stacks
77+
78+
79+
def get_tags_to_delete(stacks: Dict[str, List[Tuple[str, str]]], keep_count: int) -> List[str]:
    """Collect the tag names that fall outside the first ``keep_count`` entries.

    For each stack, the first ``keep_count`` ``(tag, version)`` pairs are
    kept in the order given and the tag names of all remaining pairs are
    returned. Stacks with at most ``keep_count`` entries contribute nothing.
    """
    return [
        entry[0]
        for versions in stacks.values()
        if len(versions) > keep_count
        for entry in versions[keep_count:]
    ]
88+
89+
90+
def delete_tag(tag: str, dry_run: bool = False) -> bool:
    """Delete a tag locally and on the ``origin`` remote.

    Both deletions are always attempted, even if the first one fails.

    The git commands are run directly here rather than through
    ``run_command``: that helper returns an empty string both for a failed
    command and for a successful one that prints nothing to stdout, so a
    failure could not be detected through it.

    Args:
        tag: Name of the tag to delete.
        dry_run: When true, only print the commands that would run.

    Returns:
        True when every step succeeded (always true in dry-run mode), False
        when at least one git command failed or could not be started.
    """
    print(f"Processing tag: {tag}")

    steps = (
        ("local", ['git', 'tag', '-d', tag]),
        ("remote", ['git', 'push', 'origin', f':refs/tags/{tag}']),
    )

    all_succeeded = True
    for scope, command in steps:
        cmd_str = ' '.join(command)
        output = ""

        if dry_run:
            print(f"[DRY RUN] Would execute: {cmd_str}")
        else:
            try:
                completed = subprocess.run(command, capture_output=True, text=True, check=True)
                output = completed.stdout.strip()
            except subprocess.CalledProcessError as e:
                print(f"Error executing command: {cmd_str}", file=sys.stderr)
                print(f"Error: {e.stderr}", file=sys.stderr)
                all_succeeded = False
            except OSError as e:
                # git is missing or not executable.
                print(f"Error executing command: {cmd_str}", file=sys.stderr)
                print(f"Error: {e}", file=sys.stderr)
                all_succeeded = False

        print(f"Delete {scope} tag: {output}")

    return all_succeeded
101+
102+
103+
def clean_tags(args):
    """Delete all but the newest ``keep_count`` tags of every stack.

    Args:
        args: Parsed command-line arguments. Reads ``keep_count`` (int),
            ``dry_run`` (bool) and ``confirm`` (bool).

    Exits with status 1 when ``keep_count`` is smaller than 1. Unless
    ``dry_run`` or ``confirm`` is set, the user is asked to confirm before
    anything is deleted; a missing stdin counts as "no".
    """
    if args.keep_count < 1:
        print("Error: keep-count must be at least 1", file=sys.stderr)
        sys.exit(1)

    print(f"Git Tag Cleaner - Keeping {args.keep_count} latest versions per stack")

    # Get all tags
    all_tags = get_all_tags()
    if not all_tags:
        print("No tags found in repository")
        return

    print(f"Found {len(all_tags)} total tags")

    # Group tags by stack
    stacks = group_tags_by_stack(all_tags)
    print(f"Found {len(stacks)} stacks")

    # Show tag distribution
    for stack_name, tags in sorted(stacks.items()):
        print(f"  {stack_name}: {len(tags)} versions")

    # Get tags to delete
    tags_to_delete = get_tags_to_delete(stacks, args.keep_count)

    if not tags_to_delete:
        print(f"No tags to delete - all stacks have <= {args.keep_count} versions")
        return

    print(f"\nFound {len(tags_to_delete)} tags to delete:")
    for tag in sorted(tags_to_delete):
        print(f"  - {tag}")

    # Confirm deletion
    if not args.dry_run and not args.confirm:
        try:
            response = input(f"\nDelete these {len(tags_to_delete)} tags? (y/N): ")
        except EOFError:
            # No interactive stdin (e.g. a CI job): never delete unconfirmed.
            response = ''
        if response.strip().lower() != 'y':
            print("Aborted by user")
            return

    # Delete tags
    success_count = 0
    for tag in tags_to_delete:
        try:
            deleted = delete_tag(tag, args.dry_run)
        except Exception as e:
            # One bad tag must not stop the remaining deletions.
            print(f"Error deleting tag {tag}: {e}", file=sys.stderr)
            continue

        # Only count tags that delete_tag reports as handled successfully.
        if deleted:
            success_count += 1
        else:
            print(f"Failed to delete tag {tag}", file=sys.stderr)

    print(f"\nCompleted: {success_count}/{len(tags_to_delete)} tags processed")
155+
156+
157+
def clean_github_issues(args):
    """Delete (or close) every GitHub issue beyond the newest ``max_issues``.

    Args:
        args: Parsed command-line arguments. Reads ``max_issues`` (int) and
            ``dry_run`` (bool).

    The API token is read from the ``GITHUB_TOKEN`` environment variable;
    without it the function prints an error and returns. Exits with status 1
    when ``max_issues`` is negative.
    """
    max_issues = args.max_issues
    if max_issues < 0:
        # A negative value would make the slice below pick the wrong issues.
        print("Error: max-issues must be at least 0", file=sys.stderr)
        sys.exit(1)

    # Check the token before building any header from it.
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        print("Error: GITHUB_TOKEN environment variable is required")
        return

    github_headers = {'Authorization': f'token {token}'}
    all_issues = _fetch_all_issues(github_headers)

    if len(all_issues) <= max_issues:
        print(f"No issues to clean. Found {len(all_issues)} total issues, keeping {max_issues}")
        return

    # The list is requested newest-first, so everything past the first
    # max_issues entries is what gets removed.
    issues_to_process = all_issues[max_issues:]
    print(f"Found {len(all_issues)} total issues")
    print(f"Will keep {max_issues} most recent issues")
    print(f"Will close and delete {len(issues_to_process)} oldest issues")

    for issue in issues_to_process:
        _process_issue_with_graphql(issue, args.dry_run, github_headers)
179+
180+
181+
def _fetch_all_issues(headers: Dict[str, str]) -> List[Dict]:
    """Fetch the issues of drycc/stacks, newest first, following pagination.

    Issues in any state are requested, sorted by creation date descending,
    100 per page and at most 50 pages. Entries that carry a ``pull_request``
    key are skipped: the issues endpoint also lists pull requests, and
    passing those on would get them closed by the delete fallback.

    Args:
        headers: HTTP headers sent with every request.

    Returns:
        The issues collected so far. A request error stops pagination and
        returns what was gathered up to that point.
    """
    issues_url = "https://api.github.com/repos/drycc/stacks/issues"
    per_page = 100
    max_pages = 50  # Max 5000 entries
    request_timeout = 30  # seconds; without one a stalled connection hangs forever

    all_issues = []
    for page in range(1, max_pages + 1):
        params = {
            'state': 'all',
            'sort': 'created',
            'direction': 'desc',
            'per_page': per_page,
            'page': page
        }

        try:
            response = requests.get(issues_url, headers=headers, params=params,
                                    timeout=request_timeout)
            response.raise_for_status()
            batch = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching issues on page {page}: {e}")
            break

        # An empty page marks the end. Test the raw page rather than the
        # filtered one, since a page may consist of pull requests only.
        if not batch:
            break

        all_issues.extend(item for item in batch if 'pull_request' not in item)

    return all_issues
216+
217+
218+
def _process_issue_with_graphql(issue: Dict, dry_run: bool, headers: Dict[str, str]) -> None:
    """Delete one issue through the GraphQL API, closing it if that fails.

    Args:
        issue: Issue object; ``number`` and ``title`` are required, and
            ``node_id`` is used when present.
        dry_run: When true, only print what would be deleted.
        headers: HTTP headers sent with every request.

    The issue is closed instead of deleted when no node id can be
    determined, when the GraphQL response reports errors, or when the
    request itself fails.
    """
    import time

    issue_number = issue['number']
    issue_title = issue['title']

    if dry_run:
        print(f"[DRY RUN] Would delete issue #{issue_number}: {issue_title}")
        return

    # Prefer the node id already present on the issue object and only fall
    # back to a separate lookup when it is missing; this saves one request
    # per issue.
    node_id = issue.get('node_id') or _get_issue_node_id(issue_number, headers)
    if not node_id:
        print(f"Could not get node_id for issue #{issue_number}, attempting to close instead")
        _close_issue(issue_number, issue_title, headers)
        return

    # Use GraphQL to delete the issue
    graphql_url = "https://api.github.com/graphql"
    graphql_query = {
        "query": f'mutation {{ deleteIssue(input: {{issueId: "{node_id}"}}) {{ clientMutationId }} }}'
    }

    try:
        # The timeout keeps a stalled connection from blocking the whole run.
        response = requests.post(graphql_url, headers=headers, json=graphql_query, timeout=30)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error deleting issue #{issue_number} via GraphQL: {e}")
        print(f"Attempting to close issue #{issue_number} instead")
        _close_issue(issue_number, issue_title, headers)
    else:
        errors = result.get('errors')
        if errors:
            error_message = errors[0].get('message', 'Unknown error')
            print(f"GraphQL error deleting issue #{issue_number}: {error_message}")
            print(f"Attempting to close issue #{issue_number} instead")
            _close_issue(issue_number, issue_title, headers)
        else:
            print(f"Deleted issue #{issue_number}: {issue_title}")

    time.sleep(0.2)  # Rate limiting
260+
261+
262+
def _get_issue_node_id(issue_number: int, headers: Dict[str, str]) -> str:
    """Look up the ``node_id`` of one issue in drycc/stacks.

    Args:
        issue_number: Number of the issue to look up.
        headers: HTTP headers sent with the request.

    Returns:
        The ``node_id`` value from the response. NOTE: despite the ``str``
        annotation, None is returned when the request fails or the response
        has no ``node_id``; callers must check for it.
    """
    issue_url = f"https://api.github.com/repos/drycc/stacks/issues/{issue_number}"

    try:
        # The timeout keeps a stalled connection from blocking the whole run.
        response = requests.get(issue_url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json().get('node_id')
    except requests.exceptions.RequestException as e:
        print(f"Error fetching node_id for issue #{issue_number}: {e}")
        return None
274+
275+
276+
def _close_issue(issue_number: int, issue_title: str, headers: Dict[str, str]) -> None:
    """Close an issue in drycc/stacks; used when it could not be deleted.

    Args:
        issue_number: Number of the issue to close.
        issue_title: Title of the issue, used only in the printed message.
        headers: HTTP headers sent with the request.

    Request errors are printed and not raised.
    """
    close_url = f"https://api.github.com/repos/drycc/stacks/issues/{issue_number}"
    close_data = {'state': 'closed'}

    try:
        # The timeout keeps a stalled connection from blocking the whole run.
        close_response = requests.patch(close_url, headers=headers, json=close_data, timeout=30)
        close_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error closing issue #{issue_number}: {e}")
    else:
        print(f"Closed issue #{issue_number}: {issue_title} (could not delete)")
287+
288+
289+
def main():
    """Parse the command line and run the requested cleanup action.

    Two sub-commands exist: ``tags`` (handled by ``clean_tags``) and
    ``issues`` (handled by ``clean_github_issues``). When no sub-command is
    given, the help text is printed and the process exits with status 1.
    """
    usage_examples = """
Examples:
python cleaner.py tags -n 3 # Keep 3 latest versions per stack
python cleaner.py tags -n 5 --dry-run # Preview tag cleanup
python cleaner.py tags -n 2 --confirm # Skip confirmation prompt
python cleaner.py issues --max-issues 50 # Keep 50 most recent issues
python cleaner.py issues --dry-run # Preview issue cleanup

Note: Issues cleanup requires admin permissions to delete issues.
If deletion fails, issues will be closed instead.
"""
    parser = argparse.ArgumentParser(
        description='Git repository cleaner with various cleanup operations',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    commands = parser.add_subparsers(dest='action', help='Cleanup action to perform')

    # "tags" sub-command
    tags_cmd = commands.add_parser('tags', help='Clean up old git tags')
    tags_cmd.add_argument(
        '-n', '--keep-count', type=int, required=True,
        help='Number of latest versions to keep for each stack',
    )
    tags_cmd.add_argument(
        '--dry-run', action='store_true',
        help='Only print commands without executing them',
    )
    tags_cmd.add_argument(
        '--confirm', action='store_true',
        help='Skip confirmation prompt and execute directly',
    )

    # "issues" sub-command
    issues_cmd = commands.add_parser('issues', help='Clean up GitHub issues')
    issues_cmd.add_argument(
        '--max-issues', type=int, default=100,
        help='Maximum number of issues to keep (default: 100)',
    )
    issues_cmd.add_argument(
        '--dry-run', action='store_true',
        help='Only print commands without executing them',
    )

    args = parser.parse_args()

    # Guard clause: no sub-command given.
    if not args.action:
        parser.print_help()
        sys.exit(1)

    handlers = {
        'tags': clean_tags,
        'issues': clean_github_issues,
    }
    handler = handlers.get(args.action)
    if handler is None:
        parser.error(f"Unknown action: {args.action}")
    handler(args)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)