diff --git a/README.md b/README.md
index 053bfff..da9da09 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
# GitHub Reporting Tool 👨💻 📊 🐍
+
> Create detailed and summary CSV reports of activity by a GitHub user, using the GraphQL API
[](https://github.com/MichaelCurrin/github-reporting-py/tags/)
@@ -7,6 +8,20 @@
[](https://python.org)
[](https://graphql.github.io/)
+## Quick start
+
+Say you just want a CSV file of all commits by a given user on the default branch of a repo.
+
+1. `cd ghgql`
+2. Set your GitHub access token in `etc/app.local.yml`
+3. Run `python config.py`
+4. Run the commit report, filling in your own values:
+
+```bash
+python repo_commits.py REPO_OWNER REPO_NAME COMMITTER -o OUTPUT_DIR -s START_DATE -e END_DATE
+```
+
+This app queries commits from the default branch of a repo unless you pass `-b BRANCH` to target another branch.
## Purpose
@@ -17,7 +32,6 @@ This tool was created to:
- Act as a **wrapper** on requests and formatting, so you can focus on writing or using a query and getting the data out as a CSV.
- Act as an easy CLI for anyone - regardless of the language the tool is implemented in (beyond the initial install).
-
## Documentation
@@ -26,12 +40,10 @@ This tool was created to:
-
## Contributing
If you want to make the project better, see the [contribution guidelines](/CONTRIBUTING.md).
-
## License
Released under [MIT](/LICENSE) by [@MichaelCurrin](https://github.com/MichaelCurrin/).
diff --git a/ghgql/demo/variables.py b/ghgql/demo/variables.py
index 9fbc580..a74ce72 100644
--- a/ghgql/demo/variables.py
+++ b/ghgql/demo/variables.py
@@ -64,7 +64,7 @@
headers = {"Authorization": f"token {config.ACCESS_TOKEN}"}
# Send the POST request.
-resp = requests.post(config.BASE_URL, json=payload, headers=headers)
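+# A timeout stops the demo from hanging if the API never responds.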
+resp = requests.post(config.BASE_URL, json=payload, headers=headers, timeout=10)
# Pretty print the output.
prettified = json.dumps(resp.json(), indent=4)
diff --git a/ghgql/lib/__init__.py b/ghgql/lib/__init__.py
index 602d692..f4b1baf 100644
--- a/ghgql/lib/__init__.py
+++ b/ghgql/lib/__init__.py
@@ -30,12 +30,16 @@
HEADERS = {"Authorization": f"token {config.ACCESS_TOKEN}"}
MAX_ATTEMPTS = 3
-dict_of_str = dict[str, str]
-list_of_str = list[str]
+DictOfStr = dict[str, str]
+ListOfStr = list[str]
-def _request(url: str, payload: dict_of_str, headers: dict_of_str):
- resp = requests.post(url, json=payload, headers=headers)
+def _request(url: str, payload: DictOfStr, headers: DictOfStr):
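+    # The timeout (in seconds) stops a stalled request from hanging indefinitely.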
+ resp = requests.post(
+ url,
+ json=payload,
+ headers=headers,
+ timeout=10)
resp_json = resp.json()
resp_msg = resp_json.get("message", None)
@@ -66,7 +70,7 @@ def _request(url: str, payload: dict_of_str, headers: dict_of_str):
return resp_json
-def fetch_github_data(query: str, variables=None) -> dict_of_str:
+def fetch_github_data(query: str, variables={}) -> DictOfStr:
"""
Get data from GitHub API using given parameters.
@@ -74,9 +78,6 @@ def fetch_github_data(query: str, variables=None) -> dict_of_str:
might still contain some data. A 404 will not contain the data or errors
keys.
"""
- if not variables:
- variables = {}
-
payload = {
"query": query,
"variables": variables,
@@ -99,9 +100,9 @@ def fetch_github_data(query: str, variables=None) -> dict_of_str:
# TODO: Sleep for set time or perhaps short time if too frequent
# between requests.
- seconds = 10
+ seconds = 1
text.eprint(f"Sleeping {seconds} s...")
- sleep(seconds * 1000)
+ sleep(seconds)
text.eprint("Retrying...")
else:
break
@@ -113,7 +114,7 @@ def read_file(path: Path):
"""
TODO: Refactor to use Path().read() instead.
"""
- with open(path) as f_in:
+    with open(path, encoding="utf-8") as f_in:
file_text = f_in.read()
return file_text
@@ -131,7 +132,7 @@ def write_file(content, path: Path):
print("Writing")
print(f" - path: {path}")
- with open(path, "w") as f_out:
+ with open(path, "w", encoding='utf8') as f_out:
f_out.writelines(content)
@@ -144,24 +145,24 @@ def read_template(path: Path):
# TODO Rename to path.
# TODO Refactor so the file only has to be read once for a set of paged queries.
-def query_by_filename(path: Path, variables=None):
- if not variables:
- variables = {}
-
+def query_by_filename(path: Path, variables={}):
+ """
+ Use query file `path` and `variables` to make a query.
+ """
query = read_file(path)
- resp = fetch_github_data(query, variables)
- return resp
+ return fetch_github_data(query, variables)
def read_csv(path: Path):
- with open(path) as f_in:
- reader = csv.DictReader(f_in)
-
- return list(reader)
+ """
+ Read a CSV file.
+ """
+ with open(path, "r", encoding='utf8') as f_in:
+ return list(csv.DictReader(f_in))
-def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
+def write_csv(path: Path, rows: list[DictOfStr], append=False) -> None:
"""
Write a CSV file to a path with given rows and header from first row.
@@ -178,10 +179,9 @@ def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
is_new_file = not path.exists()
mode = "a" if append else "w"
-
fieldnames = list(rows[0].keys())
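+    # The CSV header is taken from the first row's keys.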
- with open(path, mode) as f_out:
+    with open(path, mode, encoding="utf-8") as f_out:
writer = csv.DictWriter(f_out, fieldnames)
if is_new_file or not append:
@@ -195,7 +195,7 @@ def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
print()
-def process_variables(args: list_of_str) -> dict_of_str:
+def process_variables(args: ListOfStr) -> DictOfStr:
"""
Process command-line arguments containing a filename and key-value pairs.
"""
@@ -222,7 +222,7 @@ def process_variables(args: list_of_str) -> dict_of_str:
return {}
-def process_args(args: list_of_str):
+def process_args(args: ListOfStr):
"""
Process command-line arguments containing a filename and key-value pairs.
diff --git a/ghgql/lib/git.py b/ghgql/lib/git.py
index 485538c..435f5df 100644
--- a/ghgql/lib/git.py
+++ b/ghgql/lib/git.py
@@ -9,7 +9,7 @@
# TODO This could be better as a class - then the structure can be reused
# and used for type checking as a whole or getting fields on the object.
# Use init to process `value`.
-def parse_commit(value: dict):
+def parse_commit(value: dict, verbose=False):
"""
Extract relevant fields from nested data and return as a flat dict.
"""
@@ -21,23 +21,31 @@ def parse_commit(value: dict):
committer_login = committer["login"] if committer is not None else None
commit_date = time.as_date(value["committedDate"])
- return dict(
- commit_id=value["abbreviatedOid"],
- author_date=author_date,
- author_login=author_login,
- committed_date=commit_date,
- committer_login=committer_login,
- changed_files=value["changedFiles"],
- additions=value["additions"],
- deletions=value["deletions"],
- message=value["message"],
- )
-
-
-def prepare_row(commit: dict, repo_name: str, branch_name: str):
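+    # Verbose output adds author and change-size fields; the default keeps rows compact.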
+ if verbose:
+ return dict(
+ commit_id=value["abbreviatedOid"],
+ author_date=author_date,
+ author_login=author_login,
+ committed_date=commit_date,
+ committer_login=committer_login,
+ changed_files=value["changedFiles"],
+ additions=value["additions"],
+ deletions=value["deletions"],
+ message=value["message"],
+ )
+ else:
+ return dict(
+ commit_id=value["abbreviatedOid"],
+ committed_date=commit_date,
+ committer_login=committer_login,
+ message=value["message"],
+ )
+
+
+def prepare_row(commit: dict, repo_name: str, branch_name: str, verbose=False):
"""
Convert commit metadata to a dict for writing to a CSV.
"""
- parsed_commit_data = parse_commit(commit)
+ parsed_commit_data = parse_commit(commit, verbose)
return dict(repo_name=repo_name, branch_name=branch_name, **parsed_commit_data)
diff --git a/ghgql/queries/repos/repo_commits_branch.gql b/ghgql/queries/repos/repo_commits_branch.gql
new file mode 100644
index 0000000..1bc187f
--- /dev/null
+++ b/ghgql/queries/repos/repo_commits_branch.gql
@@ -0,0 +1,39 @@
+# Get details of all commits on a given branch of a single repo, with an
+# optional start date, using paging to fetch all commits.
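+#
+# Note: only a start date (`since`) is sent to the API here; the end date is
+# filtered client-side in repo_commits.py.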
+query CommitsForRepo($owner: String!, $repo_name: String!, $branch_name: String!, $since: GitTimestamp, $cursor: String) {
+ repository(owner: $owner, name: $repo_name) {
+ ref(qualifiedName: $branch_name) {
+ name
+ target {
+ ... on Commit {
+ history(since: $since, first: 100, after: $cursor) {
+ totalCount
+ pageInfo {
+ hasNextPage
+ endCursor
+ }
+ nodes {
+ abbreviatedOid
+ authoredDate
+ author {
+ user {
+ login
+ }
+ }
+ committedDate
+ committer {
+ user {
+ login
+ }
+ }
+ changedFiles
+ additions
+ deletions
+ message
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/ghgql/repo_commits.py b/ghgql/repo_commits.py
index 9b033a9..8e0cce3 100755
--- a/ghgql/repo_commits.py
+++ b/ghgql/repo_commits.py
@@ -3,7 +3,7 @@
Repo commits report application.
Fetch all commits for a single given repo using paging. Accepts an optional
-start date.
+output dir, start date, end date, and verbosity, and filters by committer.
"""
import argparse
import datetime
@@ -14,53 +14,65 @@
import lib.git
QUERY_PATH = Path("queries/repos/repo_commits.gql")
-CSV_OUT_NAME = "repo-commits--{repo_name}--end-{end_date}--start-{start_date}.csv"
+QUERY_PATH_BRANCH = Path("queries/repos/repo_commits_branch.gql")
+CSV_OUT_NAME = "{owner}--{repo}--start-{start}--end-{end}.csv"
+CSV_OUT_NAME_BRANCH = "{owner}--{repo}--{branch}--start-{start}--end-{end}.csv"
-def parse(resp: dict):
+
+def parse(resp: dict, branch_name):
"""
Parse response data for the repo commits query.
"""
- branch = resp["repository"]["defaultBranchRef"]
-
+ key = "ref" if branch_name else "defaultBranchRef"
+ branch = resp["repository"][key]
branch_name = branch.get("name")
-
commit_history = branch["target"]["history"]
total_commits = commit_history["totalCount"]
commits = commit_history["nodes"]
page_info = commit_history["pageInfo"]
-
cursor = page_info["endCursor"] if page_info["hasNextPage"] else None
return branch_name, total_commits, commits, cursor
-def process_response(resp: dict, repo_name: str):
+def process_response(resp: dict, repo_name: str, branch_name: str, verbose=False):
"""
Format the response from a request for repo commits.
"""
- branch_name, total_commits, commits, cursor = parse(resp)
-
+ branch_name, total_commits, commits, cursor = parse(resp, branch_name)
processed_commits = [
- lib.git.prepare_row(c, repo_name, branch_name) for c in commits
+ lib.git.prepare_row(c, repo_name, branch_name, verbose) for c in commits
]
return processed_commits, total_commits, cursor
-def get_commits(owner: str, repo_name: str, start_date=None) -> list[dict]:
+def get_commits(
+ owner: str,
+ repo_name: str,
+ branch_name=None,
+ committer=None,
+ start_date=None,
+ end_date=None,
+ verbose=False) -> list[dict]:
"""
Fetch all commits for a given repo and an optional start date.
Uses paging if there is more than 1 page of 100 commits to fetch. Returns a
list of zero or more dict objects with commit data.
"""
- print("/".join((owner, repo_name)))
+ if branch_name:
+ print("/".join((owner, repo_name, branch_name)))
+ else:
+ print("/".join((owner, repo_name)))
since = lib.time.as_git_timestamp(start_date) if start_date else None
+ before = lib.time.as_git_timestamp(end_date) if end_date else None
query_variables = dict(
owner=owner,
repo_name=repo_name,
+ branch_name=branch_name,
since=since,
)
@@ -70,37 +82,71 @@ def get_commits(owner: str, repo_name: str, start_date=None) -> list[dict]:
while True:
counter += 1
- resp = lib.query_by_filename(QUERY_PATH, query_variables)
- commits, total_commits, cursor = process_response(resp, repo_name)
+ query_path = QUERY_PATH_BRANCH if branch_name else QUERY_PATH
+ resp = lib.query_by_filename(query_path, query_variables)
+ commits, _, cursor = process_response(resp, repo_name, branch_name, verbose)
results.extend(commits)
- if counter == 1:
- print(f" - commits: {total_commits}")
- print(f" - pages: {math.ceil(total_commits / 100)}")
- print(f"Processed page: #{counter}")
-
if cursor:
query_variables["cursor"] = cursor
else:
break
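+    # Committer and end-date filters are applied client-side, after all pages are fetched.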
+ if committer:
+ results = list(filter(
+ lambda res: res["committer_login"] == committer,
+ results
+ ))
+
+ if before:
+ before = lib.time.as_date(before)
+ results = list(filter(
+ lambda res: res["committed_date"] < before,
+ results
+ ))
+
+    total_commits = len(results)
+    print(f" - commits (after filtering): {total_commits}")
+    print(f" - pages processed: {counter}")
+
return results
-def commits_to_csv(owner, repo_name, start_date=None):
+def commits_to_csv(
+ owner,
+ repo_name,
+ branch_name=None,
+ committer=None,
+ output_dir=None,
+ start_date="START",
+ end_date=datetime.date.today(),
+ verbose=False):
"""
Write a CSV of all commits in a repo.
Existing file will be overwritten.
"""
- filename = CSV_OUT_NAME.format(
- repo_name=repo_name,
- end_date=datetime.date.today(),
- start_date=start_date if start_date else "INIT",
+ filename = CSV_OUT_NAME_BRANCH if branch_name else CSV_OUT_NAME
+ filename = filename.format(
+ owner=owner,
+ repo=repo_name,
+ branch=branch_name,
+        start=start_date if start_date else "START",
+        end=end_date if end_date else datetime.date.today(),
+ )
+ output_dir = Path(output_dir) if output_dir else lib.VAR_DIR
+ path = output_dir / filename
+ repo_commits = get_commits(
+ owner,
+ repo_name,
+ branch_name,
+ committer,
+ start_date,
+ end_date,
+ verbose,
)
- path = lib.VAR_DIR / filename
-
- repo_commits = get_commits(owner, repo_name, start_date)
lib.write_csv(path, repo_commits, append=False)
@@ -114,10 +160,29 @@ def main():
parser.add_argument(
"owner",
metavar="OWNER",
+ help="Owner of the repository."
+ )
+ parser.add_argument(
+ "repo",
+ metavar="REPO",
+ help="Repository name."
+ )
+ parser.add_argument(
+ "committer",
+ metavar="COMMITTER",
+ help="Username of the commit author."
+ )
+ parser.add_argument(
+ "-b",
+ "--branch",
+ metavar="BRANCH",
+ help="Specific branch to pull commits from.",
)
parser.add_argument(
- "repo_name",
- metavar="REPO_NAME",
+ "-o",
+ "--output-dir",
+ metavar="DIR",
+ help="Directory in which to write the csv file.",
)
parser.add_argument(
"-s",
@@ -126,12 +191,29 @@ def main():
help="Optionally filter to commits from this date onwards."
" Format: 'YYYY-MM-DD'.",
)
+ parser.add_argument(
+ "-e",
+ "--end",
+ metavar="DATE",
+ help="Optionally filter to commits strictly before this date."
+ " Format: 'YYYY-MM-DD'.",
+ )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Include author and change-size fields in the output.",
+    )
args = parser.parse_args()
commits_to_csv(
args.owner,
- args.repo_name,
+ args.repo,
+ args.branch,
+ args.committer,
+ args.output_dir,
args.start,
+ args.end,
+ args.verbose,
)
diff --git a/ghgql/repos_and_commits.py b/ghgql/repos_and_commits.py
index a5ce5d3..2ee672a 100755
--- a/ghgql/repos_and_commits.py
+++ b/ghgql/repos_and_commits.py
@@ -46,7 +46,7 @@ def render(template, owner, repos, since, dry_run=False):
"""
Prepare and return template for repo commits query.
"""
- return template.render(owner=owner, repos=repos, since=since, dry_run=dry_run)
+    return template.render(owner=owner, repos=repos, since=since, dry_run=dry_run)
def process_results(results):
@@ -63,10 +63,9 @@ def process_results(results):
for repo_data in results.values():
name = repo_data["name"]
branch = repo_data["defaultBranchRef"]
-
branch_name = branch.get("name")
-
raw_commits = branch["target"]["history"]["nodes"]
+
if raw_commits:
for c in raw_commits:
parsed_commit_data = lib.git.parse_commit(c)
@@ -99,9 +98,12 @@ def get_results(template, owner, repos, since, dry_run):
def write(path, rows):
+ """
+ Write `rows` to the file `path`.
+ """
wrote_header = False
- with open(path, "w") as f_out:
+ with open(path, "w", encoding='utf8') as f_out:
fieldnames = (
"repo_name",
"branch_name",
diff --git a/ghgql/repos_recent_commits.py b/ghgql/repos_recent_commits.py
index 108fb31..f10f906 100755
--- a/ghgql/repos_recent_commits.py
+++ b/ghgql/repos_recent_commits.py
@@ -9,7 +9,7 @@
# TODO Since param
# TODO: Refactor - move this function to lib.
-def parse_commit(value):
+def parse_commit(value, verbose=False):
"""
Extract fields from nested data as returned from API and return as flat dict.
"""
@@ -21,15 +21,23 @@ def parse_commit(value):
committer_login = committer["login"] if committer is not None else None
commit_date = lib.time.as_date(value["committedDate"])
- return dict(
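+    # As in lib.git.parse_commit, verbose adds author and change-stat fields.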
+ if verbose:
+ return dict(
+ commit_id=value["abbreviatedOid"],
+ author_date=author_date,
+ author_login=author_login,
+ committed_date=commit_date,
+ committer_login=committer_login,
+ changed_files=value["changedFiles"],
+ additions=value["additions"],
+ deletions=value["deletions"],
+ message=value["message"],
+ )
+ else:
+ return dict(
commit_id=value["abbreviatedOid"],
- author_date=author_date,
- author_login=author_login,
committed_date=commit_date,
committer_login=committer_login,
- changed_files=value["changedFiles"],
- additions=value["additions"],
- deletions=value["deletions"],
message=value["message"],
)