Skip to content

Commit reporting #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# GitHub Reporting Tool 👨‍💻 📊 🐍

> Create detailed and summary CSV reports of activity by a GitHub user, using the GraphQL API

[![GitHub tag](https://img.shields.io/github/tag/MichaelCurrin/github-reporting-py)](https://github.com/MichaelCurrin/github-reporting-py/tags/)
Expand All @@ -7,6 +8,20 @@
[![Made with Python](https://img.shields.io/badge/Python->=3.9-blue?logo=python&logoColor=white)](https://python.org)
[![API - GitHub GraphQL](https://img.shields.io/badge/GitHub_API-V4_GraphQL-blue?logo=github)](https://graphql.github.io/)

## Quick start

Say you just want to get a CSV file of all your commits on the default branch of a repo.

1. `cd ghgql`
2. Set your GitHub access token in `etc/app.local.yml`
3. Run `python config.py`
4. Run your commit query via

```bash
python repo_commits.py REPO_OWNER REPO_NAME COMMITTER -o OUTPUT_DIR -s START_DATE -e END_DATE
```

This app is currently limited to querying commits from the default branch of a repo.

## Purpose

Expand All @@ -17,7 +32,6 @@ This tool was created to:
- Act as a **wrapper** on requests and formatting, so you can focus on writing or using a query and getting the data out as a CSV.
- Act as an easy CLI for anyone - without caring about what language the tool is implemented in (other than installing initially).


## Documentation

<div align="center">
Expand All @@ -26,12 +40,10 @@ This tool was created to:

</div>


## Contributing

If you want to make the project better, see the [contribution guidelines](/CONTRIBUTING.md).


## License

Released under [MIT](/LICENSE) by [@MichaelCurrin](https://github.com/MichaelCurrin/).
2 changes: 1 addition & 1 deletion ghgql/demo/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
headers = {"Authorization": f"token {config.ACCESS_TOKEN}"}

# Send the POST request.
resp = requests.post(config.BASE_URL, json=payload, headers=headers)
resp = requests.post(config.BASE_URL, json=payload, headers=headers, timeout=10)

# Pretty print the output.
prettified = json.dumps(resp.json(), indent=4)
Expand Down
54 changes: 27 additions & 27 deletions ghgql/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,16 @@
HEADERS = {"Authorization": f"token {config.ACCESS_TOKEN}"}
MAX_ATTEMPTS = 3

dict_of_str = dict[str, str]
list_of_str = list[str]
DictOfStr = dict[str, str]
ListOfStr = list[str]


def _request(url: str, payload: dict_of_str, headers: dict_of_str):
resp = requests.post(url, json=payload, headers=headers)
def _request(url: str, payload: DictOfStr, headers: DictOfStr):
resp = requests.post(
url,
json=payload,
headers=headers,
timeout=10)
resp_json = resp.json()

resp_msg = resp_json.get("message", None)
Expand Down Expand Up @@ -66,17 +70,14 @@ def _request(url: str, payload: dict_of_str, headers: dict_of_str):
return resp_json


def fetch_github_data(query: str, variables=None) -> dict_of_str:
def fetch_github_data(query: str, variables={}) -> DictOfStr:
"""
Get data from GitHub API using given parameters.

Note that a request which returns an error will still give a 200 and can
might still contain some data. A 404 will not contain the data or errors
keys.
"""
if not variables:
variables = {}

payload = {
"query": query,
"variables": variables,
Expand All @@ -99,9 +100,9 @@ def fetch_github_data(query: str, variables=None) -> dict_of_str:

# TODO: Sleep for set time or perhaps short time if too frequent
# between requests.
seconds = 10
seconds = 1
text.eprint(f"Sleeping {seconds} s...")
sleep(seconds * 1000)
sleep(seconds)
text.eprint("Retrying...")
else:
break
Expand All @@ -113,7 +114,7 @@ def read_file(path: Path):
"""
TODO: Refactor to use Path().read() instead.
"""
with open(path) as f_in:
with open(path, encoding='utf8') as f_in:
file_text = f_in.read()

return file_text
Expand All @@ -131,7 +132,7 @@ def write_file(content, path: Path):
print("Writing")
print(f" - path: {path}")

with open(path, "w") as f_out:
with open(path, "w", encoding='utf8') as f_out:
f_out.writelines(content)


Expand All @@ -144,24 +145,24 @@ def read_template(path: Path):

# TODO Rename to path.
# TODO Refactor so the file only has to be read once for a set of paged queries.
def query_by_filename(path: Path, variables=None):
    """
    Use query file `path` and `variables` to make a query.

    :param path: Path to a file containing a GraphQL query.
    :param variables: Optional dict of query variables. Defaults to `None`
        rather than `{}` — a mutable default argument would be shared
        across calls and could leak state between queries.

    :return: Parsed JSON response data from the GitHub API.
    """
    query = read_file(path)

    return fetch_github_data(query, variables if variables is not None else {})


def read_csv(path: Path):
    """
    Read a CSV file and return its rows as a list of dicts keyed by header.
    """
    with open(path, "r", encoding='utf8') as f_in:
        rows = csv.DictReader(f_in)

        return list(rows)


def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
def write_csv(path: Path, rows: list[DictOfStr], append=False) -> None:
"""
Write a CSV file to a path with given rows and header from first row.

Expand All @@ -178,10 +179,9 @@ def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:

is_new_file = not path.exists()
mode = "a" if append else "w"

fieldnames = list(rows[0].keys())

with open(path, mode) as f_out:
with open(path, mode, encoding='utf8') as f_out:
writer = csv.DictWriter(f_out, fieldnames)

if is_new_file or not append:
Expand All @@ -195,7 +195,7 @@ def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
print()


def process_variables(args: list_of_str) -> dict_of_str:
def process_variables(args: ListOfStr) -> DictOfStr:
"""
Process command-line arguments containing a filename and key-value pairs.
"""
Expand All @@ -222,7 +222,7 @@ def process_variables(args: list_of_str) -> dict_of_str:
return {}


def process_args(args: list_of_str):
def process_args(args: ListOfStr):
"""
Process command-line arguments containing a filename and key-value pairs.

Expand Down
40 changes: 24 additions & 16 deletions ghgql/lib/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# TODO This could be better as a class - then the structure can be reused
# and used for type checking as a whole or getting fields on the object.
# Use init to process `value`.
def parse_commit(value: dict):
def parse_commit(value: dict, verbose=False):
"""
Extract relevant fields from nested data and return as a flat dict.
"""
Expand All @@ -21,23 +21,31 @@ def parse_commit(value: dict):
committer_login = committer["login"] if committer is not None else None
commit_date = time.as_date(value["committedDate"])

return dict(
commit_id=value["abbreviatedOid"],
author_date=author_date,
author_login=author_login,
committed_date=commit_date,
committer_login=committer_login,
changed_files=value["changedFiles"],
additions=value["additions"],
deletions=value["deletions"],
message=value["message"],
)


def prepare_row(commit: dict, repo_name: str, branch_name: str):
if verbose:
return dict(
commit_id=value["abbreviatedOid"],
author_date=author_date,
author_login=author_login,
committed_date=commit_date,
committer_login=committer_login,
changed_files=value["changedFiles"],
additions=value["additions"],
deletions=value["deletions"],
message=value["message"],
)
else:
return dict(
commit_id=value["abbreviatedOid"],
committed_date=commit_date,
committer_login=committer_login,
message=value["message"],
)


def prepare_row(commit: dict, repo_name: str, branch_name: str, verbose=False):
    """
    Convert commit metadata to a dict for writing to a CSV.
    """
    row = dict(repo_name=repo_name, branch_name=branch_name)
    row.update(parse_commit(commit, verbose))

    return row
39 changes: 39 additions & 0 deletions ghgql/queries/repos/repo_commits_branch.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Get details of all commits for a single repo and optional date range,
# using paging to get all commits.
#
# Variables:
#   $owner       - repo owner login (required).
#   $repo_name   - repo name (required).
#   $branch_name - qualified ref name whose history is read (required).
#   $since       - optional GitTimestamp; limits history to commits since then.
#   $cursor      - optional paging cursor, the `endCursor` of a previous page.
query CommitsForRepo($owner: String!, $repo_name: String!, $branch_name: String!, $since: GitTimestamp, $cursor: String) {
repository(owner: $owner, name: $repo_name) {
# Resolve the branch by name instead of assuming the default branch.
ref(qualifiedName: $branch_name) {
name
target {
# The ref target must be narrowed to Commit to access `history`.
... on Commit {
# Page size 100 is the GitHub GraphQL API per-request maximum.
history(since: $since, first: 100, after: $cursor) {
totalCount
# pageInfo drives the caller's paging loop.
pageInfo {
hasNextPage
endCursor
}
nodes {
abbreviatedOid
authoredDate
author {
user {
login
}
}
committedDate
committer {
user {
login
}
}
changedFiles
additions
deletions
message
}
}
}
}
}
}
}
Loading