Skip to content

Commit reporting #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# GitHub Reporting Tool 👨‍💻 📊 🐍

> Create detailed and summary CSV reports of activity by a GitHub user, using the GraphQL API

[![GitHub tag](https://img.shields.io/github/tag/MichaelCurrin/github-reporting-py)](https://github.com/MichaelCurrin/github-reporting-py/tags/)
Expand All @@ -7,6 +8,20 @@
[![Made with Python](https://img.shields.io/badge/Python->=3.9-blue?logo=python&logoColor=white)](https://python.org)
[![API - GitHub GraphQL](https://img.shields.io/badge/GitHub_API-V4_GraphQL-blue?logo=github)](https://graphql.github.io/)

## Quick start

Say you just want to get a CSV file of all your commits on the default branch of a repo.

1. `cd ghgql`
2. Set your GitHub access token in `etc/app.local.yml`
3. Run `python config.py`
4. Run your commit query via

```bash
python repo_commits.py REPO_OWNER REPO_NAME COMMITTER -o OUTPUT_DIR -s START_DATE -e END_DATE
```

This app is currently limited to querying commits from the default branch of a repo.

## Purpose

Expand All @@ -17,7 +32,6 @@ This tool was created to:
- Act as a **wrapper** on requests and formatting, so you can focus on writing or using a query and getting the data out as a CSV.
- Act as an easy CLI for anyone - without caring about what language the tool is implemented in (other than installing initially).


## Documentation

<div align="center">
Expand All @@ -26,12 +40,10 @@ This tool was created to:

</div>


## Contributing

If you want to make the project better, see the [contribution guidelines](/CONTRIBUTING.md).


## License

Released under [MIT](/LICENSE) by [@MichaelCurrin](https://github.com/MichaelCurrin/).
2 changes: 1 addition & 1 deletion ghgql/demo/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
headers = {"Authorization": f"token {config.ACCESS_TOKEN}"}

# Send the POST request.
resp = requests.post(config.BASE_URL, json=payload, headers=headers)
resp = requests.post(config.BASE_URL, json=payload, headers=headers, timeout=10)

# Pretty print the output.
prettified = json.dumps(resp.json(), indent=4)
Expand Down
54 changes: 27 additions & 27 deletions ghgql/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,16 @@
HEADERS = {"Authorization": f"token {config.ACCESS_TOKEN}"}
MAX_ATTEMPTS = 3

dict_of_str = dict[str, str]
list_of_str = list[str]
DictOfStr = dict[str, str]
ListOfStr = list[str]


def _request(url: str, payload: dict_of_str, headers: dict_of_str):
resp = requests.post(url, json=payload, headers=headers)
def _request(url: str, payload: DictOfStr, headers: DictOfStr):
resp = requests.post(
url,
json=payload,
headers=headers,
timeout=10)
resp_json = resp.json()

resp_msg = resp_json.get("message", None)
Expand Down Expand Up @@ -66,17 +70,14 @@ def _request(url: str, payload: dict_of_str, headers: dict_of_str):
return resp_json


def fetch_github_data(query: str, variables=None) -> dict_of_str:
def fetch_github_data(query: str, variables={}) -> DictOfStr:
"""
Get data from GitHub API using given parameters.

Note that a request which returns an error will still give a 200 and can
might still contain some data. A 404 will not contain the data or errors
keys.
"""
if not variables:
variables = {}

payload = {
"query": query,
"variables": variables,
Expand All @@ -99,9 +100,9 @@ def fetch_github_data(query: str, variables=None) -> dict_of_str:

# TODO: Sleep for set time or perhaps short time if too frequent
# between requests.
seconds = 10
seconds = 1
text.eprint(f"Sleeping {seconds} s...")
sleep(seconds * 1000)
sleep(seconds)
text.eprint("Retrying...")
else:
break
Expand All @@ -113,7 +114,7 @@ def read_file(path: Path):
"""
TODO: Refactor to use Path().read() instead.
"""
with open(path) as f_in:
with open(path, encoding='utf8') as f_in:
file_text = f_in.read()

return file_text
Expand All @@ -131,7 +132,7 @@ def write_file(content, path: Path):
print("Writing")
print(f" - path: {path}")

with open(path, "w") as f_out:
with open(path, "w", encoding='utf8') as f_out:
f_out.writelines(content)


Expand All @@ -144,24 +145,24 @@ def read_template(path: Path):

# TODO Rename to path.
# TODO Refactor so the file only has to be read once for a set of paged queries.
def query_by_filename(path: Path, variables=None):
    """
    Use query file `path` and `variables` to make a query.

    :param path: Path to a file containing a GraphQL query.
    :param variables: Optional dict of query variables. Defaults to `None`
        rather than `{}` — a mutable default argument would be shared
        across calls and could leak state between queries.

    :return: Parsed JSON response data from the GitHub API.
    """
    query = read_file(path)

    return fetch_github_data(query, variables if variables is not None else {})


def read_csv(path: Path):
    """
    Read a CSV file and return its rows as a list of dicts keyed by header.
    """
    with open(path, "r", encoding='utf8') as f_in:
        rows = csv.DictReader(f_in)

        return list(rows)


def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
def write_csv(path: Path, rows: list[DictOfStr], append=False) -> None:
"""
Write a CSV file to a path with given rows and header from first row.

Expand All @@ -178,10 +179,9 @@ def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:

is_new_file = not path.exists()
mode = "a" if append else "w"

fieldnames = list(rows[0].keys())

with open(path, mode) as f_out:
with open(path, mode, encoding='utf8') as f_out:
writer = csv.DictWriter(f_out, fieldnames)

if is_new_file or not append:
Expand All @@ -195,7 +195,7 @@ def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
print()


def process_variables(args: list_of_str) -> dict_of_str:
def process_variables(args: ListOfStr) -> DictOfStr:
"""
Process command-line arguments containing a filename and key-value pairs.
"""
Expand All @@ -222,7 +222,7 @@ def process_variables(args: list_of_str) -> dict_of_str:
return {}


def process_args(args: list_of_str):
def process_args(args: ListOfStr):
"""
Process command-line arguments containing a filename and key-value pairs.

Expand Down
40 changes: 24 additions & 16 deletions ghgql/lib/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# TODO This could be better as a class - then the structure can be reused
# and used for type checking as a whole or getting fields on the object.
# Use init to process `value`.
def parse_commit(value: dict):
def parse_commit(value: dict, verbose=False):
"""
Extract relevant fields from nested data and return as a flat dict.
"""
Expand All @@ -21,23 +21,31 @@ def parse_commit(value: dict):
committer_login = committer["login"] if committer is not None else None
commit_date = time.as_date(value["committedDate"])

return dict(
commit_id=value["abbreviatedOid"],
author_date=author_date,
author_login=author_login,
committed_date=commit_date,
committer_login=committer_login,
changed_files=value["changedFiles"],
additions=value["additions"],
deletions=value["deletions"],
message=value["message"],
)


def prepare_row(commit: dict, repo_name: str, branch_name: str):
if verbose:
return dict(
commit_id=value["abbreviatedOid"],
author_date=author_date,
author_login=author_login,
committed_date=commit_date,
committer_login=committer_login,
changed_files=value["changedFiles"],
additions=value["additions"],
deletions=value["deletions"],
message=value["message"],
)
else:
return dict(
commit_id=value["abbreviatedOid"],
committed_date=commit_date,
committer_login=committer_login,
message=value["message"],
)


def prepare_row(commit: dict, repo_name: str, branch_name: str, verbose=False):
    """
    Convert commit metadata to a dict for writing to a CSV.
    """
    row = dict(repo_name=repo_name, branch_name=branch_name)
    row.update(parse_commit(commit, verbose))

    return row
39 changes: 39 additions & 0 deletions ghgql/queries/repos/repo_commits_branch.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Get details of all commits for a single repo and optional date range,
# using paging to get all commits.
#
# Variables:
#   $owner       - repo owner login (required).
#   $repo_name   - repo name (required).
#   $branch_name - qualified ref name whose history is read (required).
#   $since       - optional GitTimestamp; limits history to commits since then.
#   $cursor      - optional paging cursor, the `endCursor` of a previous page.
query CommitsForRepo($owner: String!, $repo_name: String!, $branch_name: String!, $since: GitTimestamp, $cursor: String) {
repository(owner: $owner, name: $repo_name) {
# Resolve the branch by name instead of assuming the default branch.
ref(qualifiedName: $branch_name) {
name
target {
# The ref target must be narrowed to Commit to access `history`.
... on Commit {
# Page size 100 is the GitHub GraphQL API per-request maximum.
history(since: $since, first: 100, after: $cursor) {
totalCount
# pageInfo drives the caller's paging loop.
pageInfo {
hasNextPage
endCursor
}
nodes {
abbreviatedOid
authoredDate
author {
user {
login
}
}
committedDate
committer {
user {
login
}
}
changedFiles
additions
deletions
message
}
}
}
}
}
}
}
Loading