Skip to content

Commit 90f44b4

Browse files
committed
Start capturing metadata for GitHub API requests
1 parent 5fdebe6 commit 90f44b4

File tree

11 files changed

+130
-59
lines changed

11 files changed

+130
-59
lines changed

src/app/domain/github/crawler/crawlInstallation.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,18 @@ export async function crawlInstallation(
1212
lookbackDays: number
1313
) {
1414
logger.info(`Crawling Installation for ${installation.accountLogin}`)
15-
await crawlUsers(appState, installation)
16-
const repos = await crawlRepositories(appState, installation)
15+
const githubInstallationClient = appState.githubClient.clientForInstallation(installation.installationId)
16+
await crawlUsers(appState, installation, githubInstallationClient)
17+
logger.info('Github Metadata after crawling users', { ...githubInstallationClient.meta() })
18+
const repos = await crawlRepositories(appState, installation, githubInstallationClient)
1719
// Eventually consider doing some parallelization here (or move back to step function) but
1820
// need to be careful since GitHub gets twitchy about concurrent requests to the API
1921
// Their "best practice" doc says don't do it, but their rate limit doc says it's supported
2022
// Only really need to care if things start getting slow
2123
for (const repo of repos) {
22-
await crawlPushes(appState, installation, repo)
23-
await crawlWorkflowRunEvents(appState, installation, repo, lookbackDays)
24+
await crawlPushes(appState, installation, repo, githubInstallationClient)
25+
await crawlWorkflowRunEvents(appState, installation, repo, lookbackDays, githubInstallationClient)
2426
}
27+
28+
logger.info('Github Metadata after crawls', { ...githubInstallationClient.meta() })
2529
}

src/app/domain/github/crawler/crawlPushes.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,17 @@ import { isRawGithubPushEventEvent } from '../../types/rawGithub/RawGithubAPIPus
44
import { fromRawGithubPushEventEvent, GithubPush } from '../../types/GithubPush'
55
import { processPushes } from '../githubPush'
66
import { GithubInstallation } from '../../types/GithubInstallation'
7-
import { logger } from '../../../util/logging'
7+
import { GithubInstallationClient } from '../../../outboundInterfaces/githubInstallationClient'
88

99
// TOEventually - only get all pushes back to lookback in crawl configuration, however GitHub doesn't keep
1010
// them around for very long
1111
export async function crawlPushes(
1212
appState: AppState,
1313
// the owner ID on repo isn't sufficient when we are crawling public repos from other accounts
1414
installation: GithubInstallation,
15-
repo: GithubRepositorySummary
15+
repo: GithubRepositorySummary,
16+
githubClient: GithubInstallationClient
1617
) {
17-
logger.info(`Crawling Pushes for ${installation.accountLogin}/${repo.name}`)
18-
const githubClient = appState.githubClient.clientForInstallation(installation.installationId)
1918
const allEventsForRepo = await githubClient.listMostRecentEventsForRepo(repo.ownerName, repo.name)
2019
const rawPushes = allEventsForRepo.filter(isRawGithubPushEventEvent)
2120
// TODO - this comment was from pre-step-functions version. Is there something that can be improved now

src/app/domain/github/crawler/crawlRepositories.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ import { GithubInstallation } from '../../types/GithubInstallation'
33
import { GithubInstallationClient } from '../../../outboundInterfaces/githubInstallationClient'
44
import { processRawRepositories, toRepositorySummary } from '../githubRepository'
55
import { ORGANIZATION_ACCOUNT_TYPE, USER_ACCOUNT_TYPE } from '../../types/GithubAccountType'
6-
import { logger } from '../../../util/logging'
76

8-
export async function crawlRepositories(appState: AppState, installation: GithubInstallation) {
9-
logger.info(`Crawling Repositories for ${installation.accountLogin}`)
10-
const githubClient = appState.githubClient.clientForInstallation(installation.installationId)
7+
export async function crawlRepositories(
8+
appState: AppState,
9+
installation: GithubInstallation,
10+
githubClient: GithubInstallationClient
11+
) {
1112
const latestRawRepositories = await readRawRepositories(installation, githubClient)
1213
const repos = await processRawRepositories(appState, latestRawRepositories)
1314
return repos.map(toRepositorySummary)

src/app/domain/github/crawler/crawlRunEvents.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,16 @@ import { GithubRepositorySummary } from '../../types/GithubRepository'
33
import { dateTimeAddDays } from '../../../util/dateAndTime'
44
import { processRawRunEvents } from '../githubWorkflowRunEvent'
55
import { GithubInstallation } from '../../types/GithubInstallation'
6-
import { logger } from '../../../util/logging'
6+
import { GithubInstallationClient } from '../../../outboundInterfaces/githubInstallationClient'
77

88
export async function crawlWorkflowRunEvents(
99
appState: AppState,
1010
// the owner ID on repo isn't sufficient when we are crawling public repos from other accounts
1111
installation: GithubInstallation,
1212
repo: GithubRepositorySummary,
13-
lookbackDays: number
13+
lookbackDays: number,
14+
githubClient: GithubInstallationClient
1415
) {
15-
logger.info(`Crawling Run Events for ${installation.accountLogin}/${repo.name}`)
16-
const githubClient = appState.githubClient.clientForInstallation(installation.installationId)
1716
const startTime = `${dateTimeAddDays(appState.clock.now(), -1 * lookbackDays).toISOString()}`
1817

1918
const recentRunEvents = await githubClient.listWorkflowRunsForRepo(

src/app/domain/github/crawler/crawlUsers.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@ import { GithubInstallation } from '../../types/GithubInstallation'
33
import { GithubInstallationClient } from '../../../outboundInterfaces/githubInstallationClient'
44
import { processRawUsers } from '../githubUser'
55
import { ORGANIZATION_ACCOUNT_TYPE, USER_ACCOUNT_TYPE } from '../../types/GithubAccountType'
6-
import { logger } from '../../../util/logging'
76

8-
export async function crawlUsers(appState: AppState, installation: GithubInstallation) {
9-
logger.info(`Crawling Users for ${installation.accountLogin}`)
10-
const latestRawUsers = await readRawUsers(
11-
installation,
12-
appState.githubClient.clientForInstallation(installation.installationId)
13-
)
7+
export async function crawlUsers(
8+
appState: AppState,
9+
installation: GithubInstallation,
10+
githubClient: GithubInstallationClient
11+
) {
12+
const latestRawUsers = await readRawUsers(installation, githubClient)
1413
await processRawUsers(appState, latestRawUsers, installation)
1514
}
1615

src/app/outboundInterfaces/githubInstallationClient.ts

Lines changed: 71 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import { RawGithubWorkflowRunEvent } from '../domain/types/rawGithub/RawGithubWo
44
import { RawGithubRepository } from '../domain/types/rawGithub/RawGithubRepository'
55
import { RawGithubUser } from '../domain/types/rawGithub/RawGithubUser'
66
import { RawGithubEvent } from '../domain/types/rawGithub/RawGithubEvent'
7-
import { logger } from '../util/logging'
87

98
export interface GithubInstallationClient {
109
listWorkflowRunsForRepo(owner: string, repo: string, created?: string): Promise<RawGithubWorkflowRunEvent[]>
@@ -19,6 +18,17 @@ export interface GithubInstallationClient {
1918
listMostRecentEventsForRepo(owner: string, repo: string): Promise<RawGithubEvent[]>
2019

2120
getUser(username: string): Promise<RawGithubUser>
21+
22+
meta(): GithubInstallationClientMeta
23+
}
24+
25+
export interface GithubInstallationClientMeta {
26+
// For now assuming all API calls are to the "core" resource, and so I don't differentiate
27+
// by x-ratelimit-used header (see https://docs.github.com/en/rest/rate-limit/rate-limit?apiVersion=2022-11-28#get-rate-limit-status-for-the-authenticated-user)
28+
readonly ratelimit: number
29+
readonly ratelimitUsed: number
30+
readonly ratelimitRemaining: number
31+
readonly ratelimitResetTimestamp: number
2232
}
2333

2434
export function createRealGithubInstallationClient(
@@ -38,41 +48,82 @@ export function createRealGithubInstallationClient(
3848
installationId
3949
}
4050
})
51+
const rateLimitData: {
52+
ratelimit: number
53+
ratelimitRemaining: number
54+
ratelimitUsed: number
55+
ratelimitResetTimestamp: number
56+
} = {
57+
ratelimit: 0,
58+
ratelimitRemaining: 0,
59+
ratelimitUsed: 0,
60+
ratelimitResetTimestamp: 0
61+
}
62+
63+
function updateRateLimits(headers: OctokitResponseHeaders) {
64+
rateLimitData.ratelimit = Number(headers['x-ratelimit-limit'] ?? 0)
65+
rateLimitData.ratelimitRemaining = Number(headers['x-ratelimit-remaining'] ?? 0)
66+
rateLimitData.ratelimitUsed = Number(headers['x-ratelimit-used'] ?? 0)
67+
rateLimitData.ratelimitResetTimestamp = Number(headers['x-ratelimit-reset'] ?? 0)
68+
}
69+
70+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
71+
async function processOctokitIterator<T>(iterator: any) {
72+
const results: T[] = []
73+
for await (const response of iterator) {
74+
results.push(...response.data)
75+
updateRateLimits(response.headers)
76+
}
77+
return results
78+
}
79+
80+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
81+
function processOctokitResponse(response: any) {
82+
updateRateLimits(response.headers)
83+
return response.data
84+
}
4185

4286
return {
4387
async listWorkflowRunsForRepo(owner: string, repo: string, created?: string) {
44-
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
45-
// @ts-ignore
46-
return await octokit.paginate(octokit.actions.listWorkflowRunsForRepo, {
47-
owner,
48-
repo,
49-
created
50-
})
88+
return processOctokitIterator(
89+
octokit.paginate.iterator(octokit.actions.listWorkflowRunsForRepo, {
90+
owner,
91+
repo,
92+
created
93+
})
94+
)
5195
},
5296
// TOMaybe - consider other options here, e.g. type, sort
5397
async listOrganizationRepositories(org: string): Promise<RawGithubRepository[]> {
54-
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
55-
// @ts-ignore
56-
return await octokit.paginate(octokit.repos.listForOrg, { org })
98+
return processOctokitIterator(octokit.paginate.iterator(octokit.repos.listForOrg, { org }))
5799
},
58100
async listInstallationRepositories(): Promise<RawGithubRepository[]> {
59-
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
60-
// @ts-ignore
61-
return await octokit.paginate(octokit.apps.listReposAccessibleToInstallation, {})
101+
return processOctokitIterator(
102+
octokit.paginate.iterator(octokit.apps.listReposAccessibleToInstallation, {})
103+
)
62104
},
63105
async listOrganizationMembers(org: string): Promise<RawGithubUser[]> {
64-
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
65-
// @ts-ignore
66-
return await octokit.paginate(octokit.orgs.listMembers, { org })
106+
return processOctokitIterator(octokit.paginate.iterator(octokit.orgs.listMembers, { org }))
67107
},
68108
// For now, hard code page size to 10
69109
// GitHub doesn't retain these for long - so anything older than a few days won't be returned
70110
async listMostRecentEventsForRepo(owner: string, repo: string): Promise<RawGithubEvent[]> {
71-
logger.debug(`List recent ${owner} ${repo}`)
72-
return (await octokit.activity.listRepoEvents({ owner, repo, per_page: 10 })).data
111+
return processOctokitResponse(await octokit.activity.listRepoEvents({ owner, repo, per_page: 10 }))
73112
},
74113
async getUser(username: string): Promise<RawGithubUser> {
75-
return (await octokit.users.getByUsername({ username })).data
114+
return processOctokitResponse(await octokit.users.getByUsername({ username }))
115+
},
116+
meta(): GithubInstallationClientMeta {
117+
return rateLimitData
76118
}
77119
}
78120
}
121+
122+
export type OctokitResponseHeaders = {
123+
etag?: string
124+
'last-modified'?: string
125+
'x-ratelimit-limit'?: string
126+
'x-ratelimit-remaining'?: string
127+
'x-ratelimit-reset'?: string
128+
'x-ratelimit-used'?: string
129+
}

test/local/functional/domain/github/crawler/crawlPushes.test.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ test('repo-crawler-for-personal-account-installation', async () => {
2525
// A
2626
const appState = new FakeAppState()
2727
const githubInstallationClient = new FakeGithubInstallationClient()
28-
appState.githubClient.fakeClientsForInstallation.addResponse(48093071, githubInstallationClient)
2928
githubInstallationClient.stubMostRecentEventsForRepo.addResponse(
3029
{
3130
owner: 'cicada-test-user',
@@ -35,7 +34,7 @@ test('repo-crawler-for-personal-account-installation', async () => {
3534
)
3635

3736
// A
38-
await crawlPushes(appState, testPersonalInstallation, testPersonalTestRepo)
37+
await crawlPushes(appState, testPersonalInstallation, testPersonalTestRepo, githubInstallationClient)
3938

4039
// A
4140
expectPutsLength(appState).toEqual(2)
@@ -57,7 +56,7 @@ test('repo-crawler-for-org-installation', async () => {
5756
)
5857

5958
// A
60-
await crawlPushes(appState, testOrgInstallation, testOrgTestRepoOne)
59+
await crawlPushes(appState, testOrgInstallation, testOrgTestRepoOne, githubInstallationClient)
6160

6261
// A
6362
expectPutsLength(appState).toEqual(2)

test/local/functional/domain/github/crawler/crawlRepositories.test.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,10 @@ test('repository-crawler-for-personal-account-installation', async () => {
2121
// A
2222
const appState = new FakeAppState()
2323
const githubInstallationClient = new FakeGithubInstallationClient()
24-
appState.githubClient.fakeClientsForInstallation.addResponse(48093071, githubInstallationClient)
2524
githubInstallationClient.stubInstallationRepositories = [example_personal_account_repo]
2625

2726
// A
28-
await crawlRepositories(appState, testPersonalInstallation)
27+
await crawlRepositories(appState, testPersonalInstallation, githubInstallationClient)
2928

3029
// A
3130
expectBatchWritesLength(appState).toEqual(1)
@@ -36,11 +35,10 @@ test('repository-crawler-for-org-installation', async () => {
3635
// A
3736
const appState = new FakeAppState()
3837
const githubInstallationClient = new FakeGithubInstallationClient()
39-
appState.githubClient.fakeClientsForInstallation.addResponse(48133709, githubInstallationClient)
4038
githubInstallationClient.stubOrganizationRepositories.addResponse('cicada-test-org', example_org_repos)
4139

4240
// A
43-
await crawlRepositories(appState, testOrgInstallation)
41+
await crawlRepositories(appState, testOrgInstallation, githubInstallationClient)
4442

4543
// A
4644
expectBatchWritesLength(appState).toEqual(1)

test/local/functional/domain/github/crawler/crawlRunEvents.test.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ test('repo-crawler-for-personal-account-installation', async () => {
2727
// A
2828
const appState = new FakeAppState()
2929
const githubInstallationClient = new FakeGithubInstallationClient()
30-
appState.githubClient.fakeClientsForInstallation.addResponse(48093071, githubInstallationClient)
3130
githubInstallationClient.stubWorkflowRunsForRepo.addResponse(
3231
{
3332
owner: 'cicada-test-user',
@@ -38,7 +37,13 @@ test('repo-crawler-for-personal-account-installation', async () => {
3837
)
3938

4039
// A
41-
await crawlWorkflowRunEvents(appState, testPersonalInstallation, testPersonalTestRepo, 10)
40+
await crawlWorkflowRunEvents(
41+
appState,
42+
testPersonalInstallation,
43+
testPersonalTestRepo,
44+
10,
45+
githubInstallationClient
46+
)
4247

4348
// A
4449
expectPutsLength(appState).toEqual(3)
@@ -51,7 +56,6 @@ test('repo-crawler-for-org-installation', async () => {
5156
// A
5257
const appState = new FakeAppState()
5358
const githubInstallationClient = new FakeGithubInstallationClient()
54-
appState.githubClient.fakeClientsForInstallation.addResponse(48133709, githubInstallationClient)
5559
githubInstallationClient.stubWorkflowRunsForRepo.addResponse(
5660
{
5761
owner: 'cicada-test-org',
@@ -62,7 +66,13 @@ test('repo-crawler-for-org-installation', async () => {
6266
)
6367

6468
// A
65-
await crawlWorkflowRunEvents(appState, testOrgInstallation, testOrgTestRepoOne, 10)
69+
await crawlWorkflowRunEvents(
70+
appState,
71+
testOrgInstallation,
72+
testOrgTestRepoOne,
73+
10,
74+
githubInstallationClient
75+
)
6676

6777
// A
6878
expectPutsLength(appState).toEqual(3)

test/local/functional/domain/github/crawler/crawlUsers.test.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,10 @@ test('user-crawler-for-personal-account-installation', async () => {
2929
// A
3030
const appState = new FakeAppState()
3131
const githubInstallationClient = new FakeGithubInstallationClient()
32-
appState.githubClient.fakeClientsForInstallation.addResponse(48093071, githubInstallationClient)
3332
githubInstallationClient.stubUsers.addResponse('cicada-test-user', example_personal_account_user)
3433

3534
// A
36-
await crawlUsers(appState, testPersonalInstallation)
35+
await crawlUsers(appState, testPersonalInstallation, githubInstallationClient)
3736

3837
// A
3938
expectBatchWritesLength(appState).toEqual(2)
@@ -47,7 +46,6 @@ test('user-crawler-for-org-installation', async () => {
4746
// A
4847
const appState = new FakeAppState()
4948
const githubInstallationClient = new FakeGithubInstallationClient()
50-
appState.githubClient.fakeClientsForInstallation.addResponse(48133709, githubInstallationClient)
5149
githubInstallationClient.stubOrganizationMembers.addResponse('cicada-test-org', example_org_users)
5250

5351
stubQueryAccountMembershipsByAccount(appState, [
@@ -57,7 +55,7 @@ test('user-crawler-for-org-installation', async () => {
5755
])
5856

5957
// A
60-
await crawlUsers(appState, testOrgInstallation)
58+
await crawlUsers(appState, testOrgInstallation, githubInstallationClient)
6159

6260
// A
6361
expectBatchWritesLength(appState).toEqual(3)

0 commit comments

Comments
 (0)