From 5ae8a8bcfd563ef0d472d0560beba53798897e3c Mon Sep 17 00:00:00 2001
From: Ben Parsons <ben@bpulse.co.uk>
Date: Tue, 7 Apr 2020 14:37:55 +0100
Subject: [PATCH 1/2] fix pagination in scraper

---
 scripts/proposals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/proposals.py b/scripts/proposals.py
index 27cc6cfb..96904ce4 100755
--- a/scripts/proposals.py
+++ b/scripts/proposals.py
@@ -38,7 +38,7 @@ def getpage(url):
     pagecount = 1
     for link in resp.links.values():
         if link['rel'] == 'last':
-            pagecount = int(re.search('page=(.+?)', link['url']).group(1))
+            pagecount = int(re.search('page=(.+)', link['url']).group(1))
 
     val = resp.json()
     if not isinstance(val, list):

From 7c037d2490203d156e9def41f43ce255d863d48b Mon Sep 17 00:00:00 2001
From: Ben Parsons <ben@bpulse.co.uk>
Date: Tue, 7 Apr 2020 15:26:48 +0100
Subject: [PATCH 2/2] improve capture and add example

---
 scripts/proposals.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/proposals.py b/scripts/proposals.py
index 96904ce4..faa10a83 100755
--- a/scripts/proposals.py
+++ b/scripts/proposals.py
@@ -38,7 +38,10 @@ def getpage(url):
     pagecount = 1
     for link in resp.links.values():
         if link['rel'] == 'last':
-            pagecount = int(re.search('page=(.+)', link['url']).group(1))
+            # Extract the page count from the `page` query parameter of the
+            # `last` link URL in the response, e.g.
+            # 'https://api.github.com/repositories/24998719/issues?state=all&labels=proposal&page=10'
+            pagecount = int(re.search('page=(\d+)', link['url']).group(1))
 
     val = resp.json()
     if not isinstance(val, list):