import requests


def fetch_all_pages(url, items_key='_embedded', entity_key='dataElements', params=None):
    """Follow HATEOAS 'next' links, collecting items from every page."""
    results = []
    next_url = url
    session = requests.Session()
    first = True
    while next_url:
        # Extra query params only apply to the first request; subsequent
        # 'next' links already carry their own query string.
        if first and params:
            response = session.get(next_url, params=params)
            first = False
        else:
            response = session.get(next_url)
        response.raise_for_status()
        data = response.json()
        print(f'Fetched {len(data.get(items_key, {}).get(entity_key, []))} items from {next_url}\n')

        # Extract items from the HATEOAS _embedded section
        if items_key in data and entity_key in data[items_key]:
            results.extend(data[items_key][entity_key])
        else:
            break

        # Find the next page link; missing on the last page, ending the loop
        next_url = data.get('_links', {}).get('next', {}).get('href')
    return results


BASE_URL = 'https://data.uutilsynet.no/dataset/alle-erklaeringer'
ENTITY_KEY = 'dataElements'
PAGE_SIZE = 100
URL = '{}?size={}'.format(BASE_URL, PAGE_SIZE)

if __name__ == '__main__':
    all_data = fetch_all_pages(URL, '_embedded', ENTITY_KEY)
    print(f'Total items: {len(all_data)}')
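
# For reference, a minimal sketch of the page shape fetch_all_pages() expects:
# a Spring Data REST-style HATEOAS document. The field names below are
# inferred from the parsing code above, not verified against the live
# data.uutilsynet.no API, so treat the exact structure as an assumption.
SAMPLE_PAGE = {
    '_embedded': {
        'dataElements': [
            # one declaration record per entry; real fields unspecified here
            {'id': 1},
        ],
    },
    '_links': {
        # omitted on the last page, which terminates the while-loop above
        'next': {'href': 'https://data.uutilsynet.no/dataset/alle-erklaeringer?size=100&page=1'},
    },
}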