Problem:
You need a list of countries, ordered by continent, under a liberal license.
Solution:
One possibility is to use the data from Wikipedia and parse the information from the Wikitext format. For example, the English Wikipedia article ‘List of sovereign states and dependent territories by continent’ provides a good source.
The script provided here downloads and parses that format without any external dependencies (Python core libraries only). Python 2.7+ is required. The country list is encoded as Unicode.
Update 1.1 2015-12-14: Upgraded to Python 3; added file headers
Update 1.2 2017-02-19: Fix changed format
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extract a list of countries, grouped by continent, from Wikipedia
"""
__copyright__ = "Copyright (c) 2015 Uli Köhler"
__license__ = "Apache License v2.0"
__version__ = "1.2"

try:
    import urllib.request as urllib2
except ImportError:
    import urllib2
import re

# Level-2 wikitext heading ("==Africa==", "== North America ==").
# The captured title starts and ends with a character that is neither "=" nor
# whitespace, so level-3 headings ("===Foo===") are rejected while multi-word
# titles are accepted and surrounding spaces are dropped.
continentRegex = re.compile(r"==\s*([^=\s](?:[^=]*[^=\s])?)\s*==")

# Table cell whose content is italic/bold (one or more apostrophes) and
# optionally wrapped in a [[wiki link]]; group 1 is the raw link/cell content.
countryRegex = re.compile(r"\| '+\[?\[?([^\]]+)\]?\]?'+")


def downloadWikipediaCountryList():
    """
    Download the Wikipedia country list in wikitext format.

    Returns the page source as a list of lines (split on "\\n", without
    the line terminators). Network errors raised by urlopen propagate
    to the caller.
    """
    url = "https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent?action=raw"
    # Identify the client explicitly: Wikimedia's User-Agent policy asks for a
    # descriptive User-Agent, and generic library defaults may be rejected.
    request = urllib2.Request(url, headers={
        "User-Agent": "WikipediaCountryList/" + __version__ + " (Python urllib)"
    })
    response = urllib2.urlopen(request)
    # try/finally instead of "with": the Python 2 urllib2 response object is
    # not a context manager, and the import fallback above still supports it.
    try:
        wikitext = response.read().decode("utf-8")
    finally:
        response.close()
    return wikitext.split("\n")


def splitListAtIndices(theList, indices):
    """
    Split a list at a given set of indices.

    The result always has len(indices) + 1 chunks: everything before the
    first index, the spans between consecutive indices, and everything
    from the last index to the end.

    >>> splitListAtIndices([1,3,5,7,9], [0, 3])
    [[], [1, 3, 5], [7, 9]]
    """
    # Accept any iterable of indices (tuple, generator, ...), not only lists.
    indices = list(indices)
    return [theList[i:j] for i, j in zip([0] + indices, indices + [None])]


def extractContinents(wikitextLines):
    """
    Group wikitext lines by the level-2 heading they appear under.

    Returns a dict mapping each heading title to the list of lines from
    (and including) the heading line up to the next level-2 heading.
    Lines before the first heading are discarded. Every level-2 heading
    is treated as a continent, so non-continent sections of the page end
    up as keys as well. If a title occurs twice, the later section wins.

    >>> extractContinents(["intro", "== North America ==", "| x"])
    {'North America': ['== North America ==', '| x']}
    """
    # (line index, heading title) for every heading line; match only once.
    headings = []
    for idx, line in enumerate(wikitextLines):
        match = continentRegex.match(line)
        if match:
            headings.append((idx, match.group(1)))
    # The first chunk is the preamble before any heading -> drop it.
    sections = splitListAtIndices(
        wikitextLines, [idx for idx, _ in headings])[1:]
    return {name: lines for (_, name), lines in zip(headings, sections)}


def findCountry(line):
    """
    Extract the country name from a wikitext table line.

    Returns the name, or None if the line is not an emphasized table
    cell or if the cell content contains "De facto". For piped links
    ([[Target|Label]]) the text after the first "|" is returned.

    >>> findCountry("| '''[[Germany]]'''")
    'Germany'
    >>> findCountry("| [[Berlin]]")
    """
    match = countryRegex.match(line)
    if not match:
        return None
    country = match.group(1)
    if "De facto" in country:
        return None
    if "|" in country:
        return country.partition("|")[2]
    return country


def extractCountries(linesByContinent):
    """
    Map each continent to the country names found in its lines.

    linesByContinent is a dict of continent name -> list of wikitext
    lines (as produced by extractContinents). Lines that yield no
    country are skipped.
    """
    return {
        continent: [
            country
            # Inner generator so that each line is parsed only once.
            for country in (findCountry(line) for line in continentLines)
            if country
        ]
        for continent, continentLines in linesByContinent.items()
    }


def getCountriesFromWikipedia():
    """Download the Wikipedia list and return {continent: [country, ...]}."""
    lines = downloadWikipediaCountryList()
    linesByContinent = extractContinents(lines)
    return extractCountries(linesByContinent)


if __name__ == "__main__":
    # Unit-selftest
    import doctest
    doctest.testmod()
    # Print country list
    print(getCountriesFromWikipedia())