# Determines from National Data on the relative frequency of given names
# in the population of U.S. births the top 10 names that have disappeared
# and reappeared for the longest period of time.
# The data are stored in a directory "names", in files named "yobxxxx.txt"
# with xxxx (the year of birth) ranging from 1880 to 2013.
#
# Written by Eric Martin for COMP9021


import os


# A dictionary where a key is a name and a value is the list of all years
# when the name was given, in increasing order and without repetition.
#
# The years are first collected in sets and only converted to sorted lists
# at the end, for two reasons:
# - os.listdir() returns the filenames in arbitrary order, so the years
#   are not necessarily met chronologically;
# - a name can be read from more than one line of the same file
#   (presumably once per sex -- to be confirmed against the data),
#   which would otherwise record the same year more than once.
years_per_first_name = {}
directory = 'names'
for filename in os.listdir(directory):
    if not filename.endswith('.txt'):
        continue
    # Filenames are expected to be of the form "yobxxxx.txt";
    # characters 3 to 6 are the year.
    year = int(filename[3: 7])
    with open(os.path.join(directory, filename), 'r') as file:
        for line in file:
            # A blank line carries no name.
            if not line.strip():
                continue
            first_name = line.split(',')[0]
            years_per_first_name.setdefault(first_name, set()).add(year)
# Only values are replaced, so iterating over the keys meanwhile is safe.
for first_name in years_per_first_name:
    years_per_first_name[first_name] = sorted(
                                           years_per_first_name[first_name])

# A list of triples, one for each pair of consecutive entries
# in the list of years recorded for a name, consisting of:
# - the number of years between both entries,
# - the earlier of both entries (year when the name was last given),
# - the name.
# The list is eventually ordered from largest to smallest triple,
# so the longest gaps come first.
revivals = []
for given_name, recorded_years in years_per_first_name.items():
    # Pairs every recorded year with the one that follows it in the list.
    for last_given, given_again in zip(recorded_years, recorded_years[1:]):
        revivals.append((given_again - last_given, last_given, given_name))
revivals.sort(reverse=True)

# Reports the first 10 triples of revivals, that is, the 10 longest gaps.
# Slicing rather than indexing from 0 to 9 makes sure that no IndexError
# is raised in case fewer than 10 triples have been recorded:
# only the available ones are then reported.
for gap, last_year, name in revivals[: 10]:
    print('{:} was last used in {:} and then again in {:}, {:} years later.'.
                                  format(name, last_year, last_year + gap, gap))

# Resource created Wednesday 26 August 2015, 10:27:40 AM.
#
# file: names_revivals.py
#
#
# Back to top
#
# COMP9021 15s2 (Principles of Programming) is powered by WebCMS3
# CRICOS Provider No. 00098G