- Added 12 different event types (Baptism, Education, Occupation, etc.) - Fixed missing event references by storing and reusing original events - Made event generation deterministic with random seed - Updated gen_person to return both XML and tuple format for event reuse - All event references now properly defined and validated - Demo family now includes 240+ additional events for comprehensive testing
445 lines
19 KiB
Python
445 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate a huge demo family for Gramps testing
|
|
"""
|
|
|
|
import random
|
|
from datetime import datetime, timedelta
|
|
|
|
# Seed the shared random generator once, at import time, so that every random
# draw made by the functions below (names, dates, optional events) repeats
# identically from run to run.
random.seed(42)
|
|
|
|
# Generate unique handles
|
|
def gen_handle(prefix, num):
    """Build the handle string used to cross-reference an XML object.

    Args:
        prefix: Text naming the object kind (the callers in this file pass
            "PERSON", "EVENT" or "FAMILY").
        num: Integer sequence number; it is zero-padded to at least eight
            digits.

    Returns:
        An underscore, followed by the prefix, followed by the padded number.
    """
    return "_{}{:08d}".format(prefix, num)
|
|
|
|
# Event types to add
|
|
# Optional life events that gen_additional_events may attach to a person.
# Each entry is (event type, probability, min years after birth,
# max years after birth).  The order matters: entries are evaluated in
# sequence, so it determines the order in which random numbers are consumed.
EVENT_TYPES = [
    # Early life
    ("Baptism", 0.7, 0, 2),
    ("Christening", 0.5, 0, 1),
    # Schooling and working life
    ("Education", 0.8, 5, 18),
    ("Graduation", 0.6, 18, 25),
    ("Occupation", 0.9, 18, 65),
    ("Military Service", 0.3, 18, 30),
    # Places of living; a residence may fall anywhere from birth to age 80
    ("Residence", 0.7, 0, 80),
    ("Emigration", 0.2, 20, 50),
    ("Immigration", 0.15, 20, 50),
    ("Retirement", 0.4, 60, 75),
    # Death-related events carry no year offsets: they are dated in the
    # death year and only generated when the person has one.
    ("Burial", 0.6, None, None),
    ("Cremation", 0.2, None, None),
]
|
|
|
|
# Generate additional events for a person
|
|
def gen_additional_events(pid, first_name, surname, birth_year, death_year=None):
    """Randomly pick and build the optional life events of one person.

    Every entry of EVENT_TYPES is considered in order and kept with its
    configured probability.  Burial and Cremation are dated in the death year
    and skipped for people without one.  Every other event is dated a random
    number of years after birth (within the entry's range) and dropped when
    that year would fall after the death year.

    Args:
        pid: Integer person number; it selects the block of event numbers
            reserved for this person.
        first_name: First name, used in the event descriptions.
        surname: Surname, used in the event descriptions.
        birth_year: Year of birth; event years are offsets from it.
        death_year: Year of death, or None if the person has no death event.

    Returns:
        A list of ``(event_handle, event_xml)`` tuples in EVENT_TYPES order.
    """
    # Birth and death use event numbers pid * 10 and pid * 10 + 1 (see
    # gen_person).  Continuing at pid * 10 + 2 would leave room for only eight
    # more events before the next person's birth event, yet up to
    # len(EVENT_TYPES) events can be produced here.  A dedicated high range
    # with one slot per event type keeps every handle and id unique for any
    # realistic number of people.
    additional_event_base = 1_000_000
    event_num = additional_event_base + pid * len(EVENT_TYPES)

    death_related = ("Burial", "Cremation")
    # Types described as "<type> - <first name> <surname>".
    dash_described = {
        "Education", "Graduation", "Military Service",
        "Emigration", "Immigration", "Retirement",
    }
    occupations = ["Farmer", "Teacher", "Engineer", "Doctor", "Lawyer", "Merchant",
                   "Carpenter", "Blacksmith", "Sailor", "Soldier", "Clerk", "Nurse"]
    places = ["New York", "London", "Paris", "Berlin", "Rome", "Madrid", "Amsterdam",
              "Vienna", "Prague", "Warsaw", "Stockholm", "Copenhagen"]

    events = []
    for event_type, probability, min_years, max_years in EVENT_TYPES:
        # The probability draw always happens first so the sequence of random
        # numbers consumed does not depend on the later checks.
        if random.random() > probability:
            continue

        if event_type in death_related:
            if not death_year:
                continue
            event_year = death_year
        else:
            if max_years is None:
                # Defensive: an entry without a year range cannot be dated.
                continue
            event_year = birth_year + random.randint(min_years, max_years)
            if death_year and event_year > death_year:
                continue

        event_month = random.randint(1, 12)
        # Days stop at 28 so the date is valid in every month.
        event_day = random.randint(1, 28)

        if event_type == "Occupation":
            description = f"{random.choice(occupations)} - {first_name} {surname}"
        elif event_type == "Residence":
            description = f"Residence in {random.choice(places)} - {first_name} {surname}"
        elif event_type in dash_described:
            description = f"{event_type} - {first_name} {surname}"
        else:
            description = f"{event_type} of {surname}, {first_name}"

        event_handle = gen_handle("EVENT", event_num)
        change = int(datetime.now().timestamp())
        event_xml = (
            f'    <event handle="{event_handle}" change="{change}" id="E{event_num:04d}">\n'
            f"      <type>{event_type}</type>\n"
            f'      <dateval val="{event_year}-{event_month:02d}-{event_day:02d}"/>\n'
            f"      <description>{description}</description>\n"
            "    </event>\n"
        )
        events.append((event_handle, event_xml))
        event_num += 1

    return events
|
|
|
|
# Generate a person
|
|
def gen_person(pid, first_name, surname, birth_year, death_year=None, gender="M",
               parentin_families=None, childof_families=None, reuse_additional_events=None):
    """Build the XML for one person together with that person's events.

    Args:
        pid: Integer person number; the person handle and id and the
            birth/death event numbers (pid * 10 and pid * 10 + 1) derive
            from it.
        first_name: First name written into the record and descriptions.
        surname: Surname written into the record and descriptions.
        birth_year: Year of the birth event.
        death_year: Year of the death event; a falsy value means no death
            event is produced.
        gender: Text written into the ``<gender>`` element.
        parentin_families: Handles of families in which the person is a
            parent, or None.
        childof_families: Handles of families in which the person is a
            child, or None.
        reuse_additional_events: Previously generated ``(handle, xml)`` tuples
            to reference instead of generating new additional events; None
            means new ones are generated.

    Returns:
        A 5-tuple ``(person_xml, birth_event_xml, death_event_xml,
        additional_event_xml_list, additional_event_tuples)``.  The death
        event XML is an empty string when there is no death year.
    """
    person_handle = gen_handle("PERSON", pid)
    birth_no = pid * 10
    death_no = birth_no + 1
    birth_handle = gen_handle("EVENT", birth_no)
    death_handle = gen_handle("EVENT", death_no) if death_year else None

    record = [
        f'    <person handle="{person_handle}" change="{int(datetime.now().timestamp())}" id="I{pid:04d}">',
        f"      <gender>{gender}</gender>",
        '      <name type="Birth Name">',
        f"        <first>{first_name}</first>",
        f"        <surname>{surname}</surname>",
        "      </name>",
        f'      <eventref hlink="{birth_handle}" role="Primary"/>',
    ]
    if death_handle:
        record.append(f'      <eventref hlink="{death_handle}" role="Primary"/>')

    # Additional events are resolved before any birth/death dates are drawn,
    # which fixes the order in which random numbers are consumed.
    if reuse_additional_events is None:
        extra_events = gen_additional_events(pid, first_name, surname, birth_year, death_year)
    else:
        extra_events = reuse_additional_events

    record.extend(
        f'      <eventref hlink="{extra_handle}" role="Primary"/>'
        for extra_handle, _ in extra_events
    )
    record.extend(
        f'      <parentin hlink="{family_handle}"/>'
        for family_handle in parentin_families or ()
    )
    record.extend(
        f'      <childof hlink="{family_handle}"/>'
        for family_handle in childof_families or ()
    )
    record.append("    </person>")
    person_xml = "\n".join(record) + "\n"

    # Birth event: month first, then day (days stop at 28).
    month = random.randint(1, 12)
    day = random.randint(1, 28)
    birth_event = "\n".join([
        f'    <event handle="{birth_handle}" change="{int(datetime.now().timestamp())}" id="E{birth_no:04d}">',
        "      <type>Birth</type>",
        f'      <dateval val="{birth_year}-{month:02d}-{day:02d}"/>',
        f"      <description>Birth of {surname}, {first_name}</description>",
        "    </event>",
    ]) + "\n"

    # Death event, only for people with a death year.
    death_event = ""
    if death_handle:
        month = random.randint(1, 12)
        day = random.randint(1, 28)
        death_event = "\n".join([
            f'    <event handle="{death_handle}" change="{int(datetime.now().timestamp())}" id="E{death_no:04d}">',
            "      <type>Death</type>",
            f'      <dateval val="{death_year}-{month:02d}-{day:02d}"/>',
            f"      <description>Death of {surname}, {first_name}</description>",
            "    </event>",
        ]) + "\n"

    # XML strings are for output; the tuples are returned too so a caller can
    # hand them back via reuse_additional_events.
    extra_events_xml = [extra_xml for _, extra_xml in extra_events]
    return person_xml, birth_event, death_event, extra_events_xml, extra_events
|
|
|
|
# Generate a family
|
|
def gen_family(fid, father_handle, mother_handle, marriage_year, children_handles):
    """Build the XML for one family and its marriage event.

    Args:
        fid: Integer family number; the family handle and id and the marriage
            event number derive from it.
        father_handle: Person handle written into the ``<father>`` element.
        mother_handle: Person handle written into the ``<mother>`` element.
        marriage_year: Year of the marriage event; month and day are random.
        children_handles: Iterable of person handles, one ``<childref>`` each.

    Returns:
        A tuple ``(family_xml, marriage_event_xml)``.
    """
    family_handle = gen_handle("FAMILY", fid)

    # Person events are numbered pid * 10 and pid * 10 + 1 (see gen_person), so
    # numbering marriages fid * 100 would give family 1 the same handle and id
    # as the birth event of person 10.  Marriages therefore get their own high
    # number range, which stays clear of person events for any realistic
    # number of people.
    marriage_event_base = 5_000_000
    marriage_num = marriage_event_base + fid
    marriage_handle = gen_handle("EVENT", marriage_num)

    family_lines = [
        f'    <family handle="{family_handle}" change="{int(datetime.now().timestamp())}" id="F{fid:04d}">',
        '      <rel type="Married"/>',
        f'      <father hlink="{father_handle}"/>',
        f'      <mother hlink="{mother_handle}"/>',
    ]
    family_lines.extend(
        f'      <childref hlink="{child_handle}"/>' for child_handle in children_handles
    )
    family_lines.append(f'      <eventref hlink="{marriage_handle}" role="Family"/>')
    family_lines.append("    </family>")
    family_xml = "\n".join(family_lines) + "\n"

    # Marriage event: month first, then day (days stop at 28 so the date is
    # valid in every month).
    marriage_month = random.randint(1, 12)
    marriage_day = random.randint(1, 28)
    marriage_event = (
        f'    <event handle="{marriage_handle}" change="{int(datetime.now().timestamp())}" id="E{marriage_num:04d}">\n'
        "      <type>Marriage</type>\n"
        f'      <dateval val="{marriage_year}-{marriage_month:02d}-{marriage_day:02d}"/>\n'
        "      <description>Marriage</description>\n"
        "    </event>\n"
    )

    return family_xml, marriage_event
|
|
|
|
# First names
|
|
# Name pools.  Element order is significant: random.choice picks by position,
# so reordering a list would change the generated data for a given seed.

# First names drawn for people generated with gender "M".
male_names = [
    "James", "John", "Robert", "Michael", "William",
    "David", "Richard", "Joseph", "Thomas", "Charles",
    "Daniel", "Matthew", "Anthony", "Mark", "Donald",
    "Steven", "Paul", "Andrew", "Joshua", "Kenneth",
    "Kevin", "Brian", "George", "Timothy", "Ronald",
    "Jason", "Edward", "Jeffrey", "Ryan", "Jacob",
    "Gary", "Nicholas", "Eric", "Jonathan", "Stephen",
    "Larry", "Justin", "Scott", "Brandon", "Benjamin",
]

# First names drawn for people generated with gender "F".
female_names = [
    "Mary", "Patricia", "Jennifer", "Linda", "Elizabeth",
    "Barbara", "Susan", "Jessica", "Sarah", "Karen",
    "Nancy", "Lisa", "Betty", "Margaret", "Sandra",
    "Ashley", "Kimberly", "Emily", "Donna", "Michelle",
    "Dorothy", "Carol", "Amanda", "Melissa", "Deborah",
    "Stephanie", "Rebecca", "Sharon", "Laura", "Cynthia",
    "Kathleen", "Amy", "Angela", "Shirley", "Anna",
    "Brenda", "Pamela", "Emma", "Nicole", "Helen",
    "Samantha", "Katherine", "Christine", "Debra",
]

# Surname pool.  NOTE(review): not referenced by the code visible in this
# file, which uses the surname "Smith" throughout - confirm before removing.
surnames = [
    "Smith", "Johnson", "Williams", "Brown", "Jones",
    "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
    "Hernandez", "Lopez", "Wilson", "Anderson", "Thomas",
    "Taylor", "Moore", "Jackson", "Martin", "Lee",
    "Thompson", "White", "Harris", "Sanchez", "Clark",
    "Ramirez", "Lewis", "Robinson", "Walker", "Young",
    "Allen", "King", "Wright", "Scott", "Torres",
    "Nguyen", "Hill", "Flores", "Green", "Adams",
]
|
|
|
|
def main():
    """Generate the demo family and write it to ``demo_family.gramps``.

    The tree consists of the founding couple (John and Mary Smith), their 15
    children, one spouse for each of the first five children, and three to
    five grandchildren per such couple.  All events, people and families are
    written as one Gramps XML document into the current working directory and
    a short summary is printed.
    """
    print("Generating huge demo family...")

    num_children = 15
    # The first children (in birth order) marry and have children themselves.
    num_child_families = 5

    main_family_id = 1
    main_family_handle = gen_handle("FAMILY", main_family_id)

    # --- Founding couple -------------------------------------------------
    father_id = 1
    father_handle = gen_handle("PERSON", father_id)
    father_person, father_birth, father_death, father_additional_xml, _ = gen_person(
        father_id, "John", "Smith", 1950, 2010, "M",
        parentin_families=[main_family_handle],
    )

    mother_id = 2
    mother_handle = gen_handle("PERSON", mother_id)
    mother_person, mother_birth, mother_death, mother_additional_xml, _ = gen_person(
        mother_id, "Mary", "Smith", 1952, 2015, "F",
        parentin_families=[main_family_handle],
    )

    all_additional_events = father_additional_xml + mother_additional_xml

    # --- Children ----------------------------------------------------------
    children = []
    child_handles = []
    child_events = []
    next_pid = 3

    for i in range(num_children):
        gender = "M" if i % 2 == 0 else "F"
        first_name = random.choice(male_names if gender == "M" else female_names)
        birth_year = 1970 + i * 2  # births spread from 1970 to 1998
        # 30% of the children get a death year 60-90 years after birth; the
        # probability draw comes first, the age draw only when it succeeds.
        death_year = (birth_year + random.randint(60, 90)) if random.random() < 0.3 else None

        # Child i of the first num_child_families children later heads family
        # main_family_id + 1 + i.  Passing that handle now means the person
        # record is complete on the first pass and never needs regenerating.
        if i < num_child_families:
            parentin = [gen_handle("FAMILY", main_family_id + 1 + i)]
        else:
            parentin = None

        child_person, child_birth, child_death, child_additional_xml, _ = gen_person(
            next_pid, first_name, "Smith", birth_year, death_year, gender,
            parentin_families=parentin,
            childof_families=[main_family_handle],
        )

        children.append(child_person)
        child_handles.append(gen_handle("PERSON", next_pid))
        child_events.append(child_birth)
        if child_death:
            child_events.append(child_death)
        all_additional_events.extend(child_additional_xml)
        next_pid += 1

    # --- Main family -------------------------------------------------------
    main_family_xml, marriage_event = gen_family(
        main_family_id, father_handle, mother_handle, 1969, child_handles
    )
    family_xml_parts = [main_family_xml]

    # --- Spouses and grandchildren ----------------------------------------
    # Spouses and grandchildren share one list to keep their output order:
    # each spouse is followed by that couple's children.
    descendants = []
    descendant_events = []
    child_family_marriages = []
    spouse_count = 0
    grandchild_count = 0

    for i in range(num_child_families):
        family_id = main_family_id + 1 + i
        child_family_handle = gen_handle("FAMILY", family_id)
        parent_handle = child_handles[i]
        parent_gender = "M" if i % 2 == 0 else "F"
        spouse_gender = "F" if parent_gender == "M" else "M"

        spouse_name = random.choice(female_names if spouse_gender == "F" else male_names)
        spouse_birth = 1970 + i * 2 + random.randint(-2, 2)
        spouse_handle = gen_handle("PERSON", next_pid)
        spouse_person, spouse_birth_event, spouse_death_event, spouse_additional_xml, _ = gen_person(
            next_pid, spouse_name, "Smith", spouse_birth, None, spouse_gender,
            parentin_families=[child_family_handle],
        )
        descendants.append(spouse_person)
        descendant_events.append(spouse_birth_event)
        if spouse_death_event:
            descendant_events.append(spouse_death_event)
        all_additional_events.extend(spouse_additional_xml)
        spouse_count += 1
        next_pid += 1

        # Three to five children per couple.
        num_grandchildren = random.randint(3, 5)
        grandchild_handles = []
        for j in range(num_grandchildren):
            gchild_gender = "M" if j % 2 == 0 else "F"
            gchild_name = random.choice(male_names if gchild_gender == "M" else female_names)
            gchild_birth = 1995 + i * 3 + j
            gchild_person, gchild_birth_event, gchild_death_event, gchild_additional_xml, _ = gen_person(
                next_pid, gchild_name, "Smith", gchild_birth, None, gchild_gender,
                childof_families=[child_family_handle],
            )
            descendants.append(gchild_person)
            grandchild_handles.append(gen_handle("PERSON", next_pid))
            descendant_events.append(gchild_birth_event)
            if gchild_death_event:
                descendant_events.append(gchild_death_event)
            all_additional_events.extend(gchild_additional_xml)
            grandchild_count += 1
            next_pid += 1

        # Put each partner into the role matching their gender; daughters of
        # the main family must not end up in the <father> element.
        if parent_gender == "M":
            family_father, family_mother = parent_handle, spouse_handle
        else:
            family_father, family_mother = spouse_handle, parent_handle

        fam_xml, fam_marriage = gen_family(
            family_id, family_father, family_mother, 1990 + i, grandchild_handles
        )
        family_xml_parts.append(fam_xml)
        child_family_marriages.append(fam_marriage)

    # --- Assemble the document --------------------------------------------
    # Every event that is written, in output order; empty strings (a missing
    # death event) are dropped so the list length is the true event count.
    events = [
        event
        for event in (
            [father_birth, father_death, mother_birth, mother_death, marriage_event]
            + child_events
            + child_family_marriages
            + descendant_events
            + all_additional_events
        )
        if event
    ]

    header = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN"\n'
        '"http://gramps-project.org/xml/1.7.1/grampsxml.dtd">\n'
        '<database xmlns="http://gramps-project.org/xml/1.7.1/">\n'
        "  <header>\n"
        f"    <created date=\"{datetime.now().strftime('%Y-%m-%d')}\" version=\"5.1.0\"/>\n"
        "    <researcher>\n"
        "      <resname>Demo Family Generator</resname>\n"
        "    </researcher>\n"
        "  </header>\n"
        "  <tags>\n"
        "  </tags>\n"
    )

    xml_content = "".join(
        [header, "  <events>\n"]
        + events
        + ["  </events>\n", "  <people>\n", father_person, mother_person]
        + children
        + descendants
        + ["  </people>\n", "  <families>\n"]
        + family_xml_parts
        + ["  </families>\n", "</database>\n"]
    )

    with open("demo_family.gramps", "w", encoding="utf-8") as f:
        f.write(xml_content)

    # --- Summary -----------------------------------------------------------
    print("Generated demo_family.gramps with:")
    print("  - 2 parents (John and Mary Smith)")
    print(f"  - {num_children} children")
    print(f"  - {spouse_count} spouses")
    print(f"  - {grandchild_count} grandchildren")
    print("  - Multiple families with marriage events")
    print("  - Birth and death events for all")
    print(f"  - {len(all_additional_events)} additional events (Baptism, Education, Occupation, etc.)")
    print(f"  - Total events: {len(events)}")
|
|
|
|
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|
|