mygramps/generate_demo_family.py
Daniel Viegas 0abe20849c Fix demo family generation: add diverse events and fix missing event references
- Added 12 different event types (Baptism, Education, Occupation, etc.)
- Fixed missing event references by storing and reusing original events
- Made event generation deterministic with random seed
- Updated gen_person to return both XML and tuple format for event reuse
- All event references now properly defined and validated
- Demo family now includes 240+ additional events for comprehensive testing
2025-11-28 23:19:09 +01:00

445 lines
19 KiB
Python

#!/usr/bin/env python3
"""
Generate a huge demo family for Gramps testing
"""
import random
from datetime import datetime, timedelta
from xml.sax.saxutils import escape
# Seed the shared module-level RNG once at import time so that every run
# draws the same sequence of names, dates and optional events.
random.seed(42)
# Build the handle string used to cross-reference objects in the XML
def gen_handle(prefix, num):
    """Return ``_<prefix><num>`` with *num* zero-padded to eight digits.

    Args:
        prefix: Object kind used as the handle prefix (e.g. "PERSON").
        num: Integer counter that makes the handle unique within its prefix.
    """
    handle_template = "_{}{:08d}"
    return handle_template.format(prefix, num)
# Optional event types that may be attached to a person.
#
# Each entry is (event type, probability, min years, max years):
#   - probability: chance (0..1) that the event is generated at all
#   - min/max years: inclusive age range, counted from the birth year, in
#     which the event is dated; None for events dated in the death year
EVENT_TYPES = [
    # Early childhood
    ("Baptism", 0.7, 0, 2),
    ("Christening", 0.5, 0, 1),
    # School years
    ("Education", 0.8, 5, 18),
    ("Graduation", 0.6, 18, 25),
    # Adult life
    ("Occupation", 0.9, 18, 65),
    ("Military Service", 0.3, 18, 30),
    ("Residence", 0.7, 0, 80),  # may happen at any age
    ("Emigration", 0.2, 20, 50),
    ("Immigration", 0.15, 20, 50),
    ("Retirement", 0.4, 60, 75),
    # Only generated when the person has a death year
    ("Burial", 0.6, None, None),
    ("Cremation", 0.2, None, None),
]
# Generate additional events for a person
def gen_additional_events(pid, first_name, surname, birth_year, death_year=None):
    """Generate the optional life events (beyond birth and death) for one person.

    Every entry of EVENT_TYPES is considered in order and kept with its
    configured probability. Burial and Cremation are only produced when
    *death_year* is given and are dated in that year; every other event is
    dated a random number of years after *birth_year* and dropped when that
    year would fall after *death_year*.

    NOTE(review): month and day are drawn independently of the birth/death
    dates, so an event in the birth or death year can land on the "wrong"
    side of that date.

    Args:
        pid: Numeric person id; determines the handles and ids of the events.
        first_name: Given name, used in the event descriptions.
        surname: Family name, used in the event descriptions.
        birth_year: Year of birth.
        death_year: Year of death, or None if the person has no death event.

    Returns:
        A list of ``(event_handle, event_xml)`` tuples in generation order.
    """
    # Additional events get their own numeric range with 100 slots per
    # person. Numbering them from pid * 10 + 2 (right after birth/death)
    # leaves only 8 free slots, so a person with more events would reuse the
    # handles and ids of the next person's birth, death and extra events.
    first_event_number = 1_000_000 + pid * 100

    occupations = ("Farmer", "Teacher", "Engineer", "Doctor", "Lawyer", "Merchant",
                   "Carpenter", "Blacksmith", "Sailor", "Soldier", "Clerk", "Nurse")
    places = ("New York", "London", "Paris", "Berlin", "Rome", "Madrid", "Amsterdam",
              "Vienna", "Prague", "Warsaw", "Stockholm", "Copenhagen")
    # Event types whose description is simply "<type> - <first> <surname>"
    plain_types = ("Education", "Graduation", "Military Service",
                   "Emigration", "Immigration", "Retirement")

    # Names end up in XML text nodes, so &, < and > must be escaped.
    safe_first = escape(str(first_name))
    safe_surname = escape(str(surname))
    change = int(datetime.now().timestamp())

    events = []
    for event_type, probability, min_years, max_years in EVENT_TYPES:
        if random.random() > probability:
            continue

        if event_type in ("Burial", "Cremation"):
            # Death-related events need a death and share its year
            if not death_year:
                continue
            event_year = death_year
        else:
            if max_years is None:
                continue
            event_year = birth_year + random.randint(min_years, max_years)
            if death_year and event_year > death_year:
                continue
        event_month = random.randint(1, 12)
        event_day = random.randint(1, 28)  # 28 keeps every month valid

        event_number = first_event_number + len(events)
        event_handle = gen_handle("EVENT", event_number)

        if event_type == "Occupation":
            occupation = random.choice(occupations)
            description = f"{occupation} - {safe_first} {safe_surname}"
        elif event_type == "Residence":
            place = random.choice(places)
            description = f"Residence in {place} - {safe_first} {safe_surname}"
        elif event_type in plain_types:
            description = f"{event_type} - {safe_first} {safe_surname}"
        else:
            description = f"{event_type} of {safe_surname}, {safe_first}"

        event_xml = (
            f'    <event handle="{event_handle}" change="{change}" id="E{event_number:04d}">\n'
            f"      <type>{event_type}</type>\n"
            f'      <dateval val="{event_year}-{event_month:02d}-{event_day:02d}"/>\n'
            f"      <description>{description}</description>\n"
            "    </event>\n"
        )
        events.append((event_handle, event_xml))
    return events
# Generate a person
def gen_person(pid, first_name, surname, birth_year, death_year=None, gender="M",
               parentin_families=None, childof_families=None, reuse_additional_events=None):
    """Generate the XML for one person together with that person's events.

    Args:
        pid: Numeric person id; the person handle and the birth/death event
            numbers (``pid * 10`` and ``pid * 10 + 1``) are derived from it.
        first_name: Given name.
        surname: Family name.
        birth_year: Year of the birth event.
        death_year: Year of the death event, or None for no death event.
        gender: Value written to the ``<gender>`` element.
        parentin_families: Handles of families in which the person is a parent.
        childof_families: Handles of families in which the person is a child.
        reuse_additional_events: Previously generated ``(handle, xml)`` tuples
            to reference instead of generating new additional events.

    Returns:
        A 5-tuple ``(person_xml, birth_event_xml, death_event_xml,
        additional_events_xml, additional_events)``. ``death_event_xml`` is
        an empty string when there is no death; ``additional_events_xml`` is
        the list of XML strings and ``additional_events`` the list of
        ``(handle, xml)`` tuples they were taken from.
    """
    handle = gen_handle("PERSON", pid)
    birth_handle = gen_handle("EVENT", pid * 10)
    death_handle = gen_handle("EVENT", pid * 10 + 1) if death_year else None
    change = int(datetime.now().timestamp())

    # Names end up in XML text nodes, so &, < and > must be escaped.
    safe_first = escape(str(first_name))
    safe_surname = escape(str(surname))

    # Additional events - reuse if provided, otherwise generate new
    if reuse_additional_events is not None:
        additional_events = reuse_additional_events
    else:
        additional_events = gen_additional_events(pid, first_name, surname, birth_year, death_year)

    person_lines = [
        f'    <person handle="{handle}" change="{change}" id="I{pid:04d}">',
        f"      <gender>{gender}</gender>",
        '      <name type="Birth Name">',
        f"        <first>{safe_first}</first>",
        f"        <surname>{safe_surname}</surname>",
        "      </name>",
        f'      <eventref hlink="{birth_handle}" role="Primary"/>',
    ]
    if death_handle:
        person_lines.append(f'      <eventref hlink="{death_handle}" role="Primary"/>')
    person_lines.extend(
        f'      <eventref hlink="{event_handle}" role="Primary"/>'
        for event_handle, _ in additional_events
    )
    # The Gramps XML DTD lists childof* before parentin* inside <person>,
    # so the child links must be written first.
    person_lines.extend(
        f'      <childof hlink="{family_handle}"/>'
        for family_handle in childof_families or ()
    )
    person_lines.extend(
        f'      <parentin hlink="{family_handle}"/>'
        for family_handle in parentin_families or ()
    )
    person_lines.append("    </person>")
    person_xml = "\n".join(person_lines) + "\n"

    # Birth event
    birth_month = random.randint(1, 12)
    birth_day = random.randint(1, 28)  # 28 keeps every month valid
    birth_event = (
        f'    <event handle="{birth_handle}" change="{change}" id="E{pid * 10:04d}">\n'
        "      <type>Birth</type>\n"
        f'      <dateval val="{birth_year}-{birth_month:02d}-{birth_day:02d}"/>\n'
        f"      <description>Birth of {safe_surname}, {safe_first}</description>\n"
        "    </event>\n"
    )

    # Death event (empty string when the person has no death year)
    death_event = ""
    if death_handle:
        death_month = random.randint(1, 12)
        death_day = random.randint(1, 28)
        death_event = (
            f'    <event handle="{death_handle}" change="{change}" id="E{pid * 10 + 1:04d}">\n'
            "      <type>Death</type>\n"
            f'      <dateval val="{death_year}-{death_month:02d}-{death_day:02d}"/>\n'
            f"      <description>Death of {safe_surname}, {safe_first}</description>\n"
            "    </event>\n"
        )

    # Return the tuples for reuse and the bare XML strings for output
    all_additional_events_xml = [event_xml for _, event_xml in additional_events]
    return person_xml, birth_event, death_event, all_additional_events_xml, additional_events
# Generate a family
def gen_family(fid, father_handle, mother_handle, marriage_year, children_handles):
    """Generate the XML for one married couple, their children and the marriage event.

    Args:
        fid: Numeric family id; the family handle and the marriage event
            number are derived from it.
        father_handle: Person handle written to ``<father>``.
        mother_handle: Person handle written to ``<mother>``.
        marriage_year: Year of the marriage event.
        children_handles: Person handles written as ``<childref>`` entries.

    Returns:
        A 2-tuple ``(family_xml, marriage_event_xml)``.
    """
    handle = gen_handle("FAMILY", fid)
    # Marriage events get their own numeric range. Numbering them fid * 100
    # collides with the birth event of person fid * 10 (numbered pid * 10),
    # e.g. family 1 and person 10 would both define event 100.
    marriage_number = 9_000_000 + fid
    marriage_handle = gen_handle("EVENT", marriage_number)
    change = int(datetime.now().timestamp())

    family_lines = [
        f'    <family handle="{handle}" change="{change}" id="F{fid:04d}">',
        '      <rel type="Married"/>',
        f'      <father hlink="{father_handle}"/>',
        f'      <mother hlink="{mother_handle}"/>',
        # The Gramps XML DTD lists eventref* before childref* inside <family>
        f'      <eventref hlink="{marriage_handle}" role="Family"/>',
    ]
    family_lines.extend(
        f'      <childref hlink="{child_handle}"/>' for child_handle in children_handles
    )
    family_lines.append("    </family>")
    family_xml = "\n".join(family_lines) + "\n"

    # Marriage event
    marriage_month = random.randint(1, 12)
    marriage_day = random.randint(1, 28)  # 28 keeps every month valid
    marriage_event = (
        f'    <event handle="{marriage_handle}" change="{change}" id="E{marriage_number:04d}">\n'
        "      <type>Marriage</type>\n"
        f'      <dateval val="{marriage_year}-{marriage_month:02d}-{marriage_day:02d}"/>\n'
        "      <description>Marriage</description>\n"
        "    </event>\n"
    )
    return family_xml, marriage_event
# Name pools. The order of each list matters: names are picked with the
# seeded RNG, so reordering would change the generated family.

# Male first names
male_names = [
    "James", "John", "Robert", "Michael", "William",
    "David", "Richard", "Joseph", "Thomas", "Charles",
    "Daniel", "Matthew", "Anthony", "Mark", "Donald",
    "Steven", "Paul", "Andrew", "Joshua", "Kenneth",
    "Kevin", "Brian", "George", "Timothy", "Ronald",
    "Jason", "Edward", "Jeffrey", "Ryan", "Jacob",
    "Gary", "Nicholas", "Eric", "Jonathan", "Stephen",
    "Larry", "Justin", "Scott", "Brandon", "Benjamin",
]

# Female first names
female_names = [
    "Mary", "Patricia", "Jennifer", "Linda", "Elizabeth",
    "Barbara", "Susan", "Jessica", "Sarah", "Karen",
    "Nancy", "Lisa", "Betty", "Margaret", "Sandra",
    "Ashley", "Kimberly", "Emily", "Donna", "Michelle",
    "Dorothy", "Carol", "Amanda", "Melissa", "Deborah",
    "Stephanie", "Rebecca", "Sharon", "Laura", "Cynthia",
    "Kathleen", "Amy", "Angela", "Shirley", "Anna",
    "Brenda", "Pamela", "Emma", "Nicole", "Helen",
    "Samantha", "Katherine", "Christine", "Debra",
]

# Family names
surnames = [
    "Smith", "Johnson", "Williams", "Brown", "Jones",
    "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
    "Hernandez", "Lopez", "Wilson", "Anderson", "Thomas",
    "Taylor", "Moore", "Jackson", "Martin", "Lee",
    "Thompson", "White", "Harris", "Sanchez", "Clark",
    "Ramirez", "Lewis", "Robinson", "Walker", "Young",
    "Allen", "King", "Wright", "Scott", "Torres",
    "Nguyen", "Hill", "Flores", "Green", "Adams",
]
def main():
    """Build the demo family tree and write it to ``demo_family.gramps``.

    The tree consists of John and Mary Smith, their 15 children, a spouse for
    each of the first five children and 3-5 grandchildren per couple. All
    events, people and families are serialised into one Gramps XML 1.7.1
    document in the current working directory, and a summary is printed.
    """
    print("Generating huge demo family...")

    # Seed here as well as at import time, so that calling main() again (or
    # after other code has consumed random numbers) reproduces the same file.
    random.seed(42)

    num_children = 15
    num_child_families = 5  # the first five children marry and have children

    main_family_id = 1
    main_family_handle = gen_handle("FAMILY", main_family_id)
    # Families founded by the children get ids 2, 3, ... in child order.
    # Fixing their handles up front lets every child be generated exactly
    # once with both its childof and parentin links in place.
    child_family_ids = [main_family_id + 1 + i for i in range(num_child_families)]

    # Father: John Smith, born 1950, died 2010
    father_id = 1
    father_handle = gen_handle("PERSON", father_id)
    father_person, father_birth, father_death, father_additional_xml, _ = gen_person(
        father_id, "John", "Smith", 1950, 2010, "M",
        parentin_families=[main_family_handle],
    )

    # Mother: Mary Smith, born 1952, died 2015
    mother_id = 2
    mother_handle = gen_handle("PERSON", mother_id)
    mother_person, mother_birth, mother_death, mother_additional_xml, _ = gen_person(
        mother_id, "Mary", "Smith", 1952, 2015, "F",
        parentin_families=[main_family_handle],
    )

    all_additional_events = father_additional_xml + mother_additional_xml

    # Generate the children
    children = []
    child_handles = []
    child_genders = []
    child_events = []
    next_person_id = 3
    for i in range(num_children):
        gender = "M" if i % 2 == 0 else "F"
        first_name = random.choice(male_names if gender == "M" else female_names)
        birth_year = 1970 + (i * 2)  # Spread births from 1970 to 1998
        # 30% chance of a death event.
        # NOTE(review): birth_year + 60..90 is never earlier than 2030, so
        # these deaths are dated in the future - confirm this is intended.
        death_year = birth_year + random.randint(60, 90) if random.random() < 0.3 else None

        if i < num_child_families:
            own_families = [gen_handle("FAMILY", child_family_ids[i])]
        else:
            own_families = []
        child_person, child_birth, child_death, child_additional_xml, _ = gen_person(
            next_person_id, first_name, "Smith", birth_year, death_year, gender,
            parentin_families=own_families,
            childof_families=[main_family_handle],
        )
        children.append(child_person)
        child_handles.append(gen_handle("PERSON", next_person_id))
        child_genders.append(gender)
        child_events.append(child_birth)
        if child_death:
            child_events.append(child_death)
        all_additional_events.extend(child_additional_xml)
        next_person_id += 1

    # Main family
    main_family_xml, marriage_event = gen_family(
        main_family_id, father_handle, mother_handle, 1969, child_handles
    )
    families = [main_family_xml]
    family_events = []  # marriages of the children's families

    # Spouses and grandchildren for the first children
    descendants = []  # spouses and grandchildren, in generation order
    descendant_events = []
    grandchild_count = 0
    for i, family_id in enumerate(child_family_ids):
        parent_handle = child_handles[i]
        parent_gender = child_genders[i]
        spouse_gender = "F" if parent_gender == "M" else "M"
        family_handle = gen_handle("FAMILY", family_id)

        # Create the spouse.
        # NOTE(review): spouses are all named "Smith" although a surnames
        # pool exists in this file - confirm whether that is intended.
        spouse_name = random.choice(female_names if spouse_gender == "F" else male_names)
        spouse_birth = 1970 + (i * 2) + random.randint(-2, 2)
        spouse_handle = gen_handle("PERSON", next_person_id)
        spouse_person, spouse_birth_event, spouse_death_event, spouse_additional_xml, _ = gen_person(
            next_person_id, spouse_name, "Smith", spouse_birth, None, spouse_gender,
            parentin_families=[family_handle],
        )
        descendants.append(spouse_person)
        descendant_events.append(spouse_birth_event)
        if spouse_death_event:
            descendant_events.append(spouse_death_event)
        all_additional_events.extend(spouse_additional_xml)
        next_person_id += 1

        # Create 3-5 children per couple
        grandchild_handles = []
        for j in range(random.randint(3, 5)):
            gchild_gender = "M" if j % 2 == 0 else "F"
            gchild_name = random.choice(male_names if gchild_gender == "M" else female_names)
            gchild_birth = 1995 + (i * 3) + j
            gchild_person, gchild_birth_event, gchild_death_event, gchild_additional_xml, _ = gen_person(
                next_person_id, gchild_name, "Smith", gchild_birth, None, gchild_gender,
                childof_families=[family_handle],
            )
            descendants.append(gchild_person)
            grandchild_handles.append(gen_handle("PERSON", next_person_id))
            descendant_events.append(gchild_birth_event)
            if gchild_death_event:
                descendant_events.append(gchild_death_event)
            all_additional_events.extend(gchild_additional_xml)
            next_person_id += 1
        grandchild_count += len(grandchild_handles)

        # The Smith child is the father only when male; for daughters the
        # spouse is the father and the daughter the mother.
        if parent_gender == "M":
            husband_handle, wife_handle = parent_handle, spouse_handle
        else:
            husband_handle, wife_handle = spouse_handle, parent_handle
        fam_xml, fam_marriage = gen_family(
            family_id, husband_handle, wife_handle, 1990 + i, grandchild_handles
        )
        families.append(fam_xml)
        family_events.append(fam_marriage)

    # Every event that goes into the file, in output order. Empty strings
    # (missing death events) are dropped so the count below is exact.
    all_events = [father_birth, father_death, mother_birth, mother_death, marriage_event]
    all_events += child_events + family_events + descendant_events + all_additional_events
    all_events = [event for event in all_events if event]

    # Assemble the XML document
    created = datetime.now().strftime("%Y-%m-%d")
    document = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN"\n',
        '"http://gramps-project.org/xml/1.7.1/grampsxml.dtd">\n',
        '<database xmlns="http://gramps-project.org/xml/1.7.1/">\n',
        "  <header>\n",
        f'    <created date="{created}" version="5.1.0"/>\n',
        "    <researcher>\n",
        "      <resname>Demo Family Generator</resname>\n",
        "    </researcher>\n",
        "  </header>\n",
        "  <tags>\n",
        "  </tags>\n",
        "  <events>\n",
    ]
    document.extend(all_events)
    document.append("  </events>\n  <people>\n")
    document.append(father_person)
    document.append(mother_person)
    document.extend(children)
    document.extend(descendants)
    document.append("  </people>\n  <families>\n")
    document.extend(families)
    document.append("  </families>\n</database>\n")

    with open("demo_family.gramps", "w", encoding="utf-8") as f:
        f.write("".join(document))

    print("Generated demo_family.gramps with:")
    print("  - 2 parents (John and Mary Smith)")
    print(f"  - {num_children} children")
    print(f"  - {num_child_families} spouses")
    print(f"  - {grandchild_count} grandchildren")
    print(f"  - {len(families)} families with marriage events")
    print("  - Birth and death events for all")
    print(f"  - {len(all_additional_events)} additional events (Baptism, Education, Occupation, etc.)")
    print(f"  - Total events: {len(all_events)}")
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()