mygramps/generate_demo_family.py
Daniel Viegas 5860b3d25c Improve code quality across the codebase
- Add comprehensive type hints to MyTimeline.py methods
  - Add type hints to __init__, change_db, and tooltip formatting methods
  - Use Any for Gramps-specific types that aren't easily importable

- Refactor generate_demo_family.py to use ElementTree
  - Replace string concatenation with xml.etree.ElementTree for proper XML generation
  - Add compatibility handling for Python < 3.9 (ET.indent)
  - Add EventData, PersonData, and FamilyData dataclasses for better structure
  - Add comprehensive type hints to all functions

- Extract magic numbers to named constants
  - Add constants for UI dimensions, timeline heights, dialog sizes
  - Add constants for date calculations and genealogical year ranges
  - Improve code readability and maintainability

- Refactor duplicated code in filter dialog handlers
  - Extract common checkbox handler logic into reusable methods
  - Create _make_group_toggle_handler and _make_child_toggle_handler
  - Eliminate code duplication between event type and family filters

- Improve shell scripts with better error handling
  - Add validation for Gramps installation
  - Improve error messages with actionable troubleshooting steps
  - Use set -euo pipefail for better error detection
  - Add better user guidance in error scenarios
2025-11-29 22:49:16 +01:00

660 lines
24 KiB
Python

#!/usr/bin/env python3
"""
Generate a huge demo family for Gramps testing
"""
import random
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List, Tuple, Dict
# Seed the shared RNG once at import so every run yields the same demo family.
random.seed(42)

# --- Event numbering --------------------------------------------------------
# A person with id ``pid`` owns the event numbers starting at
# ``pid * EVENT_ID_OFFSET``: +0 is the birth, +1 the death, and the extra
# life events start at +EVENT_ID_START_OFFSET.
EVENT_ID_OFFSET = 10
# A family's marriage event is numbered ``fid * FAMILY_ID_OFFSET``.
FAMILY_ID_OFFSET = 100
EVENT_ID_START_OFFSET = 2

# --- Random date bounds -----------------------------------------------------
MIN_MONTH = 1
MAX_MONTH = 12
MIN_DAY = 1
# 28 exists in every month, so any (month, day) pair is a valid date.
MAX_DAY = 28

# --- Gramps XML identifiers -------------------------------------------------
GRAMPS_XML_VERSION = "5.1.0"
GRAMPS_XML_NAMESPACE = "http://gramps-project.org/xml/1.7.1/"
GRAMPS_XML_DTD = "http://gramps-project.org/xml/1.7.1/grampsxml.dtd"

# --- Optional life events ---------------------------------------------------
# Each row: (event type, probability, earliest age, latest age).
# Ages are whole years after birth; rows with ``None`` ages are tied to the
# year of death instead and are only used when the person has a death year.
EVENT_TYPES = [
    ("Baptism", 0.7, 0, 2),
    ("Christening", 0.5, 0, 1),
    ("Education", 0.8, 5, 18),
    ("Graduation", 0.6, 18, 25),
    ("Occupation", 0.9, 18, 65),
    ("Military Service", 0.3, 18, 30),
    ("Residence", 0.7, 0, 80),
    ("Emigration", 0.2, 20, 50),
    ("Immigration", 0.15, 20, 50),
    ("Retirement", 0.4, 60, 75),
    ("Burial", 0.6, None, None),
    ("Cremation", 0.2, None, None),
]

# --- Pools for random names, jobs and places --------------------------------
MALE_NAMES = [
    "James", "John", "Robert", "Michael", "William",
    "David", "Richard", "Joseph", "Thomas", "Charles",
    "Daniel", "Matthew", "Anthony", "Mark", "Donald",
    "Steven", "Paul", "Andrew", "Joshua", "Kenneth",
    "Kevin", "Brian", "George", "Timothy", "Ronald",
    "Jason", "Edward", "Jeffrey", "Ryan", "Jacob",
    "Gary", "Nicholas", "Eric", "Jonathan", "Stephen",
    "Larry", "Justin", "Scott", "Brandon", "Benjamin",
]
FEMALE_NAMES = [
    "Mary", "Patricia", "Jennifer", "Linda", "Elizabeth",
    "Barbara", "Susan", "Jessica", "Sarah", "Karen",
    "Nancy", "Lisa", "Betty", "Margaret", "Sandra",
    "Ashley", "Kimberly", "Emily", "Donna", "Michelle",
    "Dorothy", "Carol", "Amanda", "Melissa", "Deborah",
    "Stephanie", "Rebecca", "Sharon", "Laura", "Cynthia",
    "Kathleen", "Amy", "Angela", "Shirley", "Anna",
    "Brenda", "Pamela", "Emma", "Nicole", "Helen",
    "Samantha", "Katherine", "Christine", "Debra",
]
SURNAMES = [
    "Smith", "Johnson", "Williams", "Brown", "Jones",
    "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
    "Hernandez", "Lopez", "Wilson", "Anderson", "Thomas",
    "Taylor", "Moore", "Jackson", "Martin", "Lee",
    "Thompson", "White", "Harris", "Sanchez", "Clark",
    "Ramirez", "Lewis", "Robinson", "Walker", "Young",
    "Allen", "King", "Wright", "Scott", "Torres",
    "Nguyen", "Hill", "Flores", "Green", "Adams",
]
OCCUPATIONS = [
    "Farmer", "Teacher", "Engineer", "Doctor",
    "Lawyer", "Merchant", "Carpenter", "Blacksmith",
    "Sailor", "Soldier", "Clerk", "Nurse",
]
PLACES = [
    "New York", "London", "Paris", "Berlin",
    "Rome", "Madrid", "Amsterdam", "Vienna",
    "Prague", "Warsaw", "Stockholm", "Copenhagen",
]
@dataclass
class EventData:
    """Plain record describing one event before it is rendered to XML."""

    # Handle that <eventref hlink="..."> elements point at.
    handle: str
    # Text of the <type> child, e.g. "Birth" or "Marriage".
    event_type: str
    # Calendar date, rendered as YYYY-MM-DD in the <dateval> child.
    year: int
    month: int
    day: int
    # Free-text description; an empty string suppresses the <description> child.
    description: str
    # Numeric part of the event's "E####" id attribute.
    event_id: int
@dataclass
class PersonData:
    """Everything needed to (re)build one person's XML element."""

    # Handle of the <person> element.
    handle: str
    # First name and surname.
    name: str
    surname: str
    # Birth year, and death year (None when no death is recorded).
    birth: int
    death: Optional[int]
    # Gender code written to the <gender> child ("M" or "F" in this script).
    gender: str
    # Handles of the families this person is a parent in / a child of.
    parentin: List[str]
    childof: List[str]
@dataclass
class FamilyData:
    """Plain record describing one family.

    NOTE(review): nothing in the visible part of this script instantiates
    this class; the field meanings below are read off the field names and
    the parameters of gen_family - confirm before relying on them.
    """

    # Handle of the family itself.
    handle: str
    # Handles of the two parents.
    father_handle: str
    mother_handle: str
    # Handles of the children, in order.
    children_handles: List[str]
    # Year and event handle of the marriage.
    marriage_year: int
    marriage_handle: str
    # Numeric family id.
    family_id: int
def gen_handle(prefix: str, num: int) -> str:
    """Build a handle of the form ``_<prefix><num>``.

    The number is zero-padded to at least eight digits, so handles created
    with the same prefix and distinct numbers are distinct.
    """
    return "_{}{:08d}".format(prefix, num)
def create_event_element(event_data: EventData) -> ET.Element:
    """Render an EventData record as an ``<event>`` element.

    The element carries ``handle``, ``change`` (the current Unix time in
    whole seconds) and ``id`` ("E" plus the zero-padded event number)
    attributes, followed by ``<type>``, ``<dateval>`` and, when the
    description is non-empty, ``<description>`` children.
    """
    iso_date = f"{event_data.year}-{event_data.month:02d}-{event_data.day:02d}"

    event_elem = ET.Element(
        "event",
        {
            "handle": event_data.handle,
            "change": str(int(datetime.now().timestamp())),
            "id": f"E{event_data.event_id:04d}",
        },
    )
    ET.SubElement(event_elem, "type").text = event_data.event_type
    ET.SubElement(event_elem, "dateval", {"val": iso_date})

    # An empty description is left out entirely rather than written as "".
    if event_data.description:
        ET.SubElement(event_elem, "description").text = event_data.description

    return event_elem
def gen_additional_events(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int] = None
) -> List[Tuple[str, EventData]]:
    """Randomly pick extra life events (baptism, education, ...) for a person.

    Every row of EVENT_TYPES is tried in order and kept with the row's
    probability. Age-based events land a random number of years after
    ``birth_year`` and are dropped when that falls after ``death_year``;
    Burial and Cremation are placed in ``death_year`` and need one.

    Event numbers start at ``pid * EVENT_ID_OFFSET + EVENT_ID_START_OFFSET``
    and never reach ``(pid + 1) * EVENT_ID_OFFSET``: that number is the birth
    event of the next person, so once the person's slots are used up no
    further events are generated.

    Args:
        pid: Numeric person id; determines the event numbers.
        first_name: First name, used in descriptions.
        surname: Surname, used in descriptions.
        birth_year: Year of birth.
        death_year: Year of death, or None if the person has none.

    Returns:
        A list of ``(event_handle, EventData)`` pairs, in generation order.
    """
    full_name = f"{first_name} {surname}"
    # Types whose description is simply "<type> - <full name>".
    plainly_labelled = {
        "Education", "Graduation", "Military Service",
        "Emigration", "Immigration", "Retirement",
    }

    next_event_id = pid * EVENT_ID_OFFSET + EVENT_ID_START_OFFSET
    # First number that belongs to the next person (their birth event).
    end_event_id = (pid + 1) * EVENT_ID_OFFSET

    events: List[Tuple[str, EventData]] = []
    for event_type, probability, min_years, max_years in EVENT_TYPES:
        if next_event_id >= end_event_id:
            # All of this person's event numbers are taken; generating more
            # would reuse the handles and ids of the next person's events.
            break
        if random.random() > probability:
            continue

        if event_type in ("Burial", "Cremation"):
            # Death-related events only make sense with a known death year.
            if death_year is None:
                continue
            event_year = death_year
        else:
            if min_years is None or max_years is None:
                continue
            event_year = birth_year + random.randint(min_years, max_years)
            if death_year is not None and event_year > death_year:
                continue
        event_month = random.randint(MIN_MONTH, MAX_MONTH)
        event_day = random.randint(MIN_DAY, MAX_DAY)

        if event_type == "Occupation":
            description = f"{random.choice(OCCUPATIONS)} - {full_name}"
        elif event_type == "Residence":
            description = f"Residence in {random.choice(PLACES)} - {full_name}"
        elif event_type in plainly_labelled:
            description = f"{event_type} - {full_name}"
        else:
            description = f"{event_type} of {surname}, {first_name}"

        event_handle = gen_handle("EVENT", next_event_id)
        events.append((
            event_handle,
            EventData(
                handle=event_handle,
                event_type=event_type,
                year=event_year,
                month=event_month,
                day=event_day,
                description=description,
                event_id=next_event_id,
            ),
        ))
        next_event_id += 1

    return events
def gen_person(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int] = None,
    gender: str = "M",
    parentin_families: Optional[List[str]] = None,
    childof_families: Optional[List[str]] = None,
    reuse_additional_events: Optional[List[Tuple[str, EventData]]] = None
) -> Tuple[ET.Element, ET.Element, Optional[ET.Element], List[ET.Element], List[Tuple[str, EventData]]]:
    """Generate a ``<person>`` element together with all of its events.

    Args:
        pid: Numeric person id; the birth event is number
            ``pid * EVENT_ID_OFFSET`` and the death event the one after it.
        first_name: First name.
        surname: Surname.
        birth_year: Year of birth; month and day are drawn at random.
        death_year: Year of death, or None for no death event.
        gender: Text of the ``<gender>`` child.
        parentin_families: Handles of families this person is a parent in.
        childof_families: Handles of families this person is a child of.
        reuse_additional_events: Previously generated ``(handle, EventData)``
            pairs to reference instead of drawing new random events; used
            when a person element is rebuilt with different family links.

    Returns:
        ``(person_elem, birth_event, death_event, additional_event_elems,
        additional_events)`` where ``death_event`` is None without a death
        year and ``additional_events`` are the ``(handle, EventData)`` pairs
        behind ``additional_event_elems``.
    """
    person_handle = gen_handle("PERSON", pid)
    birth_event_id = pid * EVENT_ID_OFFSET
    death_event_id = birth_event_id + 1
    birth_handle = gen_handle("EVENT", birth_event_id)
    death_handle = gen_handle("EVENT", death_event_id) if death_year is not None else None

    person_elem = ET.Element("person")
    person_elem.set("handle", person_handle)
    person_elem.set("change", str(int(datetime.now().timestamp())))
    person_elem.set("id", f"I{pid:04d}")
    ET.SubElement(person_elem, "gender").text = gender
    name_elem = ET.SubElement(person_elem, "name")
    name_elem.set("type", "Birth Name")
    ET.SubElement(name_elem, "first").text = first_name
    ET.SubElement(name_elem, "surname").text = surname

    # Draw the additional events before the birth/death dates so the random
    # sequence stays the same as in earlier versions of this generator.
    if reuse_additional_events is not None:
        additional_events = reuse_additional_events
    else:
        additional_events = gen_additional_events(pid, first_name, surname, birth_year, death_year)

    # Event references: birth, optional death, then the additional events.
    referenced_handles = [birth_handle]
    if death_handle is not None:
        referenced_handles.append(death_handle)
    referenced_handles.extend(event_handle for event_handle, _ in additional_events)
    for event_handle in referenced_handles:
        event_ref = ET.SubElement(person_elem, "eventref")
        event_ref.set("hlink", event_handle)
        event_ref.set("role", "Primary")

    # Family links. <childof> is written before <parentin>, the order the
    # Gramps XML DTD declares for <person> content.
    # NOTE(review): confirm the order against grampsxml.dtd 1.7.1.
    for family_handle in childof_families or []:
        ET.SubElement(person_elem, "childof").set("hlink", family_handle)
    for family_handle in parentin_families or []:
        ET.SubElement(person_elem, "parentin").set("hlink", family_handle)

    # Birth event
    birth_month = random.randint(MIN_MONTH, MAX_MONTH)
    birth_day = random.randint(MIN_DAY, MAX_DAY)
    birth_event = create_event_element(EventData(
        handle=birth_handle,
        event_type="Birth",
        year=birth_year,
        month=birth_month,
        day=birth_day,
        description=f"Birth of {surname}, {first_name}",
        event_id=birth_event_id,
    ))

    # Death event (only when a death year was given)
    death_event: Optional[ET.Element] = None
    if death_handle is not None and death_year is not None:
        death_month = random.randint(MIN_MONTH, MAX_MONTH)
        death_day = random.randint(MIN_DAY, MAX_DAY)
        death_event = create_event_element(EventData(
            handle=death_handle,
            event_type="Death",
            year=death_year,
            month=death_month,
            day=death_day,
            description=f"Death of {surname}, {first_name}",
            event_id=death_event_id,
        ))

    additional_events_xml = [create_event_element(event_data) for _, event_data in additional_events]
    return person_elem, birth_event, death_event, additional_events_xml, additional_events
def gen_family(
    fid: int,
    father_handle: str,
    mother_handle: str,
    marriage_year: int,
    children_handles: List[str]
) -> Tuple[ET.Element, ET.Element]:
    """Generate a ``<family>`` element and its marriage event.

    Args:
        fid: Numeric family id.
        father_handle: Handle of the father.
        mother_handle: Handle of the mother.
        marriage_year: Year of the marriage; month and day are random.
        children_handles: Handles of the children, in order.

    Returns:
        ``(family_elem, marriage_event)``.
    """
    family_handle = gen_handle("FAMILY", fid)

    # Person events use gen_handle("EVENT", pid * EVENT_ID_OFFSET + k) and
    # ids "E####" for every number from 10 upward, so a marriage numbered
    # fid * FAMILY_ID_OFFSET in that same namespace is identical to the birth
    # event of person fid * 10. Keep the number but give marriage events
    # their own handle prefix and id prefix so they can never clash.
    marriage_number = fid * FAMILY_ID_OFFSET
    marriage_handle = gen_handle("FAMEVENT", marriage_number)

    family_elem = ET.Element("family")
    family_elem.set("handle", family_handle)
    family_elem.set("change", str(int(datetime.now().timestamp())))
    family_elem.set("id", f"F{fid:04d}")
    ET.SubElement(family_elem, "rel").set("type", "Married")
    ET.SubElement(family_elem, "father").set("hlink", father_handle)
    ET.SubElement(family_elem, "mother").set("hlink", mother_handle)

    # <eventref> is written before the <childref> elements, the order the
    # Gramps XML DTD declares for <family> content.
    # NOTE(review): confirm the order against grampsxml.dtd 1.7.1.
    marriage_ref = ET.SubElement(family_elem, "eventref")
    marriage_ref.set("hlink", marriage_handle)
    marriage_ref.set("role", "Family")
    for child_handle in children_handles:
        ET.SubElement(family_elem, "childref").set("hlink", child_handle)

    # Marriage event
    marriage_month = random.randint(MIN_MONTH, MAX_MONTH)
    marriage_day = random.randint(MIN_DAY, MAX_DAY)
    marriage_event = create_event_element(EventData(
        handle=marriage_handle,
        event_type="Marriage",
        year=marriage_year,
        month=marriage_month,
        day=marriage_day,
        description="Marriage",
        event_id=marriage_number,
    ))
    # create_event_element always writes an "E####" id; replace it with the
    # family-specific form so it cannot equal a person event's id.
    marriage_event.set("id", f"EF{marriage_number:04d}")

    return family_elem, marriage_event
def create_gramps_xml_document(
    events: List[ET.Element],
    people: List[ET.Element],
    families: List[ET.Element]
) -> ET.ElementTree:
    """Assemble the full ``<database>`` tree from prepared elements.

    The root gets the Gramps namespace as its ``xmlns`` attribute and, in
    this order, a ``<header>`` (creation date, generator version and
    researcher name), an empty ``<tags>`` element, and the ``<events>``,
    ``<people>`` and ``<families>`` containers filled with the given
    elements.
    """
    root = ET.Element("database")
    root.set("xmlns", GRAMPS_XML_NAMESPACE)

    header = ET.SubElement(root, "header")
    ET.SubElement(
        header,
        "created",
        {"date": datetime.now().strftime('%Y-%m-%d'), "version": GRAMPS_XML_VERSION},
    )
    researcher = ET.SubElement(header, "researcher")
    ET.SubElement(researcher, "resname").text = "Demo Family Generator"

    # No tags are generated, but the (empty) container is still written.
    ET.SubElement(root, "tags")

    sections = (("events", events), ("people", people), ("families", families))
    for section_tag, members in sections:
        ET.SubElement(root, section_tag).extend(members)

    return ET.ElementTree(root)
def _write_gramps_file(tree: ET.ElementTree, path: str) -> None:
    """Write *tree* to *path* with an XML declaration and the Gramps DOCTYPE."""
    # ET.indent only exists on Python 3.9+; older versions write the
    # document without indentation.
    if hasattr(ET, "indent"):
        ET.indent(tree, space=" ")

    # ElementTree cannot emit a DOCTYPE, and the declaration it writes is
    # single-quoted (so searching the written file for a double-quoted one
    # finds nothing). Serialize the root on its own and write the prolog
    # by hand instead.
    doctype = f'<!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN"\n"{GRAMPS_XML_DTD}">\n'
    xml_body = ET.tostring(tree.getroot(), encoding="unicode")
    with open(path, "w", encoding="utf-8") as f:
        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        f.write(doctype)
        f.write(xml_body)
        f.write("\n")


def main() -> None:
    """Generate the demo family and write it to ``demo_family.gramps``.

    The tree consists of John and Mary Smith, their 15 children, a spouse
    and 3-5 children for each of the first five children, one family with a
    marriage event per couple, and random additional events for everyone.
    """
    print("Generating huge demo family...")

    num_children = 15
    num_children_with_family = 5
    first_child_id = 3

    # --- Founding couple -----------------------------------------------------
    main_family_handle = gen_handle("FAMILY", 1)

    # Father: John Smith, born 1950, died 2010
    father_id = 1
    father_handle = gen_handle("PERSON", father_id)
    father_person, father_birth, father_death, father_additional_xml, _ = gen_person(
        father_id, "John", "Smith", 1950, 2010, "M",
        parentin_families=[main_family_handle]
    )
    # Mother: Mary Smith, born 1952, died 2015
    mother_id = 2
    mother_handle = gen_handle("PERSON", mother_id)
    mother_person, mother_birth, mother_death, mother_additional_xml, _ = gen_person(
        mother_id, "Mary", "Smith", 1952, 2015, "F",
        parentin_families=[main_family_handle]
    )

    all_additional_events: List[ET.Element] = father_additional_xml + mother_additional_xml
    all_events: List[ET.Element] = [father_birth, mother_birth]
    # Compare with None: the truth value of an Element reflects whether it
    # has children, not whether it exists.
    if father_death is not None:
        all_events.append(father_death)
    if mother_death is not None:
        all_events.append(mother_death)

    # --- Children ------------------------------------------------------------
    # The person elements are built at the end, once it is known which
    # children head a family of their own; here only the events and the data
    # needed for that final build are kept.
    child_handles: List[str] = []
    child_data: Dict[int, PersonData] = {}
    child_additional_events_map: Dict[int, List[Tuple[str, EventData]]] = {}
    for i in range(num_children):
        child_id = first_child_id + i
        gender = "M" if i % 2 == 0 else "F"
        first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES)
        birth_year = 1970 + (i * 2)  # Spread births from 1970 to 1998
        death_year = birth_year + random.randint(60, 90) if random.random() < 0.3 else None  # 30% chance of death
        child_handle = gen_handle("PERSON", child_id)

        _, child_birth, child_death, child_additional_xml, child_additional_tuples = gen_person(
            child_id, first_name, "Smith", birth_year, death_year, gender,
            childof_families=[main_family_handle]
        )
        child_handles.append(child_handle)
        all_events.append(child_birth)
        if child_death is not None:
            all_events.append(child_death)
        all_additional_events.extend(child_additional_xml)
        child_additional_events_map[child_id] = child_additional_tuples
        child_data[child_id] = PersonData(
            handle=child_handle,
            name=first_name,
            surname="Smith",
            birth=birth_year,
            death=death_year,
            gender=gender,
            parentin=[],
            childof=[main_family_handle]
        )

    # --- Main family ---------------------------------------------------------
    family_id = 1
    family_elem, marriage_event = gen_family(family_id, father_handle, mother_handle, 1969, child_handles)
    all_events.append(marriage_event)
    families: List[ET.Element] = [family_elem]

    # --- Spouses and grandchildren (first five children) ---------------------
    grandchildren: List[ET.Element] = []  # spouses are collected here as well
    next_person_id = first_child_id + num_children
    for i in range(num_children_with_family):
        parent = child_data[first_child_id + i]
        family_id += 1
        child_family_handle = gen_handle("FAMILY", family_id)

        # Spouse of the opposite gender, born within two years of the parent
        spouse_gender = "F" if parent.gender == "M" else "M"
        spouse_name = random.choice(FEMALE_NAMES if spouse_gender == "F" else MALE_NAMES)
        spouse_birth = 1970 + (i * 2) + random.randint(-2, 2)
        spouse_handle = gen_handle("PERSON", next_person_id)
        spouse_person, spouse_birth_event, spouse_death_event, spouse_additional_xml, _ = gen_person(
            next_person_id, spouse_name, "Smith", spouse_birth, None, spouse_gender,
            parentin_families=[child_family_handle]
        )
        grandchildren.append(spouse_person)
        all_events.append(spouse_birth_event)
        if spouse_death_event is not None:
            all_events.append(spouse_death_event)
        all_additional_events.extend(spouse_additional_xml)
        next_person_id += 1

        # The child is now a parent in the new family as well
        parent.parentin.append(child_family_handle)

        # Create 3-5 children per couple
        num_grandchildren = random.randint(3, 5)
        grandchild_handles: List[str] = []
        for j in range(num_grandchildren):
            gchild_gender = "M" if j % 2 == 0 else "F"
            gchild_name = random.choice(MALE_NAMES if gchild_gender == "M" else FEMALE_NAMES)
            gchild_birth = 1995 + (i * 3) + j
            gchild_handle = gen_handle("PERSON", next_person_id)
            gchild_person, gchild_birth_event, gchild_death_event, gchild_additional_xml, _ = gen_person(
                next_person_id, gchild_name, "Smith", gchild_birth, None, gchild_gender,
                childof_families=[child_family_handle]
            )
            grandchildren.append(gchild_person)
            grandchild_handles.append(gchild_handle)
            all_events.append(gchild_birth_event)
            if gchild_death_event is not None:
                all_events.append(gchild_death_event)
            all_additional_events.extend(gchild_additional_xml)
            next_person_id += 1

        # Put each partner in the role matching their gender; every other
        # child is a daughter, whose husband must be the father.
        if parent.gender == "M":
            couple_father, couple_mother = parent.handle, spouse_handle
        else:
            couple_father, couple_mother = spouse_handle, parent.handle
        fam_elem, fam_marriage = gen_family(
            family_id, couple_father, couple_mother, 1990 + i, grandchild_handles
        )
        families.append(fam_elem)
        all_events.append(fam_marriage)

    # --- Build the children's person elements --------------------------------
    # Reuse the additional events drawn above so the references match the
    # event elements already collected.
    children: List[ET.Element] = []
    for i in range(num_children):
        child_id = first_child_id + i
        data = child_data[child_id]
        child_person, _, _, _, _ = gen_person(
            child_id, data.name, data.surname, data.birth, data.death, data.gender,
            parentin_families=data.parentin, childof_families=data.childof,
            reuse_additional_events=child_additional_events_map.get(child_id, [])
        )
        children.append(child_person)

    # --- Assemble and write the document -------------------------------------
    all_events.extend(all_additional_events)
    people = [father_person, mother_person] + children + grandchildren
    tree = create_gramps_xml_document(all_events, people, families)
    _write_gramps_file(tree, "demo_family.gramps")

    total_events = len(all_events)
    print("Generated demo_family.gramps with:")
    print(" - 2 parents (John and Mary Smith)")
    print(" - 15 children")
    print(" - 5 spouses")
    print(" - ~20 grandchildren")
    print(" - Multiple families with marriage events")
    print(" - Birth and death events for all")
    print(f" - {len(all_additional_events)} additional events (Baptism, Education, Occupation, etc.)")
    print(f" - Total events: {total_events}")


if __name__ == "__main__":
    main()