#!/usr/bin/env python3
"""
Generate a huge demo family for Gramps testing
"""

import random
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List, Tuple, Dict

# Set seed for deterministic event generation
random.seed(42)

# Constants
# Every person owns a block of EVENT_ID_OFFSET consecutive event ids:
# slot 0 is the birth, slot 1 the death, and the additional events start at
# slot EVENT_ID_START_OFFSET.  The block must hold 2 + len(EVENT_TYPES) ids,
# otherwise one person's events spill into the next person's block and
# produce duplicate handles.
EVENT_ID_OFFSET = 20
FAMILY_ID_OFFSET = 100
# Marriage events are numbered from this base so that they can never land
# inside a per-person event block (pid * EVENT_ID_OFFSET).
FAMILY_EVENT_ID_BASE = 1_000_000
EVENT_ID_START_OFFSET = 2
MIN_MONTH = 1
MAX_MONTH = 12
MIN_DAY = 1
MAX_DAY = 28  # 28 keeps every generated day valid in every month
GRAMPS_XML_VERSION = "5.1.0"
GRAMPS_XML_NAMESPACE = "http://gramps-project.org/xml/1.7.1/"
GRAMPS_XML_DTD = "http://gramps-project.org/xml/1.7.1/grampsxml.dtd"
GRAMPS_XML_PUBLIC_ID = "-//Gramps//DTD Gramps XML 1.7.1//EN"

# Event types to add
EVENT_TYPES = [
    ("Baptism", 0.7, 0, 2),  # 70% chance, 0-2 years after birth
    ("Christening", 0.5, 0, 1),  # 50% chance, 0-1 years after birth
    ("Education", 0.8, 5, 18),  # 80% chance, 5-18 years after birth
    ("Graduation", 0.6, 18, 25),  # 60% chance, 18-25 years after birth
    ("Occupation", 0.9, 18, 65),  # 90% chance, 18-65 years after birth
    ("Military Service", 0.3, 18, 30),  # 30% chance, 18-30 years after birth
    ("Residence", 0.7, 0, 80),  # 70% chance, any time
    ("Emigration", 0.2, 20, 50),  # 20% chance, 20-50 years after birth
    ("Immigration", 0.15, 20, 50),  # 15% chance, 20-50 years after birth
    ("Retirement", 0.4, 60, 75),  # 40% chance, 60-75 years after birth
    ("Burial", 0.6, None, None),  # 60% chance if death exists, at death time
    ("Cremation", 0.2, None, None),  # 20% chance if death exists, at death time
]

# Event types that are dated in the year of death instead of relative to birth
_DEATH_EVENT_TYPES = ("Burial", "Cremation")

# Event types whose description is simply "<type> - <first> <surname>"
_PLAIN_LIFE_EVENT_TYPES = frozenset({
    "Education",
    "Graduation",
    "Military Service",
    "Emigration",
    "Immigration",
    "Retirement",
})

# Name lists
MALE_NAMES = [
    "James", "John", "Robert", "Michael", "William", "David", "Richard",
    "Joseph", "Thomas", "Charles", "Daniel", "Matthew", "Anthony", "Mark",
    "Donald", "Steven", "Paul", "Andrew", "Joshua", "Kenneth", "Kevin",
    "Brian", "George", "Timothy", "Ronald", "Jason", "Edward", "Jeffrey",
    "Ryan", "Jacob", "Gary", "Nicholas", "Eric", "Jonathan", "Stephen",
    "Larry", "Justin", "Scott", "Brandon", "Benjamin"
]

FEMALE_NAMES = [
    "Mary", "Patricia", "Jennifer", "Linda", "Elizabeth", "Barbara", "Susan",
    "Jessica", "Sarah", "Karen", "Nancy", "Lisa", "Betty", "Margaret",
    "Sandra", "Ashley", "Kimberly", "Emily", "Donna", "Michelle", "Dorothy",
    "Carol", "Amanda", "Melissa", "Deborah", "Stephanie", "Rebecca", "Sharon",
    "Laura", "Cynthia", "Kathleen", "Amy", "Angela", "Shirley", "Anna",
    "Brenda", "Pamela", "Emma", "Nicole", "Helen", "Samantha", "Katherine",
    "Christine", "Debra"
]

SURNAMES = [
    "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller",
    "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez", "Wilson",
    "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin", "Lee",
    "Thompson", "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis",
    "Robinson", "Walker", "Young", "Allen", "King", "Wright", "Scott",
    "Torres", "Nguyen", "Hill", "Flores", "Green", "Adams"
]

OCCUPATIONS = [
    "Farmer", "Teacher", "Engineer", "Doctor", "Lawyer", "Merchant",
    "Carpenter", "Blacksmith", "Sailor", "Soldier", "Clerk", "Nurse"
]

PLACES = [
    "New York", "London", "Paris", "Berlin", "Rome", "Madrid", "Amsterdam",
    "Vienna", "Prague", "Warsaw", "Stockholm", "Copenhagen"
]


@dataclass
class EventData:
    """Data structure for an event."""
    handle: str  # value of the XML "handle" attribute
    event_type: str  # text of the <type> child element
    year: int
    month: int
    day: int
    description: str  # omitted from the XML when empty
    event_id: int  # rendered as the "E%04d" id attribute


@dataclass
class PersonData:
    """Data structure for person information."""
    handle: str
    name: str
    surname: str
    birth: int
    death: Optional[int]
    gender: str
    parentin: List[str]
    childof: List[str]


@dataclass
class FamilyData:
    """Data structure for family information."""
    handle: str
    father_handle: str
    mother_handle: str
    children_handles: List[str]
    marriage_year: int
    marriage_handle: str
    family_id: int


def gen_handle(prefix: str, num: int) -> str:
    """Generate unique handle."""
    return f"_{prefix}{num:08d}"


def _change_timestamp() -> str:
    """Return the current Unix time as the string used for "change" attributes."""
    return str(int(datetime.now().timestamp()))


def create_event_element(event_data: EventData) -> ET.Element:
    """Create an XML element for an event.

    The element carries the handle, a change timestamp and an ``E``-prefixed
    id as attributes, followed by ``<type>``, ``<dateval>`` and, when the
    description is non-empty, ``<description>`` children.
    """
    event_elem = ET.Element("event")
    event_elem.set("handle", event_data.handle)
    event_elem.set("change", _change_timestamp())
    event_elem.set("id", f"E{event_data.event_id:04d}")

    type_elem = ET.SubElement(event_elem, "type")
    type_elem.text = event_data.event_type

    date_elem = ET.SubElement(event_elem, "dateval")
    date_elem.set("val", f"{event_data.year}-{event_data.month:02d}-{event_data.day:02d}")

    if event_data.description:
        desc_elem = ET.SubElement(event_elem, "description")
        desc_elem.text = event_data.description

    return event_elem


def _describe_event(event_type: str, first_name: str, surname: str) -> str:
    """Build the description text for an additional event.

    Occupation and Residence events draw a random occupation / place.
    """
    if event_type == "Occupation":
        occupation = random.choice(OCCUPATIONS)
        return f"{occupation} - {first_name} {surname}"
    if event_type == "Residence":
        place = random.choice(PLACES)
        return f"Residence in {place} - {first_name} {surname}"
    if event_type in _PLAIN_LIFE_EVENT_TYPES:
        return f"{event_type} - {first_name} {surname}"
    return f"{event_type} of {surname}, {first_name}"


def gen_additional_events(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int] = None
) -> List[Tuple[str, EventData]]:
    """Generate additional events for a person.

    Each entry of EVENT_TYPES is kept with its configured probability.
    Burial and Cremation are only produced when ``death_year`` is given and
    are dated in that year; every other event is dated a random number of
    years after birth and dropped if it would fall after the death year.

    Returns a list of ``(handle, EventData)`` pairs.  Event ids are assigned
    consecutively starting at ``pid * EVENT_ID_OFFSET + EVENT_ID_START_OFFSET``.
    """
    events: List[Tuple[str, EventData]] = []
    next_event_id = pid * EVENT_ID_OFFSET + EVENT_ID_START_OFFSET

    for event_type, probability, min_years, max_years in EVENT_TYPES:
        if random.random() > probability:
            continue

        if event_type in _DEATH_EVENT_TYPES:
            if death_year is None:
                continue
            event_year = death_year
        else:
            # Entries without an age range cannot be dated relative to birth
            if min_years is None or max_years is None:
                continue
            event_year = birth_year + random.randint(min_years, max_years)
            if death_year is not None and event_year > death_year:
                continue

        event_month = random.randint(MIN_MONTH, MAX_MONTH)
        event_day = random.randint(MIN_DAY, MAX_DAY)
        description = _describe_event(event_type, first_name, surname)

        event_handle = gen_handle("EVENT", next_event_id)
        event_data = EventData(
            handle=event_handle,
            event_type=event_type,
            year=event_year,
            month=event_month,
            day=event_day,
            description=description,
            event_id=next_event_id
        )
        events.append((event_handle, event_data))
        next_event_id += 1

    return events


def gen_person(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int] = None,
    gender: str = "M",
    parentin_families: Optional[List[str]] = None,
    childof_families: Optional[List[str]] = None,
    reuse_additional_events: Optional[List[Tuple[str, EventData]]] = None
) -> Tuple[ET.Element, ET.Element, Optional[ET.Element], List[ET.Element], List[Tuple[str, EventData]]]:
    """Generate a person with all associated events.

    Args:
        pid: Numeric person id; also determines the person's event id block.
        first_name: Given name.
        surname: Family name.
        birth_year: Year of the birth event.
        death_year: Year of the death event, or None for no death event.
        gender: Text of the ``<gender>`` element.
        parentin_families: Handles of families in which the person is a parent.
        childof_families: Handles of families in which the person is a child.
        reuse_additional_events: Previously generated ``(handle, EventData)``
            pairs to reference instead of generating new additional events.

    Returns:
        ``(person_elem, birth_event, death_event, additional_event_elems,
        additional_events)`` where ``death_event`` is None when no death year
        was given and ``additional_events`` are the ``(handle, EventData)``
        pairs behind ``additional_event_elems``.
    """
    has_death = death_year is not None
    birth_event_id = pid * EVENT_ID_OFFSET
    death_event_id = birth_event_id + 1

    handle = gen_handle("PERSON", pid)
    birth_handle = gen_handle("EVENT", birth_event_id)
    death_handle = gen_handle("EVENT", death_event_id) if has_death else None

    # Create person element
    person_elem = ET.Element("person")
    person_elem.set("handle", handle)
    person_elem.set("change", _change_timestamp())
    person_elem.set("id", f"I{pid:04d}")

    gender_elem = ET.SubElement(person_elem, "gender")
    gender_elem.text = gender

    name_elem = ET.SubElement(person_elem, "name")
    name_elem.set("type", "Birth Name")
    first_elem = ET.SubElement(name_elem, "first")
    first_elem.text = first_name
    surname_elem = ET.SubElement(name_elem, "surname")
    surname_elem.text = surname

    # Add additional events - reuse if provided, otherwise generate new
    if reuse_additional_events is not None:
        additional_events = reuse_additional_events
    else:
        additional_events = gen_additional_events(pid, first_name, surname, birth_year, death_year)

    # Event references: birth, death (if any), then the additional events
    referenced_handles = [birth_handle]
    if death_handle is not None:
        referenced_handles.append(death_handle)
    referenced_handles.extend(event_handle for event_handle, _ in additional_events)
    for event_handle in referenced_handles:
        event_ref = ET.SubElement(person_elem, "eventref")
        event_ref.set("hlink", event_handle)
        event_ref.set("role", "Primary")

    # The Gramps XML DTD lists childof before parentin inside <person>
    for family_handle in childof_families or []:
        childof_elem = ET.SubElement(person_elem, "childof")
        childof_elem.set("hlink", family_handle)

    for family_handle in parentin_families or []:
        parentin_elem = ET.SubElement(person_elem, "parentin")
        parentin_elem.set("hlink", family_handle)

    # Birth event
    birth_month = random.randint(MIN_MONTH, MAX_MONTH)
    birth_day = random.randint(MIN_DAY, MAX_DAY)
    birth_event = create_event_element(EventData(
        handle=birth_handle,
        event_type="Birth",
        year=birth_year,
        month=birth_month,
        day=birth_day,
        description=f"Birth of {surname}, {first_name}",
        event_id=birth_event_id
    ))

    # Death event
    death_event: Optional[ET.Element] = None
    if death_handle is not None and death_year is not None:
        death_month = random.randint(MIN_MONTH, MAX_MONTH)
        death_day = random.randint(MIN_DAY, MAX_DAY)
        death_event = create_event_element(EventData(
            handle=death_handle,
            event_type="Death",
            year=death_year,
            month=death_month,
            day=death_day,
            description=f"Death of {surname}, {first_name}",
            event_id=death_event_id
        ))

    # Convert additional events to XML elements
    all_additional_events_xml = [create_event_element(event_data) for _, event_data in additional_events]

    return person_elem, birth_event, death_event, all_additional_events_xml, additional_events


def gen_family(
    fid: int,
    father_handle: str,
    mother_handle: str,
    marriage_year: int,
    children_handles: List[str]
) -> Tuple[ET.Element, ET.Element]:
    """Generate a family with marriage event.

    Returns ``(family_elem, marriage_event)``.  The marriage event id is
    ``FAMILY_EVENT_ID_BASE + fid * FAMILY_ID_OFFSET`` so that it stays clear
    of the per-person event id blocks.
    """
    marriage_event_id = FAMILY_EVENT_ID_BASE + fid * FAMILY_ID_OFFSET
    handle = gen_handle("FAMILY", fid)
    marriage_handle = gen_handle("EVENT", marriage_event_id)

    # Create family element
    family_elem = ET.Element("family")
    family_elem.set("handle", handle)
    family_elem.set("change", _change_timestamp())
    family_elem.set("id", f"F{fid:04d}")

    rel_elem = ET.SubElement(family_elem, "rel")
    rel_elem.set("type", "Married")

    father_elem = ET.SubElement(family_elem, "father")
    father_elem.set("hlink", father_handle)

    mother_elem = ET.SubElement(family_elem, "mother")
    mother_elem.set("hlink", mother_handle)

    # The Gramps XML DTD lists eventref before childref inside <family>
    marriage_ref = ET.SubElement(family_elem, "eventref")
    marriage_ref.set("hlink", marriage_handle)
    marriage_ref.set("role", "Family")

    for child_handle in children_handles:
        child_elem = ET.SubElement(family_elem, "childref")
        child_elem.set("hlink", child_handle)

    # Marriage event
    marriage_month = random.randint(MIN_MONTH, MAX_MONTH)
    marriage_day = random.randint(MIN_DAY, MAX_DAY)
    marriage_event = create_event_element(EventData(
        handle=marriage_handle,
        event_type="Marriage",
        year=marriage_year,
        month=marriage_month,
        day=marriage_day,
        description="Marriage",
        event_id=marriage_event_id
    ))

    return family_elem, marriage_event


def create_gramps_xml_document(
    events: List[ET.Element],
    people: List[ET.Element],
    families: List[ET.Element]
) -> ET.ElementTree:
    """Create the complete Gramps XML document.

    The ``<database>`` root contains, in order: header, (empty) tags, events,
    people and families.
    """
    # Create root element
    database = ET.Element("database")
    database.set("xmlns", GRAMPS_XML_NAMESPACE)

    # Header
    header = ET.SubElement(database, "header")
    created = ET.SubElement(header, "created")
    created.set("date", datetime.now().strftime('%Y-%m-%d'))
    created.set("version", GRAMPS_XML_VERSION)
    researcher = ET.SubElement(header, "researcher")
    resname = ET.SubElement(researcher, "resname")
    resname.text = "Demo Family Generator"

    # Tags (empty)
    ET.SubElement(database, "tags")

    # Events, people and families each get their own container element
    ET.SubElement(database, "events").extend(events)
    ET.SubElement(database, "people").extend(people)
    ET.SubElement(database, "families").extend(families)

    return ET.ElementTree(database)


def _write_gramps_file(tree: ET.ElementTree, output_path: str) -> None:
    """Write *tree* to *output_path* with an XML declaration and DOCTYPE.

    ElementTree cannot emit a DOCTYPE itself, so the prolog is written by
    hand and the serialized root element is appended after it.
    """
    try:
        ET.indent(tree, space=" ")
    except AttributeError:
        # Python < 3.9 doesn't have indent, will write without indentation
        pass

    body = ET.tostring(tree.getroot(), encoding="unicode")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        f.write(f'<!DOCTYPE database PUBLIC "{GRAMPS_XML_PUBLIC_ID}"\n"{GRAMPS_XML_DTD}">\n')
        f.write(body)
        f.write("\n")


def main(output_path: str = "demo_family.gramps") -> None:
    """Main function to generate the demo family.

    Builds John and Mary Smith, their 15 children, a spouse and 3-5 children
    for each of the first 5 children, and writes the result as Gramps XML.

    Args:
        output_path: File to write; defaults to ``demo_family.gramps`` in the
            current directory.
    """
    print("Generating huge demo family...")

    surname = "Smith"
    num_children = 15
    num_children_with_families = 5
    main_family_id = 1
    main_family_handle = gen_handle("FAMILY", main_family_id)

    people: List[ET.Element] = []
    families: List[ET.Element] = []
    core_events: List[ET.Element] = []  # births, deaths and marriages
    all_additional_events: List[ET.Element] = []

    def add_person(
        pid: int,
        first_name: str,
        birth_year: int,
        death_year: Optional[int],
        gender: str,
        parentin: Optional[List[str]] = None,
        childof: Optional[List[str]] = None
    ) -> str:
        """Generate one person, record the person and events, return the handle."""
        person, birth, death, additional_xml, _ = gen_person(
            pid, first_name, surname, birth_year, death_year, gender,
            parentin_families=parentin,
            childof_families=childof
        )
        people.append(person)
        core_events.append(birth)
        # Compare with None: the truth value of an Element depends on its
        # child count and testing it is deprecated.
        if death is not None:
            core_events.append(death)
        all_additional_events.extend(additional_xml)
        return gen_handle("PERSON", pid)

    def add_family(
        fid: int,
        father: str,
        mother: str,
        marriage_year: int,
        children: List[str]
    ) -> None:
        """Generate one family and record it together with its marriage event."""
        family_elem, marriage_event = gen_family(fid, father, mother, marriage_year, children)
        families.append(family_elem)
        core_events.append(marriage_event)

    # Father: John Smith, born 1950, died 2010
    father_handle = add_person(1, "John", 1950, 2010, "M", parentin=[main_family_handle])
    # Mother: Mary Smith, born 1952, died 2015
    mother_handle = add_person(2, "Mary", 1952, 2015, "F", parentin=[main_family_handle])

    # Generate 15 children
    next_pid = 3
    child_handles: List[str] = []
    for i in range(num_children):
        gender = "M" if i % 2 == 0 else "F"
        first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES)
        birth_year = 1970 + (i * 2)  # Spread births from 1970 to 1998
        # 30% chance of death
        death_year = birth_year + random.randint(60, 90) if random.random() < 0.3 else None

        # The first few children found families of their own.  Those families
        # are numbered consecutively after the main family, so the handle is
        # already known here and the person never has to be regenerated.
        own_family: Optional[List[str]] = None
        if i < num_children_with_families:
            own_family = [gen_handle("FAMILY", main_family_id + 1 + i)]

        child_handles.append(add_person(
            next_pid, first_name, birth_year, death_year, gender,
            parentin=own_family,
            childof=[main_family_handle]
        ))
        next_pid += 1

    # Generate family
    add_family(main_family_id, father_handle, mother_handle, 1969, child_handles)

    # Generate grandchildren (children of first 5 children)
    family_id = main_family_id
    grandchild_count = 0
    for i in range(num_children_with_families):
        family_id += 1
        child_family_handle = gen_handle("FAMILY", family_id)
        parent_handle = child_handles[i]
        parent_gender = "M" if i % 2 == 0 else "F"
        spouse_gender = "F" if parent_gender == "M" else "M"

        # Create spouse
        spouse_name = random.choice(FEMALE_NAMES if spouse_gender == "F" else MALE_NAMES)
        spouse_birth = 1970 + (i * 2) + random.randint(-2, 2)
        spouse_handle = add_person(
            next_pid, spouse_name, spouse_birth, None, spouse_gender,
            parentin=[child_family_handle]
        )
        next_pid += 1

        # Create 3-5 children per couple
        num_grandchildren = random.randint(3, 5)
        grandchild_handles: List[str] = []
        for j in range(num_grandchildren):
            gchild_gender = "M" if j % 2 == 0 else "F"
            gchild_name = random.choice(MALE_NAMES if gchild_gender == "M" else FEMALE_NAMES)
            gchild_birth = 1995 + (i * 3) + j
            grandchild_handles.append(add_person(
                next_pid, gchild_name, gchild_birth, None, gchild_gender,
                childof=[child_family_handle]
            ))
            next_pid += 1
        grandchild_count += num_grandchildren

        # Create family for this couple; the male partner is the father
        # regardless of which of the two is the Smith child.
        if parent_gender == "M":
            couple_father, couple_mother = parent_handle, spouse_handle
        else:
            couple_father, couple_mother = spouse_handle, parent_handle
        add_family(family_id, couple_father, couple_mother, 1990 + i, grandchild_handles)

    # Add all additional events to events list
    all_events = core_events + all_additional_events

    # Create complete XML document and write it with its DOCTYPE
    tree = create_gramps_xml_document(all_events, people, families)
    _write_gramps_file(tree, output_path)

    total_events = len(all_events)
    print(f"Generated {output_path} with:")
    print(" - 2 parents (John and Mary Smith)")
    print(f" - {num_children} children")
    print(f" - {num_children_with_families} spouses")
    print(f" - {grandchild_count} grandchildren")
    print(" - Multiple families with marriage events")
    print(" - Birth and death events for all")
    print(f" - {len(all_additional_events)} additional events (Baptism, Education, Occupation, etc.)")
    print(f" - Total events: {total_events}")


if __name__ == "__main__":
    main()