#!/usr/bin/env python3 """ Generate a realistic medieval German family database for Gramps testing. Covers High Medieval period (1000-1300 AD) with 5 generations. """ import random import xml.etree.ElementTree as ET import os import urllib.request import urllib.error import urllib.parse from dataclasses import dataclass from datetime import datetime from typing import Optional, List, Tuple, Dict, Set # Set seed for deterministic generation random.seed(42) # Constants EVENT_ID_OFFSET = 10 FAMILY_ID_OFFSET = 100 EVENT_ID_START_OFFSET = 2 MIN_MONTH = 1 MAX_MONTH = 12 MIN_DAY = 1 MAX_DAY = 28 GRAMPS_XML_VERSION = "5.1.0" GRAMPS_XML_NAMESPACE = "http://gramps-project.org/xml/1.7.1/" GRAMPS_XML_DTD = "http://gramps-project.org/xml/1.7.1/grampsxml.dtd" # Portrait generation constants PORTRAITS_DIR = "portraits" DICEBEAR_API_BASE = "https://api.dicebear.com/7.x/avataaars/svg" # Medieval German male names (High Medieval period, 1000-1300 AD) MALE_NAMES = [ "Heinrich", "Konrad", "Friedrich", "Albrecht", "Gottfried", "Ludwig", "Rudolf", "Otto", "Berthold", "Werner", "Ulrich", "Hartmann", "Eberhard", "Gerhard", "Dietrich", "Arnold", "Hermann", "Bruno", "Adalbert", "Gunther", "Siegfried", "Wolfgang", "Reinhard", "Burchard", "Gebhard", "Volkmar", "Wigand", "Baldwin", "Gunzelin", "Ekkehard", "Bernhard", "Hildebrand", "Ruprecht", "Wilhelm", "Johann", "Matthias", "Philipp", "Markward", "Hartwig", "Lothar", "Theoderich", "Wichmann", "Thiemo", "Gebhard", "Adalbero", "Gero", "Wichard", "Widukind", "Hugo", "Lambert" ] # Medieval German female names (High Medieval period, 1000-1300 AD) FEMALE_NAMES = [ "Adelheid", "Gertrud", "Margarete", "Elisabeth", "Katharina", "Anna", "Agnes", "Hedwig", "Mechthild", "Beatrix", "Sophia", "Mathilde", "Irmgard", "Gisela", "Kunigunde", "Helena", "Brigitta", "Ursula", "Barbara", "Dorothea", "Christina", "Petronilla", "Richza", "Judith", "Euphemia", "Cecilia", "Gertrude", "Ida", "Luitgard", "Oda", "Adela", "Bertha", "Ermengard", "Hildegard", "Imelda", "Jutta", "Liutgard", "Mechtilde", "Odilia", "Regina", "Walburga", "Wiborada", "Yolanda" ] # Medieval German surnames (High Medieval period) SURNAMES = [ # Noble families "von Habsburg", "von Hohenstaufen", "von Wittelsbach", "von Babenberg", "von Zähringen", "von Wettin", "von Ascania", "von Welf", # Common surnames (occupational, descriptive) "Müller", "Schmidt", "Weber", "Fischer", "Meyer", "Wagner", "Becker", "Schulz", "Hoffmann", "Schäfer", "Koch", "Bauer", "Richter", "Klein", "Wolf", "Schröder", "Neumann", "Schwarz", "Zimmermann", "Braun", "Krüger", "Hofmann", "Hartmann", "Lange", "Schmitt", "Werner", "Schmitz", "Krause", "Meier", "Lehmann", "Schmid", "Schulze", "Maier", "Köhler", "Herrmann", "König", "Walter", "Huber", "Mayer", "Fuchs", "Peters", "Lang", "Scholz", "Möller", "Weiß", "Jung", "Hahn", "Schubert", "Vogel", "Friedrich" ] # Medieval German cities and places (High Medieval period) PLACES = [ "Köln", "Mainz", "Trier", "Augsburg", "Regensburg", "Worms", "Speyer", "Straßburg", "Würzburg", "Bamberg", "Fulda", "Magdeburg", "Halle", "Erfurt", "Nürnberg", "Ulm", "Freiburg", "Basel", "Konstanz", "Lübeck", "Hamburg", "Bremen", "Braunschweig", "Goslar", "Quedlinburg", "Merseburg", "Naumburg", "Meißen", "Dresden", "Leipzig", "Frankfurt", "Heidelberg", "Rothenburg", "Dinkelsbühl", "Nördlingen", "Esslingen", "Reutlingen", "Ravensburg", "Memmingen", "Kempten", "Lindau", "Überlingen" ] # Medieval occupations OCCUPATIONS = [ # Nobility and military "Ritter", "Knappe", "Burgvogt", "Landvogt", "Marschall", # Clergy "Priester", "Mönch", "Nonne", "Abt", "Äbtissin", "Bischof", # Craftsmen "Schmied", "Schneider", "Schuster", "Bäcker", "Müller", "Bauer", "Zimmermann", "Maurer", "Tischler", "Schreiner", "Färber", "Gerber", "Kürschner", "Seiler", "Böttcher", "Drechsler", "Schlosser", # Merchants and traders "Kaufmann", "Händler", "Krämer", "Gewürzhändler", "Tuchhändler", # Other "Jäger", "Fischer", "Schäfer", "Hirte", "Knecht", "Magd", "Wirt", "Koch", "Bader", "Barbier", "Arzt", "Apotheker" ] # Medieval event types with probabilities and age ranges EVENT_TYPES = [ ("Baptism", 0.95, 0, 1), # 95% chance, 0-1 years after birth (very common) ("Christening", 0.80, 0, 1), # 80% chance, 0-1 years after birth ("Confirmation", 0.60, 12, 16), # 60% chance, 12-16 years after birth ("Education", 0.30, 7, 14), # 30% chance (mostly for nobility/wealthy), 7-14 years ("Occupation", 0.85, 14, 50), # 85% chance, 14-50 years after birth ("Military Service", 0.40, 18, 40), # 40% chance for males, 18-40 years ("Residence", 0.70, 0, 80), # 70% chance, any time ("Marriage", 0.90, 18, 35), # 90% chance, 18-35 years (very common) ("Burial", 0.80, None, None), # 80% chance if death exists, at death time ("Cremation", 0.05, None, None), # 5% chance if death exists (rare in medieval times) ] # Generation configuration NUM_GENERATIONS = 5 MIN_CHILDREN_PER_FAMILY = 6 MAX_CHILDREN_PER_FAMILY = 12 GENERATION_START_YEAR = 1000 # Start of High Medieval period GENERATION_END_YEAR = 1300 # End of High Medieval period @dataclass class EventData: """Data structure for an event.""" handle: str event_type: str year: int month: int day: int description: str event_id: int @dataclass class PersonData: """Data structure for person information.""" handle: str name: str surname: str birth: int death: Optional[int] gender: str parentin: List[str] childof: List[str] generation: int @dataclass class FamilyData: """Data structure for family information.""" handle: str father_handle: str mother_handle: str children_handles: List[str] marriage_year: int marriage_handle: str family_id: int generation: int def gen_handle(prefix: str, num: int) -> str: """Generate unique handle.""" return f"_{prefix}{num:08d}" def generate_portrait(person_id: int, name: str, gender: str, birth_year: int) -> Optional[Tuple[str, str]]: """ Generate a portrait for a person using DiceBear Avatars API. Args: person_id: Unique person ID. name: Person's name (used as seed for deterministic generation). gender: Person's gender ('M' or 'F'). birth_year: Birth year. Returns: Optional[Tuple[str, str]]: Tuple of (media_handle, file_path) if successful, None otherwise. """ # Create portraits directory if it doesn't exist if not os.path.exists(PORTRAITS_DIR): os.makedirs(PORTRAITS_DIR) # Create seed from name, person_id, and gender for deterministic generation seed = f"{name}_{person_id}_{gender}" # Build API URL with parameters params = { "seed": seed } # Build URL with proper encoding url = f"{DICEBEAR_API_BASE}?{urllib.parse.urlencode(params)}" # Generate filename filename = f"portrait_{person_id:04d}_{name.replace(' ', '_')}.svg" file_path = os.path.join(PORTRAITS_DIR, filename) # Download portrait try: urllib.request.urlretrieve(url, file_path) media_handle = gen_handle("MEDIA", person_id) return (media_handle, file_path) except (urllib.error.URLError, urllib.error.HTTPError, OSError) as e: print(f"Warning: Could not generate portrait for {name}: {e}") return None def create_media_element(media_handle: str, file_path: str, title: str, media_id: Optional[int] = None) -> ET.Element: """ Create an XML element for a media object. Args: media_handle: Unique handle for the media object. file_path: Path to the media file (relative to XML file location). title: Title/description for the media object. media_id: Optional media ID number. If None, extracted from handle. Returns: ET.Element: The media XML element. """ media_elem = ET.Element("media") media_elem.set("handle", media_handle) media_elem.set("change", str(int(datetime.now().timestamp()))) if media_id is None: # Extract ID from handle (e.g., "_MEDIA00000001" -> 1) media_id = int(media_handle.replace("_MEDIA", "")) media_elem.set("id", f"O{media_id:04d}") file_elem = ET.SubElement(media_elem, "file") file_elem.set("src", file_path) title_elem = ET.SubElement(media_elem, "title") title_elem.text = title mime_elem = ET.SubElement(media_elem, "mimetype") mime_elem.text = "image/svg+xml" return media_elem def create_event_element(event_data: EventData) -> ET.Element: """ Create an XML element for an event. Args: event_data: EventData object containing event information. Returns: ET.Element: The event XML element. """ event_elem = ET.Element("event") event_elem.set("handle", event_data.handle) event_elem.set("change", str(int(datetime.now().timestamp()))) event_elem.set("id", f"E{event_data.event_id:04d}") type_elem = ET.SubElement(event_elem, "type") type_elem.text = event_data.event_type date_elem = ET.SubElement(event_elem, "dateval") date_str = f"{event_data.year:04d}-{event_data.month:02d}-{event_data.day:02d}" date_elem.set("val", date_str) desc_elem = ET.SubElement(event_elem, "description") desc_elem.text = event_data.description return event_elem def gen_additional_events( pid: int, first_name: str, surname: str, birth_year: int, death_year: Optional[int], gender: str ) -> List[Tuple[str, EventData]]: """ Generate additional events for a person based on event type probabilities. Args: pid: Person ID. first_name: Person's first name. surname: Person's surname. birth_year: Birth year. death_year: Death year (if known). gender: Person's gender ('M' or 'F'). Returns: List[Tuple[str, EventData]]: List of (event_handle, EventData) tuples. """ events: List[Tuple[str, EventData]] = [] event_counter = EVENT_ID_START_OFFSET for event_type, probability, min_years, max_years in EVENT_TYPES: if random.random() > probability: continue # Special handling for death-related events if event_type in ("Burial", "Cremation"): if death_year is None: continue event_year = death_year event_month = random.randint(MIN_MONTH, MAX_MONTH) event_day = random.randint(MIN_DAY, MAX_DAY) elif event_type == "Marriage": # Marriage handled separately in family generation continue elif min_years is None or max_years is None: continue else: # Calculate event year based on age range min_age = min_years max_age = max_years age_at_event = random.randint(min_age, max_age) event_year = birth_year + age_at_event # Don't generate events after death if death_year and event_year > death_year: continue event_month = random.randint(MIN_MONTH, MAX_MONTH) event_day = random.randint(MIN_DAY, MAX_DAY) # Special handling for gender-specific events if event_type == "Military Service" and gender == "F": continue # Women typically didn't serve in medieval military # Generate event description if event_type == "Occupation": occupation = random.choice(OCCUPATIONS) description = f"{occupation} - {first_name} {surname}" elif event_type == "Residence": place = random.choice(PLACES) description = f"Residence in {place} - {first_name} {surname}" else: description = f"{event_type} of {surname}, {first_name}" event_handle = gen_handle("EVENT", pid * EVENT_ID_OFFSET + event_counter) event_data = EventData( handle=event_handle, event_type=event_type, year=event_year, month=event_month, day=event_day, description=description, event_id=pid * EVENT_ID_OFFSET + event_counter ) events.append((event_handle, event_data)) event_counter += 1 return events def gen_person( pid: int, first_name: str, surname: str, birth_year: int, death_year: Optional[int], gender: str, parentin_families: Optional[List[str]] = None, childof_families: Optional[List[str]] = None, generation: int = 1 ) -> Tuple[ET.Element, ET.Element, Optional[ET.Element], List[ET.Element], List[Tuple[str, EventData]], Optional[Tuple[str, str]]]: """ Generate a person element with all associated events. Args: pid: Person ID. first_name: First name. surname: Surname. birth_year: Birth year. death_year: Death year (None if still alive). gender: Gender ('M' or 'F'). parentin_families: List of family handles where person is a parent. childof_families: List of family handles where person is a child. generation: Generation number (1-5). Returns: Tuple containing: (person_element, birth_event, death_event, additional_events_xml, additional_events_tuples, portrait_info) """ handle = gen_handle("PERSON", pid) birth_handle = gen_handle("EVENT", pid * EVENT_ID_OFFSET) death_handle = gen_handle("EVENT", pid * EVENT_ID_OFFSET + 1) if death_year else None # Generate additional events additional_events = gen_additional_events(pid, first_name, surname, birth_year, death_year, gender) # Generate portrait full_name = f"{first_name} {surname}" portrait_info = generate_portrait(pid, full_name, gender, birth_year) # Create person element person_elem = ET.Element("person") person_elem.set("handle", handle) person_elem.set("change", str(int(datetime.now().timestamp()))) person_elem.set("id", f"I{pid:04d}") gender_elem = ET.SubElement(person_elem, "gender") gender_elem.text = gender name_elem = ET.SubElement(person_elem, "name") name_elem.set("type", "Birth Name") first_elem = ET.SubElement(name_elem, "first") first_elem.text = first_name surname_elem = ET.SubElement(name_elem, "surname") surname_elem.text = surname # Add portrait reference if available if portrait_info: media_handle, _ = portrait_info gallery_elem = ET.SubElement(person_elem, "gallery") mediaobjref_elem = ET.SubElement(gallery_elem, "mediaobjref") mediaobjref_elem.set("hlink", media_handle) # Add event references birth_ref = ET.SubElement(person_elem, "eventref") birth_ref.set("hlink", birth_handle) birth_ref.set("role", "Primary") for event_handle, _ in additional_events: event_ref = ET.SubElement(person_elem, "eventref") event_ref.set("hlink", event_handle) event_ref.set("role", "Primary") if death_handle: death_ref = ET.SubElement(person_elem, "eventref") death_ref.set("hlink", death_handle) death_ref.set("role", "Primary") # Add parentin references if parentin_families: for family_handle in parentin_families: parentin_elem = ET.SubElement(person_elem, "parentin") parentin_elem.set("hlink", family_handle) # Add childof references if childof_families: for family_handle in childof_families: childof_elem = ET.SubElement(person_elem, "childof") childof_elem.set("hlink", family_handle) # Birth event birth_month = random.randint(MIN_MONTH, MAX_MONTH) birth_day = random.randint(MIN_DAY, MAX_DAY) birth_event_data = EventData( handle=birth_handle, event_type="Birth", year=birth_year, month=birth_month, day=birth_day, description=f"Birth of {surname}, {first_name}", event_id=pid * EVENT_ID_OFFSET ) birth_event = create_event_element(birth_event_data) # Death event death_event: Optional[ET.Element] = None if death_handle and death_year: death_month = random.randint(MIN_MONTH, MAX_MONTH) death_day = random.randint(MIN_DAY, MAX_DAY) death_event_data = EventData( handle=death_handle, event_type="Death", year=death_year, month=death_month, day=death_day, description=f"Death of {surname}, {first_name}", event_id=pid * EVENT_ID_OFFSET + 1 ) death_event = create_event_element(death_event_data) # Convert additional events to XML elements all_additional_events_xml = [create_event_element(event_data) for _, event_data in additional_events] return person_elem, birth_event, death_event, all_additional_events_xml, additional_events, portrait_info def gen_family( fid: int, father_handle: str, mother_handle: str, marriage_year: int, children_handles: List[str], generation: int = 1 ) -> Tuple[ET.Element, ET.Element]: """Generate a family with marriage event.""" handle = gen_handle("FAMILY", fid) marriage_handle = gen_handle("EVENT", fid * FAMILY_ID_OFFSET) # Create family element family_elem = ET.Element("family") family_elem.set("handle", handle) family_elem.set("change", str(int(datetime.now().timestamp()))) family_elem.set("id", f"F{fid:04d}") rel_elem = ET.SubElement(family_elem, "rel") rel_elem.set("type", "Married") father_elem = ET.SubElement(family_elem, "father") father_elem.set("hlink", father_handle) mother_elem = ET.SubElement(family_elem, "mother") mother_elem.set("hlink", mother_handle) for child_handle in children_handles: child_elem = ET.SubElement(family_elem, "childref") child_elem.set("hlink", child_handle) marriage_ref = ET.SubElement(family_elem, "eventref") marriage_ref.set("hlink", marriage_handle) marriage_ref.set("role", "Family") # Marriage event marriage_month = random.randint(MIN_MONTH, MAX_MONTH) marriage_day = random.randint(MIN_DAY, MAX_DAY) marriage_event_data = EventData( handle=marriage_handle, event_type="Marriage", year=marriage_year, month=marriage_month, day=marriage_day, description="Marriage", event_id=fid * FAMILY_ID_OFFSET ) marriage_event = create_event_element(marriage_event_data) return family_elem, marriage_event def create_gramps_xml_document( events: List[ET.Element], people: List[ET.Element], families: List[ET.Element], media: List[ET.Element] ) -> ET.ElementTree: """Create the complete Gramps XML document.""" # Create root element database = ET.Element("database") database.set("xmlns", GRAMPS_XML_NAMESPACE) # Header header = ET.SubElement(database, "header") created = ET.SubElement(header, "created") created.set("date", datetime.now().strftime('%Y-%m-%d')) created.set("version", GRAMPS_XML_VERSION) researcher = ET.SubElement(header, "researcher") resname = ET.SubElement(researcher, "resname") resname.text = "Medieval German Family Generator" # Tags (empty) ET.SubElement(database, "tags") # Events events_elem = ET.SubElement(database, "events") for event in events: events_elem.append(event) # People people_elem = ET.SubElement(database, "people") for person in people: people_elem.append(person) # Families families_elem = ET.SubElement(database, "families") for family in families: families_elem.append(family) # Media objects media_elem = ET.SubElement(database, "objects") for media_obj in media: media_elem.append(media_obj) return ET.ElementTree(database) def calculate_birth_year(generation: int) -> int: """ Calculate birth year for a given generation. Generation 1: 1000-1050 Generation 2: 1020-1100 Generation 3: 1040-1150 Generation 4: 1060-1200 Generation 5: 1080-1250 """ base_year = GENERATION_START_YEAR + (generation - 1) * 20 year_range = 50 + (generation - 1) * 10 return random.randint(base_year, base_year + year_range) def calculate_death_year(birth_year: int, generation: int) -> Optional[int]: """ Calculate death year based on birth year and generation. Medieval life expectancy: 30-70 years. """ # Higher chance of early death in earlier generations if generation <= 2: life_expectancy = random.randint(30, 60) else: life_expectancy = random.randint(35, 70) death_year = birth_year + life_expectancy # Don't exceed the end of High Medieval period if death_year > GENERATION_END_YEAR: # Some people might live past 1300, but reduce probability if random.random() < 0.3: return death_year else: return None # Still alive at end of period return death_year def calculate_marriage_year(birth_year: int, gender: str) -> int: """ Calculate marriage year based on birth year and gender. Women: typically 18-25 Men: typically 20-30 """ if gender == "F": age_at_marriage = random.randint(18, 25) else: age_at_marriage = random.randint(20, 30) return birth_year + age_at_marriage def generate_parents_and_siblings( person_data: PersonData, person_data_map: Dict[int, PersonData], all_people: List[ET.Element], all_events: List[ET.Element], all_media: List[ET.Element], all_families: List[ET.Element], family_data_map: Dict[int, FamilyData], next_person_id: int, next_family_id: int, generation: int ) -> Tuple[int, int, Optional[str]]: """ Generate parents and siblings for a person if they don't have parents. If they already have parents, generate additional siblings. Args: person_data: The person to generate parents/siblings for person_data_map: Map of person IDs to PersonData all_people: List of all person XML elements all_events: List of all event XML elements all_media: List of all media XML elements all_families: List of all family XML elements family_data_map: Map of family IDs to FamilyData next_person_id: Next available person ID next_family_id: Next available family ID generation: Generation number for the person Returns: Tuple of (updated_next_person_id, updated_next_family_id, parent_family_handle or None) """ # If person already has parents, check if we need to add siblings if person_data.childof: parent_family_handle = person_data.childof[0] # Check if parent family exists and has enough children parent_family_id = None for fid, fdata in family_data_map.items(): if fdata.handle == parent_family_handle: parent_family_id = fid break if parent_family_id is not None: parent_family_data = family_data_map[parent_family_id] # If family already has 6+ children, don't add more if len(parent_family_data.children_handles) >= MIN_CHILDREN_PER_FAMILY: return next_person_id, next_family_id, parent_family_handle # Generate additional siblings # Get parent data father_data = None mother_data = None for pid, pdata in person_data_map.items(): if pdata.handle == parent_family_data.father_handle: father_data = pdata if pdata.handle == parent_family_data.mother_handle: mother_data = pdata if father_data and mother_data: # Calculate how many more siblings to add current_children_count = len(parent_family_data.children_handles) num_additional_siblings = random.randint( max(0, MIN_CHILDREN_PER_FAMILY - current_children_count), MAX_CHILDREN_PER_FAMILY - current_children_count ) # Generate additional siblings for _ in range(num_additional_siblings): sibling_id = next_person_id next_person_id += 1 sibling_gender = "M" if random.random() < 0.5 else "F" sibling_name = random.choice(MALE_NAMES if sibling_gender == "M" else FEMALE_NAMES) sibling_surname = father_data.surname # Sibling birth year: around the same time as the person sibling_birth_year = person_data.birth + random.randint(-5, 15) # Ensure sibling is born before parent's death if father_data.death and sibling_birth_year > father_data.death: sibling_birth_year = father_data.death - 1 if mother_data.death and sibling_birth_year > mother_data.death: sibling_birth_year = mother_data.death - 1 # Skip if birth year is invalid if sibling_birth_year < parent_family_data.marriage_year + 1: continue sibling_death_year = calculate_death_year(sibling_birth_year, generation) sibling_handle = gen_handle("PERSON", sibling_id) sibling_person, sibling_birth, sibling_death, sibling_additional, _, sibling_portrait = gen_person( sibling_id, sibling_name, sibling_surname, sibling_birth_year, sibling_death_year, sibling_gender, childof_families=[parent_family_handle], generation=generation ) all_people.append(sibling_person) all_events.append(sibling_birth) if sibling_death is not None: all_events.append(sibling_death) all_events.extend(sibling_additional) if sibling_portrait: media_handle, file_path = sibling_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {sibling_name} {sibling_surname}", sibling_id) all_media.append(media_elem) parent_family_data.children_handles.append(sibling_handle) sibling_data = PersonData( handle=sibling_handle, name=sibling_name, surname=sibling_surname, birth=sibling_birth_year, death=sibling_death_year, gender=sibling_gender, parentin=[], childof=[parent_family_handle], generation=generation ) person_data_map[sibling_id] = sibling_data return next_person_id, next_family_id, parent_family_handle return next_person_id, next_family_id, parent_family_handle # Generate parents for this person # Parents should be from previous generation parent_generation = max(1, generation - 1) # Father's birth year: 20-40 years before person's birth father_birth_year = person_data.birth - random.randint(20, 40) mother_birth_year = person_data.birth - random.randint(18, 35) # Ensure birth years are within the time period if father_birth_year < GENERATION_START_YEAR: father_birth_year = GENERATION_START_YEAR + random.randint(0, 50) if mother_birth_year < GENERATION_START_YEAR: mother_birth_year = GENERATION_START_YEAR + random.randint(0, 50) father_death_year = calculate_death_year(father_birth_year, parent_generation) mother_death_year = calculate_death_year(mother_birth_year, parent_generation) # Generate parent names father_name = random.choice(MALE_NAMES) father_surname = person_data.surname # Child takes father's surname mother_name = random.choice(FEMALE_NAMES) mother_surname = random.choice(SURNAMES) # Mother may have different surname # Create parent family parent_family_id = next_family_id next_family_id += 1 parent_family_handle = gen_handle("FAMILY", parent_family_id) # Create father father_id = next_person_id next_person_id += 1 father_handle = gen_handle("PERSON", father_id) father_person, father_birth_event, father_death_event, father_additional, _, father_portrait = gen_person( father_id, father_name, father_surname, father_birth_year, father_death_year, "M", parentin_families=[parent_family_handle], generation=parent_generation ) all_people.append(father_person) all_events.append(father_birth_event) if father_death_event is not None: all_events.append(father_death_event) all_events.extend(father_additional) if father_portrait: media_handle, file_path = father_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {father_name} {father_surname}", father_id) all_media.append(media_elem) # Create mother mother_id = next_person_id next_person_id += 1 mother_handle = gen_handle("PERSON", mother_id) mother_person, mother_birth_event, mother_death_event, mother_additional, _, mother_portrait = gen_person( mother_id, mother_name, mother_surname, mother_birth_year, mother_death_year, "F", parentin_families=[parent_family_handle], generation=parent_generation ) all_people.append(mother_person) all_events.append(mother_birth_event) if mother_death_event is not None: all_events.append(mother_death_event) all_events.extend(mother_additional) if mother_portrait: media_handle, file_path = mother_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {mother_name} {mother_surname}", mother_id) all_media.append(media_elem) # Store parent data father_data = PersonData( handle=father_handle, name=father_name, surname=father_surname, birth=father_birth_year, death=father_death_year, gender="M", parentin=[parent_family_handle], childof=[], generation=parent_generation ) person_data_map[father_id] = father_data mother_data = PersonData( handle=mother_handle, name=mother_name, surname=mother_surname, birth=mother_birth_year, death=mother_death_year, gender="F", parentin=[parent_family_handle], childof=[], generation=parent_generation ) person_data_map[mother_id] = mother_data # Calculate marriage year for parents parent_marriage_year = max( calculate_marriage_year(father_birth_year, "M"), calculate_marriage_year(mother_birth_year, "F") ) # Generate siblings (including the original person) # Generate 6-12 children total, including the original person num_siblings_total = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY) siblings_handles: List[str] = [] # Add the original person as a child siblings_handles.append(person_data.handle) if parent_family_handle not in person_data.childof: person_data.childof.append(parent_family_handle) # Generate other siblings # Calculate reasonable birth year range for siblings min_sibling_birth = parent_marriage_year + 1 max_sibling_birth = person_data.birth + 20 # Siblings can be born up to 20 years after the person for sibling_idx in range(num_siblings_total - 1): # -1 because we already have the original person sibling_id = next_person_id next_person_id += 1 sibling_gender = "M" if random.random() < 0.5 else "F" sibling_name = random.choice(MALE_NAMES if sibling_gender == "M" else FEMALE_NAMES) sibling_surname = father_surname # Siblings take father's surname # Sibling birth year: between parent marriage and reasonable range sibling_birth_year = random.randint(min_sibling_birth, max_sibling_birth) # Ensure sibling is born before parent's death if father_death_year and sibling_birth_year > father_death_year: sibling_birth_year = min(sibling_birth_year, father_death_year - 1) if mother_death_year and sibling_birth_year > mother_death_year: sibling_birth_year = min(sibling_birth_year, mother_death_year - 1) # Skip if birth year is invalid if sibling_birth_year < min_sibling_birth: continue sibling_death_year = calculate_death_year(sibling_birth_year, generation) sibling_handle = gen_handle("PERSON", sibling_id) sibling_person, sibling_birth, sibling_death, sibling_additional, _, sibling_portrait = gen_person( sibling_id, sibling_name, sibling_surname, sibling_birth_year, sibling_death_year, sibling_gender, childof_families=[parent_family_handle], generation=generation ) all_people.append(sibling_person) all_events.append(sibling_birth) if sibling_death is not None: all_events.append(sibling_death) all_events.extend(sibling_additional) if sibling_portrait: media_handle, file_path = sibling_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {sibling_name} {sibling_surname}", sibling_id) all_media.append(media_elem) siblings_handles.append(sibling_handle) sibling_data = PersonData( handle=sibling_handle, name=sibling_name, surname=sibling_surname, birth=sibling_birth_year, death=sibling_death_year, gender=sibling_gender, parentin=[], childof=[parent_family_handle], generation=generation ) person_data_map[sibling_id] = sibling_data # Create parent family element parent_family_elem, parent_marriage_event = gen_family( parent_family_id, father_handle, mother_handle, parent_marriage_year, siblings_handles, parent_generation ) all_families.append(parent_family_elem) all_events.append(parent_marriage_event) family_data_map[parent_family_id] = FamilyData( handle=parent_family_handle, father_handle=father_handle, mother_handle=mother_handle, children_handles=siblings_handles, marriage_year=parent_marriage_year, marriage_handle=gen_handle("EVENT", parent_family_id * FAMILY_ID_OFFSET), family_id=parent_family_id, generation=parent_generation ) return next_person_id, next_family_id, parent_family_handle def main() -> None: """Main function to generate the medieval German family database.""" print("Generating medieval German family database (5 generations, 1000-1300 AD)...") # Track all data all_people: List[ET.Element] = [] all_events: List[ET.Element] = [] all_families: List[ET.Element] = [] all_media: List[ET.Element] = [] # Track person and family data person_data_map: Dict[int, PersonData] = {} family_data_map: Dict[int, FamilyData] = {} # ID counters next_person_id = 1 next_family_id = 1 # Generation 1: Root couple print("Generating Generation 1...") gen1_father_id = next_person_id next_person_id += 1 gen1_mother_id = next_person_id next_person_id += 1 gen1_father_birth = calculate_birth_year(1) gen1_mother_birth = calculate_birth_year(1) gen1_father_death = calculate_death_year(gen1_father_birth, 1) gen1_mother_death = calculate_death_year(gen1_mother_birth, 1) gen1_family_id = next_family_id next_family_id += 1 gen1_family_handle = gen_handle("FAMILY", gen1_family_id) gen1_father_name = random.choice(MALE_NAMES) gen1_father_surname = random.choice(SURNAMES) gen1_father_handle = gen_handle("PERSON", gen1_father_id) gen1_mother_name = random.choice(FEMALE_NAMES) gen1_mother_surname = random.choice(SURNAMES) gen1_mother_handle = gen_handle("PERSON", gen1_mother_id) # Generate Generation 1 couple gen1_father_person, gen1_father_birth_event, gen1_father_death_event, gen1_father_additional, _, gen1_father_portrait = gen_person( gen1_father_id, gen1_father_name, gen1_father_surname, gen1_father_birth, gen1_father_death, "M", parentin_families=[gen1_family_handle], generation=1 ) gen1_mother_person, gen1_mother_birth_event, gen1_mother_death_event, gen1_mother_additional, _, gen1_mother_portrait = gen_person( gen1_mother_id, gen1_mother_name, gen1_mother_surname, gen1_mother_birth, gen1_mother_death, "F", parentin_families=[gen1_family_handle], generation=1 ) all_people.extend([gen1_father_person, gen1_mother_person]) all_events.extend([gen1_father_birth_event, gen1_mother_birth_event]) if gen1_father_death_event is not None: all_events.append(gen1_father_death_event) if gen1_mother_death_event is not None: all_events.append(gen1_mother_death_event) all_events.extend(gen1_father_additional + gen1_mother_additional) # Add portraits if gen1_father_portrait: media_handle, file_path = gen1_father_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {gen1_father_name} {gen1_father_surname}", gen1_father_id) all_media.append(media_elem) if gen1_mother_portrait: media_handle, file_path = gen1_mother_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {gen1_mother_name} {gen1_mother_surname}", gen1_mother_id) all_media.append(media_elem) # Store person data person_data_map[gen1_father_id] = PersonData( handle=gen1_father_handle, name=gen1_father_name, surname=gen1_father_surname, birth=gen1_father_birth, death=gen1_father_death, gender="M", parentin=[gen1_family_handle], childof=[], generation=1 ) person_data_map[gen1_mother_id] = PersonData( handle=gen1_mother_handle, name=gen1_mother_name, surname=gen1_mother_surname, birth=gen1_mother_birth, death=gen1_mother_death, gender="F", parentin=[gen1_family_handle], childof=[], generation=1 ) # Generate children for Generation 1 num_children_gen1 = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY) gen1_children_handles: List[str] = [] gen1_children_data: List[PersonData] = [] for i in range(num_children_gen1): child_id = next_person_id next_person_id += 1 gender = "M" if i % 2 == 0 else "F" first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES) # Children typically take father's surname child_surname = gen1_father_surname birth_year = max(gen1_father_birth, gen1_mother_birth) + random.randint(20, 35) death_year = calculate_death_year(birth_year, 2) child_handle = gen_handle("PERSON", child_id) child_person, child_birth, child_death, child_additional, _, child_portrait = gen_person( child_id, first_name, child_surname, birth_year, death_year, gender, childof_families=[gen1_family_handle], generation=2 ) all_people.append(child_person) all_events.append(child_birth) if child_death is not None: all_events.append(child_death) all_events.extend(child_additional) if child_portrait: media_handle, file_path = child_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {first_name} {child_surname}", child_id) all_media.append(media_elem) gen1_children_handles.append(child_handle) person_data_map[child_id] = PersonData( handle=child_handle, name=first_name, surname=child_surname, birth=birth_year, death=death_year, gender=gender, parentin=[], childof=[gen1_family_handle], generation=2 ) gen1_children_data.append(person_data_map[child_id]) # Create Generation 1 family marriage_year = calculate_marriage_year(max(gen1_father_birth, gen1_mother_birth), "M") gen1_family, gen1_marriage_event = gen_family(gen1_family_id, gen1_father_handle, gen1_mother_handle, marriage_year, gen1_children_handles, 1) all_families.append(gen1_family) all_events.append(gen1_marriage_event) family_data_map[gen1_family_id] = FamilyData( handle=gen1_family_handle, father_handle=gen1_father_handle, mother_handle=gen1_mother_handle, children_handles=gen1_children_handles, marriage_year=marriage_year, marriage_handle=gen_handle("EVENT", gen1_family_id * FAMILY_ID_OFFSET), family_id=gen1_family_id, generation=1 ) # Update parentin for Generation 1 couple person_data_map[gen1_father_id].parentin = [gen1_family_handle] person_data_map[gen1_mother_id].parentin = [gen1_family_handle] # Generate subsequent generations (2-5) current_generation_people = gen1_children_data.copy() for generation in range(2, NUM_GENERATIONS + 1): print(f"Generating Generation {generation}...") next_generation_people: List[PersonData] = [] # Pair up people from current generation to create families # Shuffle to create more realistic pairing males = [p for p in current_generation_people if p.gender == "M"] females = [p for p in current_generation_people if p.gender == "F"] random.shuffle(males) random.shuffle(females) # Create families by pairing males and females # Try to pair everyone, but if numbers don't match, some may remain unmarried num_families = min(len(males), len(females)) for family_idx in range(num_families): father_data = males[family_idx] mother_data = females[family_idx] # Check if both survive to marriage age father_marriage_age = calculate_marriage_year(father_data.birth, "M") - father_data.birth mother_marriage_age = calculate_marriage_year(mother_data.birth, "F") - mother_data.birth # Skip if either dies before marriage age if father_data.death and father_data.death < father_data.birth + father_marriage_age: continue if mother_data.death and mother_data.death < mother_data.birth + mother_marriage_age: continue # Calculate marriage year marriage_year = max( calculate_marriage_year(father_data.birth, "M"), calculate_marriage_year(mother_data.birth, "F") ) # Don't create family if marriage would be after death if father_data.death and marriage_year > father_data.death: continue if mother_data.death and marriage_year > mother_data.death: continue # Generate parents and siblings for father if needed next_person_id, next_family_id, _ = generate_parents_and_siblings( father_data, person_data_map, all_people, all_events, all_media, all_families, family_data_map, next_person_id, next_family_id, generation ) # Generate parents and siblings for mother if needed next_person_id, next_family_id, _ = generate_parents_and_siblings( mother_data, person_data_map, all_people, all_events, all_media, all_families, family_data_map, next_person_id, next_family_id, generation ) # Create family family_id = next_family_id next_family_id += 1 family_handle = gen_handle("FAMILY", family_id) # Generate children (6-12 per family) num_children = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY) children_handles: List[str] = [] for child_idx in range(num_children): child_id = next_person_id next_person_id += 1 gender = "M" if random.random() < 0.5 else "F" # Random gender first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES) # Children take father's surname child_surname = father_data.surname # Children born 1-25 years after marriage birth_year = marriage_year + random.randint(1, 25) # Don't create child if born after parent's death if father_data.death and birth_year > father_data.death: continue if mother_data.death and birth_year > mother_data.death: continue death_year = calculate_death_year(birth_year, generation) child_handle = gen_handle("PERSON", child_id) child_person, child_birth, child_death, child_additional, _, child_portrait = gen_person( child_id, first_name, child_surname, birth_year, death_year, gender, childof_families=[family_handle], generation=generation ) all_people.append(child_person) all_events.append(child_birth) if child_death is not None: all_events.append(child_death) all_events.extend(child_additional) if child_portrait: media_handle, file_path = child_portrait media_elem = create_media_element(media_handle, file_path, f"Portrait of {first_name} {child_surname}", child_id) all_media.append(media_elem) children_handles.append(child_handle) child_data = PersonData( handle=child_handle, name=first_name, surname=child_surname, birth=birth_year, death=death_year, gender=gender, parentin=[], childof=[family_handle], generation=generation ) person_data_map[child_id] = child_data next_generation_people.append(child_data) # Only create family if there are children if children_handles: # Create family element family_elem, marriage_event = gen_family(family_id, father_data.handle, mother_data.handle, marriage_year, children_handles, generation) all_families.append(family_elem) all_events.append(marriage_event) # Update parent data if family_handle not in father_data.parentin: father_data.parentin.append(family_handle) if family_handle not in mother_data.parentin: mother_data.parentin.append(family_handle) family_data_map[family_id] = FamilyData( handle=family_handle, father_handle=father_data.handle, mother_handle=mother_data.handle, children_handles=children_handles, marriage_year=marriage_year, marriage_handle=gen_handle("EVENT", family_id * FAMILY_ID_OFFSET), family_id=family_id, generation=generation ) current_generation_people = next_generation_people # Regenerate person elements with updated family references print("Updating family references...") # Rebuild events and people lists from person_data_map all_people.clear() all_events.clear() all_media.clear() # Regenerate all people with correct family references for person_id, person_data in person_data_map.items(): person_elem, birth_event, death_event, additional_events, _, portrait_info = gen_person( person_id, person_data.name, person_data.surname, person_data.birth, person_data.death, person_data.gender, parentin_families=person_data.parentin if person_data.parentin else None, childof_families=person_data.childof if person_data.childof else None, generation=person_data.generation ) all_people.append(person_elem) # Add events all_events.append(birth_event) if death_event is not None: all_events.append(death_event) all_events.extend(additional_events) # Add media if portrait_info: media_handle, file_path = portrait_info media_elem = create_media_element(media_handle, file_path, f"Portrait of {person_data.name} {person_data.surname}", person_id) all_media.append(media_elem) # Add marriage events from families for family_id, family_data in family_data_map.items(): marriage_handle = family_data.marriage_handle marriage_event_data = EventData( handle=marriage_handle, event_type="Marriage", year=family_data.marriage_year, month=random.randint(MIN_MONTH, MAX_MONTH), day=random.randint(MIN_DAY, MAX_DAY), description="Marriage", event_id=family_id * FAMILY_ID_OFFSET ) marriage_event = create_event_element(marriage_event_data) all_events.append(marriage_event) # Create XML document print("Creating Gramps XML document...") tree = create_gramps_xml_document(all_events, all_people, all_families, all_media) # Write to file output_file = "medieval_german_family.gramps" tree.write(output_file, encoding='utf-8', xml_declaration=True) print(f"\nGenerated {output_file} with:") print(f" - {len(all_people)} people") print(f" - {len(all_families)} families") print(f" - {len(all_events)} events") print(f" - {len(all_media)} media objects") print(f" - {NUM_GENERATIONS} generations") print(f" - Time period: {GENERATION_START_YEAR}-{GENERATION_END_YEAR} AD") if __name__ == "__main__": main()