diff --git a/generate_medieval_german_family.py b/generate_medieval_german_family.py new file mode 100644 index 0000000..bf092c7 --- /dev/null +++ b/generate_medieval_german_family.py @@ -0,0 +1,1317 @@ +#!/usr/bin/env python3 +""" +Generate a realistic medieval German family database for Gramps testing. +Covers High Medieval period (1000-1300 AD) with 5 generations. +""" + +import random +import xml.etree.ElementTree as ET +import os +import urllib.request +import urllib.error +import urllib.parse +from dataclasses import dataclass +from datetime import datetime +from typing import Optional, List, Tuple, Dict, Set + +# Set seed for deterministic generation +random.seed(42) + +# Constants +EVENT_ID_OFFSET = 10 +FAMILY_ID_OFFSET = 100 +EVENT_ID_START_OFFSET = 2 +MIN_MONTH = 1 +MAX_MONTH = 12 +MIN_DAY = 1 +MAX_DAY = 28 +GRAMPS_XML_VERSION = "5.1.0" +GRAMPS_XML_NAMESPACE = "http://gramps-project.org/xml/1.7.1/" +GRAMPS_XML_DTD = "http://gramps-project.org/xml/1.7.1/grampsxml.dtd" + +# Portrait generation constants +PORTRAITS_DIR = "portraits" +DICEBEAR_API_BASE = "https://api.dicebear.com/7.x/avataaars/svg" + +# Medieval German male names (High Medieval period, 1000-1300 AD) +MALE_NAMES = [ + "Heinrich", "Konrad", "Friedrich", "Albrecht", "Gottfried", "Ludwig", + "Rudolf", "Otto", "Berthold", "Werner", "Ulrich", "Hartmann", "Eberhard", + "Gerhard", "Dietrich", "Arnold", "Hermann", "Bruno", "Adalbert", "Gunther", + "Siegfried", "Wolfgang", "Reinhard", "Burchard", "Gebhard", "Volkmar", + "Wigand", "Baldwin", "Gunzelin", "Ekkehard", "Bernhard", "Hildebrand", + "Ruprecht", "Wilhelm", "Johann", "Matthias", "Philipp", "Markward", + "Hartwig", "Lothar", "Theoderich", "Wichmann", "Thiemo", "Gebhard", + "Adalbero", "Gero", "Wichard", "Widukind", "Hugo", "Lambert" +] + +# Medieval German female names (High Medieval period, 1000-1300 AD) +FEMALE_NAMES = [ + "Adelheid", "Gertrud", "Margarete", "Elisabeth", "Katharina", "Anna", + "Agnes", "Hedwig", "Mechthild", "Beatrix", 
"Sophia", "Mathilde", + "Irmgard", "Gisela", "Kunigunde", "Helena", "Brigitta", "Ursula", + "Barbara", "Dorothea", "Christina", "Petronilla", "Richza", "Judith", + "Euphemia", "Cecilia", "Gertrude", "Ida", "Luitgard", "Oda", "Adela", + "Bertha", "Ermengard", "Hildegard", "Imelda", "Jutta", "Liutgard", + "Mechtilde", "Odilia", "Regina", "Walburga", "Wiborada", "Yolanda" +] + +# Medieval German surnames (High Medieval period) +SURNAMES = [ + # Noble families + "von Habsburg", "von Hohenstaufen", "von Wittelsbach", "von Babenberg", + "von Zähringen", "von Wettin", "von Ascania", "von Welf", + # Common surnames (occupational, descriptive) + "Müller", "Schmidt", "Weber", "Fischer", "Meyer", "Wagner", "Becker", + "Schulz", "Hoffmann", "Schäfer", "Koch", "Bauer", "Richter", "Klein", + "Wolf", "Schröder", "Neumann", "Schwarz", "Zimmermann", "Braun", + "Krüger", "Hofmann", "Hartmann", "Lange", "Schmitt", "Werner", + "Schmitz", "Krause", "Meier", "Lehmann", "Schmid", "Schulze", + "Maier", "Köhler", "Herrmann", "König", "Walter", "Huber", + "Mayer", "Fuchs", "Peters", "Lang", "Scholz", "Möller", + "Weiß", "Jung", "Hahn", "Schubert", "Vogel", "Friedrich" +] + +# Medieval German cities and places (High Medieval period) +PLACES = [ + "Köln", "Mainz", "Trier", "Augsburg", "Regensburg", "Worms", "Speyer", + "Straßburg", "Würzburg", "Bamberg", "Fulda", "Magdeburg", "Halle", + "Erfurt", "Nürnberg", "Ulm", "Freiburg", "Basel", "Konstanz", "Lübeck", + "Hamburg", "Bremen", "Braunschweig", "Goslar", "Quedlinburg", "Merseburg", + "Naumburg", "Meißen", "Dresden", "Leipzig", "Frankfurt", "Heidelberg", + "Rothenburg", "Dinkelsbühl", "Nördlingen", "Esslingen", "Reutlingen", + "Ravensburg", "Memmingen", "Kempten", "Lindau", "Überlingen" +] + +# Medieval occupations +OCCUPATIONS = [ + # Nobility and military + "Ritter", "Knappe", "Burgvogt", "Landvogt", "Marschall", + # Clergy + "Priester", "Mönch", "Nonne", "Abt", "Äbtissin", "Bischof", + # Craftsmen + "Schmied", "Schneider", "Schuster", 
"Bäcker", "Müller", "Bauer", + "Zimmermann", "Maurer", "Tischler", "Schreiner", "Färber", "Gerber", + "Kürschner", "Seiler", "Böttcher", "Drechsler", "Schlosser", + # Merchants and traders + "Kaufmann", "Händler", "Krämer", "Gewürzhändler", "Tuchhändler", + # Other + "Jäger", "Fischer", "Schäfer", "Hirte", "Knecht", "Magd", + "Wirt", "Koch", "Bader", "Barbier", "Arzt", "Apotheker" +] + +# Medieval event types with probabilities and age ranges +EVENT_TYPES = [ + ("Baptism", 0.95, 0, 1), # 95% chance, 0-1 years after birth (very common) + ("Christening", 0.80, 0, 1), # 80% chance, 0-1 years after birth + ("Confirmation", 0.60, 12, 16), # 60% chance, 12-16 years after birth + ("Education", 0.30, 7, 14), # 30% chance (mostly for nobility/wealthy), 7-14 years + ("Occupation", 0.85, 14, 50), # 85% chance, 14-50 years after birth + ("Military Service", 0.40, 18, 40), # 40% chance for males, 18-40 years + ("Residence", 0.70, 0, 80), # 70% chance, any time + ("Marriage", 0.90, 18, 35), # 90% chance, 18-35 years (very common) + ("Burial", 0.80, None, None), # 80% chance if death exists, at death time + ("Cremation", 0.05, None, None), # 5% chance if death exists (rare in medieval times) +] + +# Generation configuration +NUM_GENERATIONS = 5 +MIN_CHILDREN_PER_FAMILY = 6 +MAX_CHILDREN_PER_FAMILY = 12 +GENERATION_START_YEAR = 1000 # Start of High Medieval period +GENERATION_END_YEAR = 1300 # End of High Medieval period + +@dataclass +class EventData: + """Data structure for an event.""" + handle: str + event_type: str + year: int + month: int + day: int + description: str + event_id: int + + +@dataclass +class PersonData: + """Data structure for person information.""" + handle: str + name: str + surname: str + birth: int + death: Optional[int] + gender: str + parentin: List[str] + childof: List[str] + generation: int + + +@dataclass +class FamilyData: + """Data structure for family information.""" + handle: str + father_handle: str + mother_handle: str + children_handles: 
def gen_handle(prefix: str, num: int) -> str:
    """Return a handle of the form ``_<prefix><num zero-padded to 8 digits>``.

    Example: ``gen_handle("PERSON", 7)`` returns ``"_PERSON00000007"``.
    """
    return f"_{prefix}{num:08d}"


# Upper bound for one avatar download so a stalled connection cannot hang
# the whole generation run.
_PORTRAIT_TIMEOUT_SECONDS = 10


def generate_portrait(person_id: int, name: str, gender: str, birth_year: int) -> Optional[Tuple[str, str]]:
    """
    Fetch (or reuse) an SVG portrait for a person from the DiceBear Avatars API.

    The avatar seed is ``"<name>_<person_id>_<gender>"``, so the same person
    always maps to the same request and the same file name. A non-empty
    portrait file that is already on disk is therefore reused without any
    network access.

    Args:
        person_id: Unique person ID; used in the seed, file name and media handle.
        name: Person's name; used in the seed and (spaces replaced by ``_``)
            in the file name.
        gender: Person's gender ('M' or 'F'); part of the seed.
        birth_year: Birth year. Accepted for interface compatibility; it is
            not used by this function.

    Returns:
        Optional[Tuple[str, str]]: ``(media_handle, file_path)`` on success,
        ``None`` when the directory cannot be created or the download fails
        (a warning is printed in that case).
    """
    try:
        # exist_ok avoids the check-then-create race and keeps a creation
        # failure on the same "warn and return None" path as a failed download.
        os.makedirs(PORTRAITS_DIR, exist_ok=True)
    except OSError as e:
        print(f"Warning: Could not generate portrait for {name}: {e}")
        return None

    seed = f"{name}_{person_id}_{gender}"
    url = f"{DICEBEAR_API_BASE}?{urllib.parse.urlencode({'seed': seed})}"

    filename = f"portrait_{person_id:04d}_{name.replace(' ', '_')}.svg"
    file_path = os.path.join(PORTRAITS_DIR, filename)
    media_handle = gen_handle("MEDIA", person_id)

    # The seed is deterministic, so an existing non-empty file is the same
    # portrait; reusing it keeps re-runs fast and usable offline.
    if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
        return (media_handle, file_path)

    # Download to a temporary name and rename afterwards so an interrupted
    # transfer never leaves a truncated .svg that later looks valid.
    partial_path = f"{file_path}.part"
    try:
        with urllib.request.urlopen(url, timeout=_PORTRAIT_TIMEOUT_SECONDS) as response:
            payload = response.read()
        with open(partial_path, "wb") as out_file:
            out_file.write(payload)
        os.replace(partial_path, file_path)
    except (urllib.error.URLError, OSError) as e:
        try:
            os.remove(partial_path)
        except OSError:
            # Best-effort cleanup: the partial file may never have been created.
            pass
        print(f"Warning: Could not generate portrait for {name}: {e}")
        return None

    return (media_handle, file_path)
def create_media_element(media_handle: str, file_path: str, title: str, media_id: Optional[int] = None) -> ET.Element:
    """
    Build the XML element describing one media object.

    Args:
        media_handle: Handle of the media object (stored in ``handle``).
        file_path: Path written to the ``src`` attribute of the ``file`` child.
        title: Text of the ``title`` child.
        media_id: Number rendered as ``O<id zero-padded to 4 digits>``. When
            ``None`` it is parsed from the handle after removing ``_MEDIA``.

    Returns:
        ET.Element: A ``media`` element with ``file``, ``title`` and
        ``mimetype`` children, in that order.
    """
    # NOTE(review): the Gramps XML DTD appears to call this element <object>
    # with mime/description attributes on <file> -- confirm before importing.
    if media_id is None:
        # e.g. "_MEDIA00000001" -> 1
        media_id = int(media_handle.replace("_MEDIA", ""))

    node = ET.Element(
        "media",
        {
            "handle": media_handle,
            "change": str(int(datetime.now().timestamp())),
            "id": f"O{media_id:04d}",
        },
    )
    ET.SubElement(node, "file", {"src": file_path})
    ET.SubElement(node, "title").text = title
    ET.SubElement(node, "mimetype").text = "image/svg+xml"
    return node


def create_event_element(event_data: EventData) -> ET.Element:
    """
    Build the XML element for one event.

    Args:
        event_data: Source record; its handle, event_id, event_type,
            year/month/day and description are read.

    Returns:
        ET.Element: An ``event`` element with ``type``, ``dateval`` (``val``
        formatted as ``YYYY-MM-DD``) and ``description`` children.
    """
    node = ET.Element(
        "event",
        {
            "handle": event_data.handle,
            "change": str(int(datetime.now().timestamp())),
            "id": f"E{event_data.event_id:04d}",
        },
    )
    ET.SubElement(node, "type").text = event_data.event_type

    iso_date = f"{event_data.year:04d}-{event_data.month:02d}-{event_data.day:02d}"
    ET.SubElement(node, "dateval", {"val": iso_date})

    ET.SubElement(node, "description").text = event_data.description
    return node
def gen_additional_events(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int],
    gender: str
) -> List[Tuple[str, EventData]]:
    """
    Generate the optional life events of a person from the EVENT_TYPES table.

    Each table row is ``(event_type, probability, min_age, max_age)``. A row
    is used when a random draw passes its probability. Marriage rows are
    skipped here because marriages are created with the family. Burial and
    Cremation are dated in the death year and are mutually exclusive: once
    one has been recorded the other is not generated.

    Event numbers are ``pid * EVENT_ID_OFFSET + counter`` with the counter
    starting at EVENT_ID_START_OFFSET (slots below that are used for the
    birth and death events). Generation stops before the counter would reach
    EVENT_ID_OFFSET, because that number already belongs to the next person.

    Args:
        pid: Person ID.
        first_name: Person's first name (used in descriptions).
        surname: Person's surname (used in descriptions).
        birth_year: Birth year; age-based events are offset from it.
        death_year: Death year, or None when the person has no death event.
        gender: Person's gender ('M' or 'F').

    Returns:
        List[Tuple[str, EventData]]: List of (event_handle, EventData) tuples.
    """
    events: List[Tuple[str, EventData]] = []
    event_counter = EVENT_ID_START_OFFSET
    disposition_recorded = False  # True once a Burial or Cremation exists

    for event_type, probability, min_years, max_years in EVENT_TYPES:
        if random.random() > probability:
            continue

        is_disposition = event_type in ("Burial", "Cremation")
        if is_disposition:
            # Needs a death, and a body is either buried or cremated, not both.
            if death_year is None or disposition_recorded:
                continue
            event_year = death_year
        elif event_type == "Marriage" or min_years is None or max_years is None:
            # Marriage is handled in family generation; rows without an age
            # range cannot be dated.
            continue
        else:
            event_year = birth_year + random.randint(min_years, max_years)
            # Don't generate events after death
            if death_year is not None and event_year > death_year:
                continue

        event_month = random.randint(MIN_MONTH, MAX_MONTH)
        event_day = random.randint(MIN_DAY, MAX_DAY)

        # Women typically didn't serve in medieval military
        if event_type == "Military Service" and gender == "F":
            continue

        # The next number would fall into the following person's ID range
        # and duplicate that person's birth event handle.
        if event_counter >= EVENT_ID_OFFSET:
            break

        if event_type == "Occupation":
            occupation = random.choice(OCCUPATIONS)
            description = f"{occupation} - {first_name} {surname}"
        elif event_type == "Residence":
            place = random.choice(PLACES)
            description = f"Residence in {place} - {first_name} {surname}"
        else:
            description = f"{event_type} of {surname}, {first_name}"

        event_number = pid * EVENT_ID_OFFSET + event_counter
        event_handle = gen_handle("EVENT", event_number)
        event_data = EventData(
            handle=event_handle,
            event_type=event_type,
            year=event_year,
            month=event_month,
            day=event_day,
            description=description,
            event_id=event_number
        )
        events.append((event_handle, event_data))
        event_counter += 1
        if is_disposition:
            disposition_recorded = True

    return events
def gen_person(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int],
    gender: str,
    parentin_families: Optional[List[str]] = None,
    childof_families: Optional[List[str]] = None,
    generation: int = 1
) -> Tuple[ET.Element, ET.Element, Optional[ET.Element], List[ET.Element], List[Tuple[str, EventData]], Optional[Tuple[str, str]]]:
    """
    Build a person element together with its birth, death and optional events.

    Event numbers are derived from the person ID: ``pid * EVENT_ID_OFFSET``
    for the birth, the next number for the death, and the numbers chosen by
    gen_additional_events for everything else.

    Args:
        pid: Person ID.
        first_name: First name.
        surname: Surname.
        birth_year: Birth year.
        death_year: Death year; when falsy no death event is produced.
        gender: Gender ('M' or 'F').
        parentin_families: Handles of families in which the person is a parent.
        childof_families: Handles of families in which the person is a child.
        generation: Generation number. Accepted for interface compatibility;
            it is not read by this function.

    Returns:
        Tuple of (person_element, birth_event, death_event,
        additional_events_xml, additional_events_tuples, portrait_info).
        ``death_event`` and ``portrait_info`` may be None.
    """
    birth_event_id = pid * EVENT_ID_OFFSET
    death_event_id = birth_event_id + 1

    person_handle = gen_handle("PERSON", pid)
    birth_handle = gen_handle("EVENT", birth_event_id)
    death_handle = gen_handle("EVENT", death_event_id) if death_year else None

    # Optional events first, then the portrait: this keeps the order in which
    # random numbers are consumed unchanged.
    additional_events = gen_additional_events(pid, first_name, surname, birth_year, death_year, gender)
    portrait_info = generate_portrait(pid, f"{first_name} {surname}", gender, birth_year)

    person_elem = ET.Element(
        "person",
        {
            "handle": person_handle,
            "change": str(int(datetime.now().timestamp())),
            "id": f"I{pid:04d}",
        },
    )
    ET.SubElement(person_elem, "gender").text = gender

    name_elem = ET.SubElement(person_elem, "name", {"type": "Birth Name"})
    ET.SubElement(name_elem, "first").text = first_name
    ET.SubElement(name_elem, "surname").text = surname

    # Portrait reference, only when a portrait could be produced.
    # NOTE(review): Gramps XML seems to expect <objref> directly under
    # <person> rather than <gallery>/<mediaobjref> -- confirm against the DTD.
    if portrait_info:
        gallery_elem = ET.SubElement(person_elem, "gallery")
        ET.SubElement(gallery_elem, "mediaobjref", {"hlink": portrait_info[0]})

    # Event references: birth, optional events, then death (if any).
    referenced_handles = [birth_handle]
    referenced_handles.extend(event_handle for event_handle, _ in additional_events)
    if death_handle:
        referenced_handles.append(death_handle)
    for event_handle in referenced_handles:
        ET.SubElement(person_elem, "eventref", {"hlink": event_handle, "role": "Primary"})

    # Family links: families as parent first, then families as child.
    for link_tag, family_handles in (("parentin", parentin_families), ("childof", childof_families)):
        for family_handle in family_handles or ():
            ET.SubElement(person_elem, link_tag, {"hlink": family_handle})

    # Birth event
    birth_month = random.randint(MIN_MONTH, MAX_MONTH)
    birth_day = random.randint(MIN_DAY, MAX_DAY)
    birth_event = create_event_element(
        EventData(
            handle=birth_handle,
            event_type="Birth",
            year=birth_year,
            month=birth_month,
            day=birth_day,
            description=f"Birth of {surname}, {first_name}",
            event_id=birth_event_id,
        )
    )

    # Death event (death_handle is only set when death_year is truthy)
    death_event: Optional[ET.Element] = None
    if death_handle:
        death_month = random.randint(MIN_MONTH, MAX_MONTH)
        death_day = random.randint(MIN_DAY, MAX_DAY)
        death_event = create_event_element(
            EventData(
                handle=death_handle,
                event_type="Death",
                year=death_year,
                month=death_month,
                day=death_day,
                description=f"Death of {surname}, {first_name}",
                event_id=death_event_id,
            )
        )

    additional_events_xml = [create_event_element(data) for _, data in additional_events]

    return person_elem, birth_event, death_event, additional_events_xml, additional_events, portrait_info
def gen_family(
    fid: int,
    father_handle: str,
    mother_handle: str,
    marriage_year: int,
    children_handles: List[str],
    generation: int = 1
) -> Tuple[ET.Element, ET.Element]:
    """
    Build a family element and its marriage event.

    Args:
        fid: Family ID; also determines the marriage event number
            (``fid * FAMILY_ID_OFFSET``).
        father_handle: Handle of the father.
        mother_handle: Handle of the mother.
        marriage_year: Year of the marriage; month and day are drawn randomly.
        children_handles: Handles of the children, emitted as ``childref``.
        generation: Accepted for interface compatibility; not read here.

    Returns:
        Tuple of (family_element, marriage_event_element).
    """
    # NOTE(review): person events are numbered pid * EVENT_ID_OFFSET + n, so
    # with the file's offsets (10 and 100) this number equals the birth event
    # number of person 10 * fid. Changing it requires updating every place
    # that recomputes the marriage handle.
    marriage_event_id = fid * FAMILY_ID_OFFSET
    family_handle = gen_handle("FAMILY", fid)
    marriage_handle = gen_handle("EVENT", marriage_event_id)

    family_elem = ET.Element(
        "family",
        {
            "handle": family_handle,
            "change": str(int(datetime.now().timestamp())),
            "id": f"F{fid:04d}",
        },
    )
    ET.SubElement(family_elem, "rel", {"type": "Married"})
    ET.SubElement(family_elem, "father", {"hlink": father_handle})
    ET.SubElement(family_elem, "mother", {"hlink": mother_handle})
    for child_handle in children_handles:
        ET.SubElement(family_elem, "childref", {"hlink": child_handle})
    ET.SubElement(family_elem, "eventref", {"hlink": marriage_handle, "role": "Family"})

    # Marriage event
    marriage_month = random.randint(MIN_MONTH, MAX_MONTH)
    marriage_day = random.randint(MIN_DAY, MAX_DAY)
    marriage_event = create_event_element(
        EventData(
            handle=marriage_handle,
            event_type="Marriage",
            year=marriage_year,
            month=marriage_month,
            day=marriage_day,
            description="Marriage",
            event_id=marriage_event_id,
        )
    )

    return family_elem, marriage_event


def create_gramps_xml_document(
    events: List[ET.Element],
    people: List[ET.Element],
    families: List[ET.Element],
    media: List[ET.Element]
) -> ET.ElementTree:
    """
    Assemble the complete Gramps XML document.

    The root ``database`` element receives, in order: ``header`` (creation
    date/version and researcher name), an empty ``tags`` element, then
    ``events``, ``people``, ``families`` and ``objects`` holding the given
    elements.

    Args:
        events: Event elements.
        people: Person elements.
        families: Family elements.
        media: Media elements, placed under ``objects``.

    Returns:
        ET.ElementTree: Tree rooted at the ``database`` element.
    """
    database = ET.Element("database", {"xmlns": GRAMPS_XML_NAMESPACE})

    # Header
    header = ET.SubElement(database, "header")
    ET.SubElement(
        header,
        "created",
        {"date": datetime.now().strftime('%Y-%m-%d'), "version": GRAMPS_XML_VERSION},
    )
    researcher = ET.SubElement(header, "researcher")
    ET.SubElement(researcher, "resname").text = "Medieval German Family Generator"

    # Tags (empty)
    ET.SubElement(database, "tags")

    # One container per record kind, in document order.
    sections = (
        ("events", events),
        ("people", people),
        ("families", families),
        ("objects", media),
    )
    for section_tag, members in sections:
        ET.SubElement(database, section_tag).extend(members)

    return ET.ElementTree(database)
def calculate_birth_year(generation: int) -> int:
    """
    Draw a birth year for a member of the given generation.

    The window starts 20 years later per generation and widens by 10 years
    per generation. With GENERATION_START_YEAR = 1000 this gives:
    Generation 1: 1000-1050
    Generation 2: 1020-1080
    Generation 3: 1040-1110
    Generation 4: 1060-1140
    Generation 5: 1080-1170
    """
    steps = generation - 1
    earliest = GENERATION_START_YEAR + 20 * steps
    latest = earliest + 50 + 10 * steps
    return random.randint(earliest, latest)


def calculate_death_year(birth_year: int, generation: int) -> Optional[int]:
    """
    Draw a death year from the birth year and generation.

    Generations 1-2 live 30-60 years, later generations 35-70 years. When the
    resulting year lies after GENERATION_END_YEAR it is kept with a 30%
    chance; otherwise None is returned (no death inside the period).
    """
    youngest, oldest = (30, 60) if generation <= 2 else (35, 70)
    death_year = birth_year + random.randint(youngest, oldest)

    if death_year <= GENERATION_END_YEAR:
        return death_year

    # Past the end of the period: only some of these deaths are recorded.
    return death_year if random.random() < 0.3 else None


def calculate_marriage_year(birth_year: int, gender: str) -> int:
    """
    Draw a marriage year from the birth year and gender.

    Women ("F") marry at 18-25; everyone else at 20-30.
    """
    youngest, oldest = (18, 25) if gender == "F" else (20, 30)
    return birth_year + random.randint(youngest, oldest)
def _register_generated_person(
    person_id: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int],
    gender: str,
    generation: int,
    person_data_map: Dict[int, PersonData],
    all_people: List[ET.Element],
    all_events: List[ET.Element],
    all_media: List[ET.Element],
    parentin: Optional[List[str]] = None,
    childof: Optional[List[str]] = None,
) -> PersonData:
    """Create one person via gen_person and store all of its records."""
    person_elem, birth_event, death_event, extra_events, _, portrait = gen_person(
        person_id, first_name, surname, birth_year, death_year, gender,
        parentin_families=parentin, childof_families=childof, generation=generation
    )

    all_people.append(person_elem)
    all_events.append(birth_event)
    if death_event is not None:
        all_events.append(death_event)
    all_events.extend(extra_events)

    if portrait:
        media_handle, file_path = portrait
        all_media.append(
            create_media_element(media_handle, file_path, f"Portrait of {first_name} {surname}", person_id)
        )

    data = PersonData(
        handle=gen_handle("PERSON", person_id),
        name=first_name,
        surname=surname,
        birth=birth_year,
        death=death_year,
        gender=gender,
        parentin=list(parentin or []),
        childof=list(childof or []),
        generation=generation
    )
    person_data_map[person_id] = data
    return data


def _clamp_parent_birth_year(parent_birth_year: int, child_birth_year: int, min_parent_age: int) -> int:
    """Move a pre-period parent birth into the period when the child's birth allows it."""
    if parent_birth_year >= GENERATION_START_YEAR:
        return parent_birth_year
    latest_plausible = child_birth_year - min_parent_age
    if latest_plausible < GENERATION_START_YEAR:
        # No in-period year keeps the parent old enough; a parent born before
        # the period is better than a parent younger than the child.
        return parent_birth_year
    return GENERATION_START_YEAR + random.randint(0, min(50, latest_plausible - GENERATION_START_YEAR))


def _generate_sibling(
    sibling_id: int,
    surname: str,
    birth_year_bounds: Tuple[int, int],
    earliest_birth_year: int,
    parent_death_years: Tuple[Optional[int], Optional[int]],
    family_handle: str,
    generation: int,
    person_data_map: Dict[int, PersonData],
    all_people: List[ET.Element],
    all_events: List[ET.Element],
    all_media: List[ET.Element],
) -> Optional[PersonData]:
    """Generate one child of a family, or return None when no valid birth year exists."""
    gender = "M" if random.random() < 0.5 else "F"
    first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES)
    birth_year = random.randint(*birth_year_bounds)

    # Ensure the sibling is born before either parent's death
    for parent_death in parent_death_years:
        if parent_death and birth_year > parent_death:
            birth_year = parent_death - 1

    # Skip if the adjusted birth year precedes the allowed window
    if birth_year < earliest_birth_year:
        return None

    death_year = calculate_death_year(birth_year, generation)
    return _register_generated_person(
        sibling_id, first_name, surname, birth_year, death_year, gender, generation,
        person_data_map, all_people, all_events, all_media, childof=[family_handle]
    )


def _sync_family_childrefs(all_families: List[ET.Element], family_handle: str, child_handles: List[str]) -> None:
    """Add missing childref links to an already-emitted family element, if one exists."""
    for family_elem in all_families:
        if family_elem.get("handle") != family_handle:
            continue
        linked = {ref.get("hlink") for ref in family_elem.findall("childref")}
        for child_handle in child_handles:
            if child_handle not in linked:
                ET.SubElement(family_elem, "childref").set("hlink", child_handle)
        return


def generate_parents_and_siblings(
    person_data: PersonData,
    person_data_map: Dict[int, PersonData],
    all_people: List[ET.Element],
    all_events: List[ET.Element],
    all_media: List[ET.Element],
    all_families: List[ET.Element],
    family_data_map: Dict[int, FamilyData],
    next_person_id: int,
    next_family_id: int,
    generation: int
) -> Tuple[int, int, Optional[str]]:
    """
    Generate parents and siblings for a person if they don't have parents.
    If they already have parents, top the family up with additional siblings.

    Consistency rules enforced here:
    - a parent is never generated younger than the person (a parent whose
      birth cannot be moved into the period stays before it);
    - the parents' marriage is dated before the person's birth, which also
      keeps the sibling birth window non-empty;
    - siblings added to an existing family are linked from the family's XML
      element when that element is already present in ``all_families``.

    Args:
        person_data: The person to generate parents/siblings for; its
            ``childof`` list is extended when a new parent family is created.
        person_data_map: Map of person IDs to PersonData (extended in place)
        all_people: List of all person XML elements (extended in place)
        all_events: List of all event XML elements (extended in place)
        all_media: List of all media XML elements (extended in place)
        all_families: List of all family XML elements (extended in place)
        family_data_map: Map of family IDs to FamilyData (extended in place)
        next_person_id: Next available person ID
        next_family_id: Next available family ID
        generation: Generation number for the person

    Returns:
        Tuple of (updated_next_person_id, updated_next_family_id, parent_family_handle or None)
    """
    # --- Person already has parents: only add siblings if the family is small ---
    if person_data.childof:
        parent_family_handle = person_data.childof[0]
        parent_family_data = next(
            (fam for fam in family_data_map.values() if fam.handle == parent_family_handle),
            None,
        )
        if parent_family_data is None:
            return next_person_id, next_family_id, parent_family_handle

        children = parent_family_data.children_handles
        if len(children) >= MIN_CHILDREN_PER_FAMILY:
            return next_person_id, next_family_id, parent_family_handle

        people_by_handle = {pdata.handle: pdata for pdata in person_data_map.values()}
        father_data = people_by_handle.get(parent_family_data.father_handle)
        mother_data = people_by_handle.get(parent_family_data.mother_handle)
        if father_data is None or mother_data is None:
            return next_person_id, next_family_id, parent_family_handle

        current_children_count = len(children)
        num_additional_siblings = random.randint(
            max(0, MIN_CHILDREN_PER_FAMILY - current_children_count),
            MAX_CHILDREN_PER_FAMILY - current_children_count
        )

        added_handles: List[str] = []
        for _ in range(num_additional_siblings):
            sibling_id = next_person_id
            next_person_id += 1

            # Sibling birth year: around the same time as the person
            sibling = _generate_sibling(
                sibling_id,
                father_data.surname,
                (person_data.birth - 5, person_data.birth + 15),
                parent_family_data.marriage_year + 1,
                (father_data.death, mother_data.death),
                parent_family_handle,
                generation,
                person_data_map, all_people, all_events, all_media,
            )
            if sibling is None:
                continue
            children.append(sibling.handle)
            added_handles.append(sibling.handle)

        # Keep an already-emitted family element in step with the new children.
        _sync_family_childrefs(all_families, parent_family_handle, added_handles)
        return next_person_id, next_family_id, parent_family_handle

    # --- No parents yet: create a parent couple from the previous generation ---
    parent_generation = max(1, generation - 1)

    # Father 20-40 and mother 18-35 years older than the person
    father_birth_year = person_data.birth - random.randint(20, 40)
    mother_birth_year = person_data.birth - random.randint(18, 35)

    # Prefer births inside the period, but never at the cost of a parent
    # younger than the child.
    father_birth_year = _clamp_parent_birth_year(father_birth_year, person_data.birth, 20)
    mother_birth_year = _clamp_parent_birth_year(mother_birth_year, person_data.birth, 18)

    father_death_year = calculate_death_year(father_birth_year, parent_generation)
    mother_death_year = calculate_death_year(mother_birth_year, parent_generation)

    father_name = random.choice(MALE_NAMES)
    father_surname = person_data.surname  # Child takes father's surname
    mother_name = random.choice(FEMALE_NAMES)
    mother_surname = random.choice(SURNAMES)  # Mother may have different surname

    parent_family_id = next_family_id
    next_family_id += 1
    parent_family_handle = gen_handle("FAMILY", parent_family_id)

    father_id = next_person_id
    next_person_id += 1
    father_data = _register_generated_person(
        father_id, father_name, father_surname, father_birth_year, father_death_year, "M",
        parent_generation, person_data_map, all_people, all_events, all_media,
        parentin=[parent_family_handle]
    )

    mother_id = next_person_id
    next_person_id += 1
    mother_data = _register_generated_person(
        mother_id, mother_name, mother_surname, mother_birth_year, mother_death_year, "F",
        parent_generation, person_data_map, all_people, all_events, all_media,
        parentin=[parent_family_handle]
    )

    # Marriage: the later of both individually drawn years, but always before
    # the birth of the person this family is being created for.
    parent_marriage_year = max(
        calculate_marriage_year(father_birth_year, "M"),
        calculate_marriage_year(mother_birth_year, "F")
    )
    parent_marriage_year = min(parent_marriage_year, person_data.birth - 1)

    # 6-12 children in total, including the original person
    num_siblings_total = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY)
    siblings_handles: List[str] = [person_data.handle]
    if parent_family_handle not in person_data.childof:
        person_data.childof.append(parent_family_handle)

    # Siblings are born after the marriage and up to 20 years after the
    # person; the marriage cap above guarantees min <= max.
    min_sibling_birth = parent_marriage_year + 1
    max_sibling_birth = person_data.birth + 20

    for _ in range(num_siblings_total - 1):  # -1: the original person is already counted
        sibling_id = next_person_id
        next_person_id += 1

        sibling = _generate_sibling(
            sibling_id,
            father_surname,  # Siblings take father's surname
            (min_sibling_birth, max_sibling_birth),
            min_sibling_birth,
            (father_death_year, mother_death_year),
            parent_family_handle,
            generation,
            person_data_map, all_people, all_events, all_media,
        )
        if sibling is not None:
            siblings_handles.append(sibling.handle)

    # Create parent family element
    parent_family_elem, parent_marriage_event = gen_family(
        parent_family_id, father_data.handle, mother_data.handle, parent_marriage_year,
        siblings_handles, parent_generation
    )
    all_families.append(parent_family_elem)
    all_events.append(parent_marriage_event)

    family_data_map[parent_family_id] = FamilyData(
        handle=parent_family_handle,
        father_handle=father_data.handle,
        mother_handle=mother_data.handle,
        children_handles=siblings_handles,
        marriage_year=parent_marriage_year,
        marriage_handle=gen_handle("EVENT", parent_family_id * FAMILY_ID_OFFSET),
        family_id=parent_family_id,
        generation=parent_generation
    )

    return next_person_id, next_family_id, parent_family_handle
1) + + gen1_family_id = next_family_id + next_family_id += 1 + gen1_family_handle = gen_handle("FAMILY", gen1_family_id) + + gen1_father_name = random.choice(MALE_NAMES) + gen1_father_surname = random.choice(SURNAMES) + gen1_father_handle = gen_handle("PERSON", gen1_father_id) + + gen1_mother_name = random.choice(FEMALE_NAMES) + gen1_mother_surname = random.choice(SURNAMES) + gen1_mother_handle = gen_handle("PERSON", gen1_mother_id) + + # Generate Generation 1 couple + gen1_father_person, gen1_father_birth_event, gen1_father_death_event, gen1_father_additional, _, gen1_father_portrait = gen_person( + gen1_father_id, gen1_father_name, gen1_father_surname, gen1_father_birth, gen1_father_death, "M", + parentin_families=[gen1_family_handle], generation=1 + ) + + gen1_mother_person, gen1_mother_birth_event, gen1_mother_death_event, gen1_mother_additional, _, gen1_mother_portrait = gen_person( + gen1_mother_id, gen1_mother_name, gen1_mother_surname, gen1_mother_birth, gen1_mother_death, "F", + parentin_families=[gen1_family_handle], generation=1 + ) + + all_people.extend([gen1_father_person, gen1_mother_person]) + all_events.extend([gen1_father_birth_event, gen1_mother_birth_event]) + if gen1_father_death_event is not None: + all_events.append(gen1_father_death_event) + if gen1_mother_death_event is not None: + all_events.append(gen1_mother_death_event) + all_events.extend(gen1_father_additional + gen1_mother_additional) + + # Add portraits + if gen1_father_portrait: + media_handle, file_path = gen1_father_portrait + media_elem = create_media_element(media_handle, file_path, f"Portrait of {gen1_father_name} {gen1_father_surname}", gen1_father_id) + all_media.append(media_elem) + if gen1_mother_portrait: + media_handle, file_path = gen1_mother_portrait + media_elem = create_media_element(media_handle, file_path, f"Portrait of {gen1_mother_name} {gen1_mother_surname}", gen1_mother_id) + all_media.append(media_elem) + + # Store person data + person_data_map[gen1_father_id] 
= PersonData( + handle=gen1_father_handle, + name=gen1_father_name, + surname=gen1_father_surname, + birth=gen1_father_birth, + death=gen1_father_death, + gender="M", + parentin=[gen1_family_handle], + childof=[], + generation=1 + ) + person_data_map[gen1_mother_id] = PersonData( + handle=gen1_mother_handle, + name=gen1_mother_name, + surname=gen1_mother_surname, + birth=gen1_mother_birth, + death=gen1_mother_death, + gender="F", + parentin=[gen1_family_handle], + childof=[], + generation=1 + ) + + # Generate children for Generation 1 + num_children_gen1 = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY) + gen1_children_handles: List[str] = [] + gen1_children_data: List[PersonData] = [] + + for i in range(num_children_gen1): + child_id = next_person_id + next_person_id += 1 + + gender = "M" if i % 2 == 0 else "F" + first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES) + # Children typically take father's surname + child_surname = gen1_father_surname + birth_year = max(gen1_father_birth, gen1_mother_birth) + random.randint(20, 35) + death_year = calculate_death_year(birth_year, 2) + + child_handle = gen_handle("PERSON", child_id) + child_person, child_birth, child_death, child_additional, _, child_portrait = gen_person( + child_id, first_name, child_surname, birth_year, death_year, gender, + childof_families=[gen1_family_handle], generation=2 + ) + + all_people.append(child_person) + all_events.append(child_birth) + if child_death is not None: + all_events.append(child_death) + all_events.extend(child_additional) + + if child_portrait: + media_handle, file_path = child_portrait + media_elem = create_media_element(media_handle, file_path, f"Portrait of {first_name} {child_surname}", child_id) + all_media.append(media_elem) + + gen1_children_handles.append(child_handle) + person_data_map[child_id] = PersonData( + handle=child_handle, + name=first_name, + surname=child_surname, + birth=birth_year, + death=death_year, + gender=gender, 
+ parentin=[], + childof=[gen1_family_handle], + generation=2 + ) + gen1_children_data.append(person_data_map[child_id]) + + # Create Generation 1 family + marriage_year = calculate_marriage_year(max(gen1_father_birth, gen1_mother_birth), "M") + gen1_family, gen1_marriage_event = gen_family(gen1_family_id, gen1_father_handle, gen1_mother_handle, marriage_year, gen1_children_handles, 1) + all_families.append(gen1_family) + all_events.append(gen1_marriage_event) + + family_data_map[gen1_family_id] = FamilyData( + handle=gen1_family_handle, + father_handle=gen1_father_handle, + mother_handle=gen1_mother_handle, + children_handles=gen1_children_handles, + marriage_year=marriage_year, + marriage_handle=gen_handle("EVENT", gen1_family_id * FAMILY_ID_OFFSET), + family_id=gen1_family_id, + generation=1 + ) + + # Update parentin for Generation 1 couple + person_data_map[gen1_father_id].parentin = [gen1_family_handle] + person_data_map[gen1_mother_id].parentin = [gen1_family_handle] + + # Generate subsequent generations (2-5) + current_generation_people = gen1_children_data.copy() + + for generation in range(2, NUM_GENERATIONS + 1): + print(f"Generating Generation {generation}...") + next_generation_people: List[PersonData] = [] + + # Pair up people from current generation to create families + # Shuffle to create more realistic pairing + males = [p for p in current_generation_people if p.gender == "M"] + females = [p for p in current_generation_people if p.gender == "F"] + random.shuffle(males) + random.shuffle(females) + + # Create families by pairing males and females + # Try to pair everyone, but if numbers don't match, some may remain unmarried + num_families = min(len(males), len(females)) + + for family_idx in range(num_families): + father_data = males[family_idx] + mother_data = females[family_idx] + + # Check if both survive to marriage age + father_marriage_age = calculate_marriage_year(father_data.birth, "M") - father_data.birth + mother_marriage_age = 
calculate_marriage_year(mother_data.birth, "F") - mother_data.birth + + # Skip if either dies before marriage age + if father_data.death and father_data.death < father_data.birth + father_marriage_age: + continue + if mother_data.death and mother_data.death < mother_data.birth + mother_marriage_age: + continue + + # Calculate marriage year + marriage_year = max( + calculate_marriage_year(father_data.birth, "M"), + calculate_marriage_year(mother_data.birth, "F") + ) + + # Don't create family if marriage would be after death + if father_data.death and marriage_year > father_data.death: + continue + if mother_data.death and marriage_year > mother_data.death: + continue + + # Generate parents and siblings for father if needed + next_person_id, next_family_id, _ = generate_parents_and_siblings( + father_data, person_data_map, all_people, all_events, all_media, all_families, + family_data_map, next_person_id, next_family_id, generation + ) + + # Generate parents and siblings for mother if needed + next_person_id, next_family_id, _ = generate_parents_and_siblings( + mother_data, person_data_map, all_people, all_events, all_media, all_families, + family_data_map, next_person_id, next_family_id, generation + ) + + # Create family + family_id = next_family_id + next_family_id += 1 + family_handle = gen_handle("FAMILY", family_id) + + # Generate children (6-12 per family) + num_children = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY) + children_handles: List[str] = [] + + for child_idx in range(num_children): + child_id = next_person_id + next_person_id += 1 + + gender = "M" if random.random() < 0.5 else "F" # Random gender + first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES) + # Children take father's surname + child_surname = father_data.surname + # Children born 1-25 years after marriage + birth_year = marriage_year + random.randint(1, 25) + + # Don't create child if born after parent's death + if father_data.death and birth_year > 
father_data.death: + continue + if mother_data.death and birth_year > mother_data.death: + continue + + death_year = calculate_death_year(birth_year, generation) + + child_handle = gen_handle("PERSON", child_id) + child_person, child_birth, child_death, child_additional, _, child_portrait = gen_person( + child_id, first_name, child_surname, birth_year, death_year, gender, + childof_families=[family_handle], generation=generation + ) + + all_people.append(child_person) + all_events.append(child_birth) + if child_death is not None: + all_events.append(child_death) + all_events.extend(child_additional) + + if child_portrait: + media_handle, file_path = child_portrait + media_elem = create_media_element(media_handle, file_path, f"Portrait of {first_name} {child_surname}", child_id) + all_media.append(media_elem) + + children_handles.append(child_handle) + child_data = PersonData( + handle=child_handle, + name=first_name, + surname=child_surname, + birth=birth_year, + death=death_year, + gender=gender, + parentin=[], + childof=[family_handle], + generation=generation + ) + person_data_map[child_id] = child_data + next_generation_people.append(child_data) + + # Only create family if there are children + if children_handles: + # Create family element + family_elem, marriage_event = gen_family(family_id, father_data.handle, mother_data.handle, marriage_year, children_handles, generation) + all_families.append(family_elem) + all_events.append(marriage_event) + + # Update parent data + if family_handle not in father_data.parentin: + father_data.parentin.append(family_handle) + if family_handle not in mother_data.parentin: + mother_data.parentin.append(family_handle) + + family_data_map[family_id] = FamilyData( + handle=family_handle, + father_handle=father_data.handle, + mother_handle=mother_data.handle, + children_handles=children_handles, + marriage_year=marriage_year, + marriage_handle=gen_handle("EVENT", family_id * FAMILY_ID_OFFSET), + family_id=family_id, + 
generation=generation + ) + + current_generation_people = next_generation_people + + # Regenerate person elements with updated family references + print("Updating family references...") + # Rebuild events and people lists from person_data_map + all_people.clear() + all_events.clear() + all_media.clear() + + # Regenerate all people with correct family references + for person_id, person_data in person_data_map.items(): + person_elem, birth_event, death_event, additional_events, _, portrait_info = gen_person( + person_id, person_data.name, person_data.surname, person_data.birth, person_data.death, person_data.gender, + parentin_families=person_data.parentin if person_data.parentin else None, + childof_families=person_data.childof if person_data.childof else None, + generation=person_data.generation + ) + all_people.append(person_elem) + + # Add events + all_events.append(birth_event) + if death_event is not None: + all_events.append(death_event) + all_events.extend(additional_events) + + # Add media + if portrait_info: + media_handle, file_path = portrait_info + media_elem = create_media_element(media_handle, file_path, f"Portrait of {person_data.name} {person_data.surname}", person_id) + all_media.append(media_elem) + + # Add marriage events from families + for family_id, family_data in family_data_map.items(): + marriage_handle = family_data.marriage_handle + marriage_event_data = EventData( + handle=marriage_handle, + event_type="Marriage", + year=family_data.marriage_year, + month=random.randint(MIN_MONTH, MAX_MONTH), + day=random.randint(MIN_DAY, MAX_DAY), + description="Marriage", + event_id=family_id * FAMILY_ID_OFFSET + ) + marriage_event = create_event_element(marriage_event_data) + all_events.append(marriage_event) + + # Create XML document + print("Creating Gramps XML document...") + tree = create_gramps_xml_document(all_events, all_people, all_families, all_media) + + # Write to file + output_file = "medieval_german_family.gramps" + tree.write(output_file, 
encoding='utf-8', xml_declaration=True) + + print(f"\nGenerated {output_file} with:") + print(f" - {len(all_people)} people") + print(f" - {len(all_families)} families") + print(f" - {len(all_events)} events") + print(f" - {len(all_media)} media objects") + print(f" - {NUM_GENERATIONS} generations") + print(f" - Time period: {GENERATION_START_YEAR}-{GENERATION_END_YEAR} AD") + + +if __name__ == "__main__": + main()