mygramps/generate_medieval_german_family.py
Daniel Viegas f0d52456bc Add medieval German family database generator
- Create generate_medieval_german_family.py script
- Generate 5 generations of medieval German families (1000-1300 AD)
- Include realistic medieval German names, surnames, places, and occupations
- Generate parents and siblings for partners when creating families
- Add medieval event types (Baptism, Confirmation, Marriage, Occupation, etc.)
- Generate 6-12 children per family with realistic date calculations
- Include portraits for all generated people
2025-11-30 16:08:19 +01:00

1318 lines
51 KiB
Python

#!/usr/bin/env python3
"""
Generate a realistic medieval German family database for Gramps testing.
Covers High Medieval period (1000-1300 AD) with 5 generations.
"""
import random
import xml.etree.ElementTree as ET
import os
import urllib.request
import urllib.error
import urllib.parse
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List, Tuple, Dict, Set
# Seed the module-level RNG at import time so every run draws the same
# sequence of names, dates and events.
random.seed(42)

# --- Event numbering -------------------------------------------------------
# Each person owns a run of event numbers starting at pid * EVENT_ID_OFFSET:
# +0 is the birth event, +1 the death event, and additional events are
# numbered from +EVENT_ID_START_OFFSET upwards.
EVENT_ID_OFFSET = 10
# A family's marriage event is numbered fid * FAMILY_ID_OFFSET.
# NOTE(review): person and family events are both turned into "EVENT" handles,
# so family 1 (1 * 100) and person 10 (10 * 10) produce the same number --
# confirm whether the consumer tolerates the duplicate handle.
FAMILY_ID_OFFSET = 100
EVENT_ID_START_OFFSET = 2

# Inclusive bounds for randomly drawn months and days. MAX_DAY is 28 so the
# drawn day exists in every month, including February.
MIN_MONTH = 1
MAX_MONTH = 12
MIN_DAY = 1
MAX_DAY = 28

# Written to the header's "version" attribute and to the root "xmlns"
# attribute of the generated document. GRAMPS_XML_DTD is not referenced in the
# part of the file reviewed here.
GRAMPS_XML_VERSION = "5.1.0"
GRAMPS_XML_NAMESPACE = "http://gramps-project.org/xml/1.7.1/"
GRAMPS_XML_DTD = "http://gramps-project.org/xml/1.7.1/grampsxml.dtd"

# Portrait generation constants: output directory (relative to the working
# directory) and the avatar endpoint queried with a per-person seed.
PORTRAITS_DIR = "portraits"
DICEBEAR_API_BASE = "https://api.dicebear.com/7.x/avataaars/svg"
# Medieval German male names (High Medieval period, 1000-1300 AD).
# Names are drawn uniformly with random.choice, so list length and order are
# part of the seeded output.
# NOTE(review): "Gebhard" is listed twice, which doubles its selection weight.
# It is left in place because removing it would shift every seeded draw.
MALE_NAMES = [
    "Heinrich", "Konrad", "Friedrich", "Albrecht", "Gottfried", "Ludwig",
    "Rudolf", "Otto", "Berthold", "Werner", "Ulrich", "Hartmann", "Eberhard",
    "Gerhard", "Dietrich", "Arnold", "Hermann", "Bruno", "Adalbert", "Gunther",
    "Siegfried", "Wolfgang", "Reinhard", "Burchard", "Gebhard", "Volkmar",
    "Wigand", "Baldwin", "Gunzelin", "Ekkehard", "Bernhard", "Hildebrand",
    "Ruprecht", "Wilhelm", "Johann", "Matthias", "Philipp", "Markward",
    "Hartwig", "Lothar", "Theoderich", "Wichmann", "Thiemo", "Gebhard",
    "Adalbero", "Gero", "Wichard", "Widukind", "Hugo", "Lambert"
]
# Medieval German female names (High Medieval period, 1000-1300 AD).
# Spelling variants (e.g. "Gertrud"/"Gertrude") are separate entries.
FEMALE_NAMES = [
    "Adelheid", "Gertrud", "Margarete", "Elisabeth", "Katharina", "Anna",
    "Agnes", "Hedwig", "Mechthild", "Beatrix", "Sophia", "Mathilde",
    "Irmgard", "Gisela", "Kunigunde", "Helena", "Brigitta", "Ursula",
    "Barbara", "Dorothea", "Christina", "Petronilla", "Richza", "Judith",
    "Euphemia", "Cecilia", "Gertrude", "Ida", "Luitgard", "Oda", "Adela",
    "Bertha", "Ermengard", "Hildegard", "Imelda", "Jutta", "Liutgard",
    "Mechtilde", "Odilia", "Regina", "Walburga", "Wiborada", "Yolanda"
]
# Medieval German surnames (High Medieval period).
# Used for root couples and for mothers of generated parent families; children
# inherit the father's surname instead of drawing from this list.
SURNAMES = [
    # Noble families
    "von Habsburg", "von Hohenstaufen", "von Wittelsbach", "von Babenberg",
    "von Zähringen", "von Wettin", "von Ascania", "von Welf",
    # Common surnames (occupational, descriptive)
    "Müller", "Schmidt", "Weber", "Fischer", "Meyer", "Wagner", "Becker",
    "Schulz", "Hoffmann", "Schäfer", "Koch", "Bauer", "Richter", "Klein",
    "Wolf", "Schröder", "Neumann", "Schwarz", "Zimmermann", "Braun",
    "Krüger", "Hofmann", "Hartmann", "Lange", "Schmitt", "Werner",
    "Schmitz", "Krause", "Meier", "Lehmann", "Schmid", "Schulze",
    "Maier", "Köhler", "Herrmann", "König", "Walter", "Huber",
    "Mayer", "Fuchs", "Peters", "Lang", "Scholz", "Möller",
    "Weiß", "Jung", "Hahn", "Schubert", "Vogel", "Friedrich"
]
# Medieval German cities and places (High Medieval period).
# Drawn for the description of "Residence" events.
PLACES = [
    "Köln", "Mainz", "Trier", "Augsburg", "Regensburg", "Worms", "Speyer",
    "Straßburg", "Würzburg", "Bamberg", "Fulda", "Magdeburg", "Halle",
    "Erfurt", "Nürnberg", "Ulm", "Freiburg", "Basel", "Konstanz", "Lübeck",
    "Hamburg", "Bremen", "Braunschweig", "Goslar", "Quedlinburg", "Merseburg",
    "Naumburg", "Meißen", "Dresden", "Leipzig", "Frankfurt", "Heidelberg",
    "Rothenburg", "Dinkelsbühl", "Nördlingen", "Esslingen", "Reutlingen",
    "Ravensburg", "Memmingen", "Kempten", "Lindau", "Überlingen"
]
# Medieval occupations.
# Drawn for the description of "Occupation" events. The draw does not look at
# the person's gender, so gendered titles can be assigned to either sex.
OCCUPATIONS = [
    # Nobility and military
    "Ritter", "Knappe", "Burgvogt", "Landvogt", "Marschall",
    # Clergy
    "Priester", "Mönch", "Nonne", "Abt", "Äbtissin", "Bischof",
    # Craftsmen
    "Schmied", "Schneider", "Schuster", "Bäcker", "Müller", "Bauer",
    "Zimmermann", "Maurer", "Tischler", "Schreiner", "Färber", "Gerber",
    "Kürschner", "Seiler", "Böttcher", "Drechsler", "Schlosser",
    # Merchants and traders
    "Kaufmann", "Händler", "Krämer", "Gewürzhändler", "Tuchhändler",
    # Other
    "Jäger", "Fischer", "Schäfer", "Hirte", "Knecht", "Magd",
    "Wirt", "Koch", "Bader", "Barbier", "Arzt", "Apotheker"
]
# Medieval event types with probabilities and age ranges.
# Each entry is (event type, probability, min age, max age). The ages are
# whole years after birth, both bounds inclusive. gen_additional_events walks
# this list in order and treats some entries specially:
#   - "Burial" / "Cremation" have no age range and are dated in the death year;
#   - "Marriage" is always skipped there because marriage events are created
#     per family by gen_family;
#   - "Military Service" is dropped for persons with gender "F".
EVENT_TYPES = [
    ("Baptism", 0.95, 0, 1), # 95% chance, 0-1 years after birth (very common)
    ("Christening", 0.80, 0, 1), # 80% chance, 0-1 years after birth
    ("Confirmation", 0.60, 12, 16), # 60% chance, 12-16 years after birth
    ("Education", 0.30, 7, 14), # 30% chance (mostly for nobility/wealthy), 7-14 years
    ("Occupation", 0.85, 14, 50), # 85% chance, 14-50 years after birth
    ("Military Service", 0.40, 18, 40), # 40% chance for males, 18-40 years
    ("Residence", 0.70, 0, 80), # 70% chance, any time
    ("Marriage", 0.90, 18, 35), # 90% chance, 18-35 years (very common)
    ("Burial", 0.80, None, None), # 80% chance if death exists, at death time
    ("Cremation", 0.05, None, None), # 5% chance if death exists (rare in medieval times)
]
# Generation configuration
NUM_GENERATIONS = 5
# Inclusive bounds for the number of children attempted per family; siblings
# whose drawn birth year turns out invalid are skipped, so a family can end up
# with fewer than the minimum.
MIN_CHILDREN_PER_FAMILY = 6
MAX_CHILDREN_PER_FAMILY = 12
GENERATION_START_YEAR = 1000 # Start of High Medieval period
GENERATION_END_YEAR = 1300 # End of High Medieval period
@dataclass
class EventData:
    """Plain record describing one event before it is rendered to XML.

    create_event_element turns an instance into an ``event`` element whose
    date value is ``YYYY-MM-DD`` built from ``year``, ``month`` and ``day``.
    """
    # Unique handle string used for cross references (see gen_handle).
    handle: str
    # Event type text, e.g. "Birth", "Death", "Marriage", "Occupation".
    event_type: str
    # Calendar date components of the event.
    year: int
    month: int
    day: int
    # Free-text description written to the element's description child.
    description: str
    # Numeric ID; rendered as "E" followed by the zero-padded number.
    event_id: int
@dataclass
class PersonData:
    """Bookkeeping record for a generated person, kept alongside the XML element."""
    # Unique person handle (see gen_handle).
    handle: str
    # First name.
    name: str
    surname: str
    # Birth year.
    birth: int
    # Death year, or None when no death was generated for the person.
    death: Optional[int]
    # "M" or "F".
    gender: str
    # Handles of families in which this person is a parent.
    parentin: List[str]
    # Handles of families in which this person is a child.
    childof: List[str]
    # Generation number the person belongs to.
    generation: int
@dataclass
class FamilyData:
    """Bookkeeping record for a generated family, kept alongside the XML element."""
    # Unique family handle (see gen_handle).
    handle: str
    # Person handles of the two parents.
    father_handle: str
    mother_handle: str
    # Person handles of the children; may be extended after the family's XML
    # element has been built.
    children_handles: List[str]
    # Year of the marriage event.
    marriage_year: int
    # Handle of the marriage event.
    marriage_handle: str
    # Numeric family ID.
    family_id: int
    # Generation number of the parents.
    generation: int
def gen_handle(prefix: str, num: int) -> str:
    """Build a handle: underscore, ``prefix``, then ``num`` zero-padded to 8 digits."""
    padded_number = format(num, "08d")
    return "_" + prefix + padded_number
def generate_portrait(person_id: int, name: str, gender: str, birth_year: int) -> Optional[Tuple[str, str]]:
    """
    Download a portrait for a person from the DiceBear Avatars API.

    The avatar seed is built from the name, ID and gender, so the same person
    always requests the same image. The SVG is stored in PORTRAITS_DIR.

    Args:
        person_id: Unique person ID (also used for the media handle and file name).
        name: Person's full name; spaces become underscores in the file name.
        gender: Person's gender ('M' or 'F'); only used as part of the seed.
        birth_year: Birth year. Currently unused; kept for interface compatibility.

    Returns:
        Optional[Tuple[str, str]]: (media_handle, file_path) if the portrait was
        downloaded and written, None if the download or the write failed.

    Raises:
        OSError: If the portraits directory cannot be created.
    """
    # Imported locally because the module's import block does not bind ``http``.
    import http.client

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(PORTRAITS_DIR, exist_ok=True)

    # Create seed from name, person_id, and gender for deterministic generation
    seed = f"{name}_{person_id}_{gender}"
    url = f"{DICEBEAR_API_BASE}?{urllib.parse.urlencode({'seed': seed})}"

    filename = f"portrait_{person_id:04d}_{name.replace(' ', '_')}.svg"
    file_path = os.path.join(PORTRAITS_DIR, filename)

    # One request is made per generated person, so a stalled connection must
    # not block the whole run: bound each download with a timeout.
    download_timeout_seconds = 30
    try:
        # Read the full response before touching the disk so a failed transfer
        # does not leave a truncated SVG behind.
        with urllib.request.urlopen(url, timeout=download_timeout_seconds) as response:
            svg_bytes = response.read()
        with open(file_path, "wb") as portrait_file:
            portrait_file.write(svg_bytes)
    except (OSError, http.client.HTTPException) as e:
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        # Portraits are best-effort: report and carry on without one.
        print(f"Warning: Could not generate portrait for {name}: {e}")
        return None

    media_handle = gen_handle("MEDIA", person_id)
    return (media_handle, file_path)
def create_media_element(media_handle: str, file_path: str, title: str, media_id: Optional[int] = None) -> ET.Element:
    """
    Build the XML element describing one media object.

    The element is tagged ``media`` and carries ``handle``, ``change`` (current
    Unix timestamp) and ``id`` attributes, followed by three children:
    ``file`` (with a ``src`` attribute), ``title`` and ``mimetype``.

    NOTE(review): the Gramps XML DTD appears to model media as ``object``
    elements with mime/description attributes on ``file`` -- confirm that the
    consumer of this output accepts the shape produced here.

    Args:
        media_handle: Unique handle for the media object.
        file_path: Path to the media file (relative to XML file location).
        title: Title/description for the media object.
        media_id: Optional media ID number. If None, it is parsed from the
            handle by stripping the "_MEDIA" prefix.

    Returns:
        ET.Element: The media XML element.

    Raises:
        ValueError: If media_id is None and the handle has no numeric remainder.
    """
    change_stamp = str(int(datetime.now().timestamp()))
    if media_id is None:
        # e.g. "_MEDIA00000001" -> 1
        media_id = int(media_handle.replace("_MEDIA", ""))

    element = ET.Element(
        "media",
        {"handle": media_handle, "change": change_stamp, "id": f"O{media_id:04d}"},
    )
    ET.SubElement(element, "file", {"src": file_path})
    ET.SubElement(element, "title").text = title
    ET.SubElement(element, "mimetype").text = "image/svg+xml"
    return element
def create_event_element(event_data: EventData) -> ET.Element:
    """
    Render an EventData record as an ``event`` XML element.

    The element carries ``handle``, ``change`` (current Unix timestamp) and
    ``id`` ("E" plus the zero-padded event ID) attributes, followed by the
    children ``type``, ``dateval`` (``val`` is the date as YYYY-MM-DD) and
    ``description``.

    Args:
        event_data: EventData object containing event information.

    Returns:
        ET.Element: The event XML element.
    """
    attributes = {
        "handle": event_data.handle,
        "change": str(int(datetime.now().timestamp())),
        "id": f"E{event_data.event_id:04d}",
    }
    element = ET.Element("event", attributes)

    ET.SubElement(element, "type").text = event_data.event_type

    iso_date = "-".join((
        f"{event_data.year:04d}",
        f"{event_data.month:02d}",
        f"{event_data.day:02d}",
    ))
    ET.SubElement(element, "dateval", {"val": iso_date})

    ET.SubElement(element, "description").text = event_data.description
    return element
def gen_additional_events(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int],
    gender: str
) -> List[Tuple[str, EventData]]:
    """
    Generate additional events for a person based on event type probabilities.

    Walks EVENT_TYPES in order. Each type is kept with its configured
    probability, dated at a random age inside its range (or in the death year
    for Burial/Cremation), and dropped if it would fall after the death year.
    Marriage is never produced here, and Military Service is dropped for
    gender 'F'.

    Event numbers are ``pid * EVENT_ID_OFFSET + k`` with k starting at
    EVENT_ID_START_OFFSET. k is capped below EVENT_ID_OFFSET so that a person
    with many events cannot spill into the next person's numbers.

    NOTE(review): month and day are drawn independently of the birth/death
    events, so a same-year event (e.g. Burial) can be dated earlier in the year
    than the Death event generated by gen_person.

    Args:
        pid: Person ID.
        first_name: Person's first name.
        surname: Person's surname.
        birth_year: Birth year.
        death_year: Death year (None if no death was generated).
        gender: Person's gender ('M' or 'F').

    Returns:
        List[Tuple[str, EventData]]: List of (event_handle, EventData) tuples.
    """
    events: List[Tuple[str, EventData]] = []
    event_counter = EVENT_ID_START_OFFSET

    for event_type, probability, min_years, max_years in EVENT_TYPES:
        if random.random() > probability:
            continue

        if event_type in ("Burial", "Cremation"):
            # Death-related events only exist when a death was generated.
            if death_year is None:
                continue
            event_year = death_year
        elif event_type == "Marriage":
            # Marriage handled separately in family generation
            continue
        elif min_years is None or max_years is None:
            continue
        else:
            event_year = birth_year + random.randint(min_years, max_years)
            # Don't generate events after death
            if death_year is not None and event_year > death_year:
                continue

        event_month = random.randint(MIN_MONTH, MAX_MONTH)
        event_day = random.randint(MIN_DAY, MAX_DAY)

        # Checked only after the date draws so the RNG sequence stays the same
        # for both genders.
        if event_type == "Military Service" and gender == "F":
            continue  # Women typically didn't serve in medieval military

        if event_type == "Occupation":
            occupation = random.choice(OCCUPATIONS)
            description = f"{occupation} - {first_name} {surname}"
        elif event_type == "Residence":
            place = random.choice(PLACES)
            description = f"Residence in {place} - {first_name} {surname}"
        else:
            description = f"{event_type} of {surname}, {first_name}"

        # Numbers pid*OFFSET .. pid*OFFSET + OFFSET-1 belong to this person;
        # the next number is already the following person's birth event.
        # Drop the event rather than emit a duplicate handle. `continue`
        # (not `break`) keeps the remaining RNG draws unchanged.
        if event_counter >= EVENT_ID_OFFSET:
            continue

        event_number = pid * EVENT_ID_OFFSET + event_counter
        event_handle = gen_handle("EVENT", event_number)
        event_data = EventData(
            handle=event_handle,
            event_type=event_type,
            year=event_year,
            month=event_month,
            day=event_day,
            description=description,
            event_id=event_number
        )
        events.append((event_handle, event_data))
        event_counter += 1

    return events
def gen_person(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int],
    gender: str,
    parentin_families: Optional[List[str]] = None,
    childof_families: Optional[List[str]] = None,
    generation: int = 1
) -> Tuple[ET.Element, ET.Element, Optional[ET.Element], List[ET.Element], List[Tuple[str, EventData]], Optional[Tuple[str, str]]]:
    """
    Build a person element together with its birth, death and additional events.

    Children of the person element are emitted in this order: gender, name,
    gallery (only when a portrait was obtained), one eventref per event
    (birth, additional events, death), parentin references, childof references.

    Args:
        pid: Person ID.
        first_name: First name.
        surname: Surname.
        birth_year: Birth year.
        death_year: Death year (None if no death event should be generated).
        gender: Gender ('M' or 'F').
        parentin_families: List of family handles where person is a parent.
        childof_families: List of family handles where person is a child.
        generation: Generation number (1-5). Not used by this function.

    Returns:
        Tuple containing: (person_element, birth_event, death_event,
        additional_events_xml, additional_events_tuples, portrait_info).
        death_event and portrait_info are None when absent.
    """
    # The person's first two event numbers are reserved for birth and death.
    birth_event_id = pid * EVENT_ID_OFFSET
    death_event_id = birth_event_id + 1

    person_handle = gen_handle("PERSON", pid)
    birth_handle = gen_handle("EVENT", birth_event_id)
    death_handle = gen_handle("EVENT", death_event_id) if death_year else None

    # Additional events are drawn before the birth/death dates; this order
    # determines the seeded output and must not change.
    additional_events = gen_additional_events(pid, first_name, surname, birth_year, death_year, gender)
    portrait_info = generate_portrait(pid, f"{first_name} {surname}", gender, birth_year)

    # --- person element ---------------------------------------------------
    person_elem = ET.Element(
        "person",
        {
            "handle": person_handle,
            "change": str(int(datetime.now().timestamp())),
            "id": f"I{pid:04d}",
        },
    )
    ET.SubElement(person_elem, "gender").text = gender
    name_elem = ET.SubElement(person_elem, "name", {"type": "Birth Name"})
    ET.SubElement(name_elem, "first").text = first_name
    ET.SubElement(name_elem, "surname").text = surname

    # Portrait reference, if one was downloaded.
    if portrait_info:
        media_handle, _ = portrait_info
        gallery_elem = ET.SubElement(person_elem, "gallery")
        ET.SubElement(gallery_elem, "mediaobjref", {"hlink": media_handle})

    # Event references: birth first, death last.
    referenced_handles = [birth_handle]
    referenced_handles.extend(event_handle for event_handle, _ in additional_events)
    if death_handle:
        referenced_handles.append(death_handle)
    for event_handle in referenced_handles:
        ET.SubElement(person_elem, "eventref", {"hlink": event_handle, "role": "Primary"})

    # Family references.
    for family_handle in parentin_families or ():
        ET.SubElement(person_elem, "parentin", {"hlink": family_handle})
    for family_handle in childof_families or ():
        ET.SubElement(person_elem, "childof", {"hlink": family_handle})

    # --- birth event ------------------------------------------------------
    birth_month = random.randint(MIN_MONTH, MAX_MONTH)
    birth_day = random.randint(MIN_DAY, MAX_DAY)
    birth_event = create_event_element(
        EventData(
            handle=birth_handle,
            event_type="Birth",
            year=birth_year,
            month=birth_month,
            day=birth_day,
            description=f"Birth of {surname}, {first_name}",
            event_id=birth_event_id,
        )
    )

    # --- death event (only when a death year was given) ------------------
    death_event: Optional[ET.Element] = None
    if death_handle:
        death_month = random.randint(MIN_MONTH, MAX_MONTH)
        death_day = random.randint(MIN_DAY, MAX_DAY)
        death_event = create_event_element(
            EventData(
                handle=death_handle,
                event_type="Death",
                year=death_year,
                month=death_month,
                day=death_day,
                description=f"Death of {surname}, {first_name}",
                event_id=death_event_id,
            )
        )

    additional_events_xml = [create_event_element(data) for _, data in additional_events]
    return person_elem, birth_event, death_event, additional_events_xml, additional_events, portrait_info
def gen_family(
    fid: int,
    father_handle: str,
    mother_handle: str,
    marriage_year: int,
    children_handles: List[str],
    generation: int = 1
) -> Tuple[ET.Element, ET.Element]:
    """
    Build a family element and its marriage event.

    The family element references the father, the mother, every child in
    ``children_handles`` (as they are at call time) and finally the marriage
    event. The marriage date is ``marriage_year`` with a random month and day.

    NOTE(review): the marriage event number is ``fid * FAMILY_ID_OFFSET`` and
    shares the "EVENT" handle space with per-person event numbers
    (``pid * EVENT_ID_OFFSET + k``), so the two can coincide.

    Args:
        fid: Family ID.
        father_handle: Person handle of the father.
        mother_handle: Person handle of the mother.
        marriage_year: Year of the marriage event.
        children_handles: Person handles of the children.
        generation: Generation number. Not used by this function.

    Returns:
        Tuple of (family_element, marriage_event_element).
    """
    marriage_event_id = fid * FAMILY_ID_OFFSET
    marriage_handle = gen_handle("EVENT", marriage_event_id)

    family_elem = ET.Element(
        "family",
        {
            "handle": gen_handle("FAMILY", fid),
            "change": str(int(datetime.now().timestamp())),
            "id": f"F{fid:04d}",
        },
    )
    ET.SubElement(family_elem, "rel", {"type": "Married"})
    ET.SubElement(family_elem, "father", {"hlink": father_handle})
    ET.SubElement(family_elem, "mother", {"hlink": mother_handle})
    for child_handle in children_handles:
        ET.SubElement(family_elem, "childref", {"hlink": child_handle})
    ET.SubElement(family_elem, "eventref", {"hlink": marriage_handle, "role": "Family"})

    # Month is drawn before day; the order is part of the seeded output.
    month = random.randint(MIN_MONTH, MAX_MONTH)
    day = random.randint(MIN_DAY, MAX_DAY)
    marriage_event = create_event_element(
        EventData(
            handle=marriage_handle,
            event_type="Marriage",
            year=marriage_year,
            month=month,
            day=day,
            description="Marriage",
            event_id=marriage_event_id,
        )
    )
    return family_elem, marriage_event
def create_gramps_xml_document(
    events: List[ET.Element],
    people: List[ET.Element],
    families: List[ET.Element],
    media: List[ET.Element]
) -> ET.ElementTree:
    """
    Assemble the complete XML document from the generated elements.

    The root ``database`` element gets, in order: a header (creation date,
    version, researcher name), an empty ``tags`` section, then the ``events``,
    ``people``, ``families`` and ``objects`` sections filled from the
    corresponding argument lists.

    Args:
        events: Event elements.
        people: Person elements.
        families: Family elements.
        media: Media elements, stored under the ``objects`` section.

    Returns:
        ET.ElementTree: Tree wrapping the ``database`` root element.
    """
    root = ET.Element("database", {"xmlns": GRAMPS_XML_NAMESPACE})

    # Header
    header = ET.SubElement(root, "header")
    ET.SubElement(
        header,
        "created",
        {"date": datetime.now().strftime('%Y-%m-%d'), "version": GRAMPS_XML_VERSION},
    )
    researcher = ET.SubElement(header, "researcher")
    ET.SubElement(researcher, "resname").text = "Medieval German Family Generator"

    # Tags (empty)
    ET.SubElement(root, "tags")

    # Content sections, in document order.
    sections = (
        ("events", events),
        ("people", people),
        ("families", families),
        ("objects", media),
    )
    for section_tag, members in sections:
        section = ET.SubElement(root, section_tag)
        for member in members:
            section.append(member)

    return ET.ElementTree(root)
def calculate_birth_year(generation: int) -> int:
    """
    Draw a random birth year for a given generation.

    Each generation starts 20 years later than the previous one and its
    window is 10 years wider (both bounds inclusive). With
    GENERATION_START_YEAR = 1000 this gives:

        Generation 1: 1000-1050
        Generation 2: 1020-1080
        Generation 3: 1040-1110
        Generation 4: 1060-1140
        Generation 5: 1080-1170

    NOTE(review): the previous docstring listed wider windows (up to 1250 for
    generation 5) that the formula never produced; the documentation now
    matches the code. Confirm the formula is the intended behaviour.

    Args:
        generation: Generation number, starting at 1.

    Returns:
        int: The drawn birth year.
    """
    generations_elapsed = generation - 1
    earliest_year = GENERATION_START_YEAR + generations_elapsed * 20
    window_years = 50 + generations_elapsed * 10
    return random.randint(earliest_year, earliest_year + window_years)
def calculate_death_year(birth_year: int, generation: int) -> Optional[int]:
    """
    Draw a death year for a person, or None if no death is recorded.

    The lifespan is drawn from 30-60 years for generations 1-2 and from
    35-70 years for later generations. A death year after
    GENERATION_END_YEAR is only kept with 30% probability; otherwise the
    person counts as still alive at the end of the period and None is returned.

    Args:
        birth_year: Birth year of the person.
        generation: Generation number of the person.

    Returns:
        Optional[int]: The death year, or None.
    """
    shortest, longest = (30, 60) if generation <= 2 else (35, 70)
    death_year = birth_year + random.randint(shortest, longest)

    if death_year <= GENERATION_END_YEAR:
        return death_year

    # Past the end of the period: keep the death only some of the time.
    keep_late_death = random.random() < 0.3
    return death_year if keep_late_death else None
def calculate_marriage_year(birth_year: int, gender: str) -> int:
    """
    Draw a marriage year from the birth year and gender.

    The age at marriage is drawn from 18-25 for gender "F" and from 20-30
    for any other gender value (both bounds inclusive).

    Args:
        birth_year: Birth year of the person.
        gender: "F" selects the female age range; anything else the male one.

    Returns:
        int: birth_year plus the drawn age at marriage.
    """
    youngest, oldest = (18, 25) if gender == "F" else (20, 30)
    return birth_year + random.randint(youngest, oldest)
def _register_generated_person(
    pid: int,
    first_name: str,
    surname: str,
    birth_year: int,
    death_year: Optional[int],
    gender: str,
    parentin: List[str],
    childof: List[str],
    generation: int,
    person_data_map: Dict[int, PersonData],
    all_people: List[ET.Element],
    all_events: List[ET.Element],
    all_media: List[ET.Element],
) -> PersonData:
    """Create one person via gen_person, file its XML output and record its PersonData."""
    person_elem, birth_event, death_event, extra_events, _, portrait = gen_person(
        pid, first_name, surname, birth_year, death_year, gender,
        parentin_families=list(parentin),
        childof_families=list(childof),
        generation=generation,
    )
    all_people.append(person_elem)
    all_events.append(birth_event)
    if death_event is not None:
        all_events.append(death_event)
    all_events.extend(extra_events)
    if portrait:
        media_handle, file_path = portrait
        all_media.append(
            create_media_element(media_handle, file_path, f"Portrait of {first_name} {surname}", pid)
        )

    data = PersonData(
        handle=gen_handle("PERSON", pid),
        name=first_name,
        surname=surname,
        birth=birth_year,
        death=death_year,
        gender=gender,
        parentin=list(parentin),
        childof=list(childof),
        generation=generation,
    )
    person_data_map[pid] = data
    return data


def _plausible_parent_birth_year(drawn_year: int, child_birth_year: int, min_parent_age: int) -> int:
    """Move a pre-period parent birth year into the period only if the parent stays old enough."""
    if drawn_year >= GENERATION_START_YEAR:
        return drawn_year
    latest_plausible = child_birth_year - min_parent_age
    if latest_plausible < GENERATION_START_YEAR:
        # No in-period year keeps the parent older than the child: a birth
        # year slightly before the period beats a parent younger than the child.
        return drawn_year
    return random.randint(GENERATION_START_YEAR, latest_plausible)


def _add_sibling(
    sibling_id: int,
    surname: str,
    earliest_draw: int,
    latest_draw: int,
    father_death: Optional[int],
    mother_death: Optional[int],
    marriage_year: int,
    family_handle: str,
    generation: int,
    person_data_map: Dict[int, PersonData],
    all_people: List[ET.Element],
    all_events: List[ET.Element],
    all_media: List[ET.Element],
) -> Optional[str]:
    """Draw and register one sibling; return its handle, or None if its birth year is invalid."""
    gender = "M" if random.random() < 0.5 else "F"
    first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES)
    birth_year = random.randint(earliest_draw, latest_draw)

    # Ensure sibling is born before either parent's death
    if father_death is not None and birth_year > father_death:
        birth_year = father_death - 1
    if mother_death is not None and birth_year > mother_death:
        birth_year = mother_death - 1

    # Siblings must be born after the parents' marriage year.
    if birth_year <= marriage_year:
        return None

    death_year = calculate_death_year(birth_year, generation)
    sibling = _register_generated_person(
        sibling_id, first_name, surname, birth_year, death_year, gender,
        [], [family_handle], generation,
        person_data_map, all_people, all_events, all_media,
    )
    return sibling.handle


def generate_parents_and_siblings(
    person_data: PersonData,
    person_data_map: Dict[int, PersonData],
    all_people: List[ET.Element],
    all_events: List[ET.Element],
    all_media: List[ET.Element],
    all_families: List[ET.Element],
    family_data_map: Dict[int, FamilyData],
    next_person_id: int,
    next_family_id: int,
    generation: int
) -> Tuple[int, int, Optional[str]]:
    """
    Generate parents and siblings for a person if they don't have parents.
    If they already have parents, top the parent family up with more siblings.

    Existing parents: if the first family in ``person_data.childof`` is known,
    has fewer than MIN_CHILDREN_PER_FAMILY children and both parents are found
    in ``person_data_map``, additional siblings are generated for it.

    No parents: a father, a mother and a new parent family are created, the
    person is recorded as its first child, and further siblings are drawn
    between the year after the marriage and 20 years after the person's birth.
    A parent's birth year is only moved into the 1000+ period when the parent
    still stays at least 20 (father) / 18 (mother) years older than the child;
    otherwise the pre-period year is kept. Moving it unconditionally could put
    a parent's birth after the child's and make the sibling birth window empty,
    which raised ValueError in random.randint.

    The passed lists and maps are modified in place. Person IDs of siblings
    skipped for an invalid birth year are still consumed.

    NOTE(review): this function does not touch XML elements that already
    exist: the person's own element gets no childof reference for the new
    parent family, and siblings added to an existing family get no childref in
    that family's element. Confirm that the caller reconciles this.

    Args:
        person_data: The person to generate parents/siblings for
        person_data_map: Map of person IDs to PersonData
        all_people: List of all person XML elements
        all_events: List of all event XML elements
        all_media: List of all media XML elements
        all_families: List of all family XML elements
        family_data_map: Map of family IDs to FamilyData
        next_person_id: Next available person ID
        next_family_id: Next available family ID
        generation: Generation number for the person

    Returns:
        Tuple of (updated_next_person_id, updated_next_family_id, parent_family_handle)
    """
    # ------------------------------------------------------------------
    # Case 1: the person already has parents -> maybe add more siblings.
    # ------------------------------------------------------------------
    if person_data.childof:
        parent_family_handle = person_data.childof[0]
        parent_family_data = next(
            (fdata for fdata in family_data_map.values() if fdata.handle == parent_family_handle),
            None,
        )

        if (
            parent_family_data is not None
            and len(parent_family_data.children_handles) < MIN_CHILDREN_PER_FAMILY
        ):
            father_data = None
            mother_data = None
            for pdata in person_data_map.values():
                if pdata.handle == parent_family_data.father_handle:
                    father_data = pdata
                if pdata.handle == parent_family_data.mother_handle:
                    mother_data = pdata

            if father_data is not None and mother_data is not None:
                current_children_count = len(parent_family_data.children_handles)
                num_additional_siblings = random.randint(
                    max(0, MIN_CHILDREN_PER_FAMILY - current_children_count),
                    MAX_CHILDREN_PER_FAMILY - current_children_count
                )
                for _ in range(num_additional_siblings):
                    sibling_id = next_person_id
                    next_person_id += 1
                    # Born around the same time as the person: 5 years before
                    # to 15 years after.
                    sibling_handle = _add_sibling(
                        sibling_id, father_data.surname,
                        person_data.birth - 5, person_data.birth + 15,
                        father_data.death, mother_data.death,
                        parent_family_data.marriage_year,
                        parent_family_handle, generation,
                        person_data_map, all_people, all_events, all_media,
                    )
                    if sibling_handle is not None:
                        parent_family_data.children_handles.append(sibling_handle)

        return next_person_id, next_family_id, parent_family_handle

    # ------------------------------------------------------------------
    # Case 2: no parents yet -> create father, mother, family and siblings.
    # ------------------------------------------------------------------
    # Parents belong to the previous generation (never below 1).
    parent_generation = max(1, generation - 1)

    # Both raw draws happen before any adjustment so the RNG sequence of the
    # common path (no adjustment needed) stays unchanged.
    father_birth_year = person_data.birth - random.randint(20, 40)
    mother_birth_year = person_data.birth - random.randint(18, 35)
    father_birth_year = _plausible_parent_birth_year(father_birth_year, person_data.birth, 20)
    mother_birth_year = _plausible_parent_birth_year(mother_birth_year, person_data.birth, 18)

    father_death_year = calculate_death_year(father_birth_year, parent_generation)
    mother_death_year = calculate_death_year(mother_birth_year, parent_generation)

    father_name = random.choice(MALE_NAMES)
    father_surname = person_data.surname  # Child takes father's surname
    mother_name = random.choice(FEMALE_NAMES)
    mother_surname = random.choice(SURNAMES)  # Mother may have different surname

    parent_family_id = next_family_id
    next_family_id += 1
    parent_family_handle = gen_handle("FAMILY", parent_family_id)

    father_id = next_person_id
    next_person_id += 1
    father_data = _register_generated_person(
        father_id, father_name, father_surname, father_birth_year, father_death_year, "M",
        [parent_family_handle], [], parent_generation,
        person_data_map, all_people, all_events, all_media,
    )

    mother_id = next_person_id
    next_person_id += 1
    mother_data = _register_generated_person(
        mother_id, mother_name, mother_surname, mother_birth_year, mother_death_year, "F",
        [parent_family_handle], [], parent_generation,
        person_data_map, all_people, all_events, all_media,
    )

    # The couple marries once both partners have reached their marriage age.
    parent_marriage_year = max(
        calculate_marriage_year(father_birth_year, "M"),
        calculate_marriage_year(mother_birth_year, "F")
    )

    # 6-12 children in total, counting the original person.
    num_siblings_total = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY)
    siblings_handles: List[str] = [person_data.handle]
    if parent_family_handle not in person_data.childof:
        person_data.childof.append(parent_family_handle)

    # The marriage year is at most person.birth + 10 (father at most 20 years
    # before the child plus at most 30 years to marriage), so the window
    # below is never empty.
    min_sibling_birth = parent_marriage_year + 1
    max_sibling_birth = person_data.birth + 20
    for _ in range(num_siblings_total - 1):  # -1: the original person is already a child
        sibling_id = next_person_id
        next_person_id += 1
        sibling_handle = _add_sibling(
            sibling_id, father_surname,
            min_sibling_birth, max_sibling_birth,
            father_death_year, mother_death_year,
            parent_marriage_year,
            parent_family_handle, generation,
            person_data_map, all_people, all_events, all_media,
        )
        if sibling_handle is not None:
            siblings_handles.append(sibling_handle)

    parent_family_elem, parent_marriage_event = gen_family(
        parent_family_id, father_data.handle, mother_data.handle,
        parent_marriage_year, siblings_handles, parent_generation
    )
    all_families.append(parent_family_elem)
    all_events.append(parent_marriage_event)

    family_data_map[parent_family_id] = FamilyData(
        handle=parent_family_handle,
        father_handle=father_data.handle,
        mother_handle=mother_data.handle,
        children_handles=siblings_handles,
        marriage_year=parent_marriage_year,
        marriage_handle=gen_handle("EVENT", parent_family_id * FAMILY_ID_OFFSET),
        family_id=parent_family_id,
        generation=parent_generation
    )
    return next_person_id, next_family_id, parent_family_handle
def main() -> None:
    """Generate the medieval German family database and write it to disk.

    The work happens in four phases:

    1. Create the Generation 1 root couple and their children.
    2. For every later generation, pair the males and females of the
       current generation, call ``generate_parents_and_siblings`` for each
       spouse, and create the couple's children.
    3. Rebuild every person element from ``person_data_map`` so that the
       ``parentin``/``childof`` links collected in phases 1-2 are used, and
       re-create one marriage event per entry in ``family_data_map``.
    4. Build the Gramps XML tree, write it to
       ``medieval_german_family.gramps`` in the current working directory,
       and print a summary.
    """
    print("Generating medieval German family database (5 generations, 1000-1300 AD)...")

    # XML elements that end up in the final document.
    all_people: List[ET.Element] = []
    all_events: List[ET.Element] = []
    all_families: List[ET.Element] = []
    all_media: List[ET.Element] = []

    # Plain-data records keyed by numeric ID; these are the source of truth
    # for the rebuild in phase 3.
    person_data_map: Dict[int, PersonData] = {}
    family_data_map: Dict[int, FamilyData] = {}

    def record_person(person_id: int, name: str, surname: str, generated: tuple) -> None:
        """Append the elements of one ``gen_person`` result to the lists above."""
        person_elem, birth_event, death_event, extra_events, _, portrait = generated
        all_people.append(person_elem)
        all_events.append(birth_event)
        if death_event is not None:
            all_events.append(death_event)
        all_events.extend(extra_events)
        if portrait:
            media_handle, file_path = portrait
            all_media.append(
                create_media_element(media_handle, file_path, f"Portrait of {name} {surname}", person_id)
            )

    # ID counters
    next_person_id = 1
    next_family_id = 1

    # ------------------------------------------------------------------
    # Phase 1: Generation 1 root couple and their children
    # ------------------------------------------------------------------
    print("Generating Generation 1...")
    gen1_father_id = next_person_id
    next_person_id += 1
    gen1_mother_id = next_person_id
    next_person_id += 1

    gen1_father_birth = calculate_birth_year(1)
    gen1_mother_birth = calculate_birth_year(1)
    gen1_father_death = calculate_death_year(gen1_father_birth, 1)
    gen1_mother_death = calculate_death_year(gen1_mother_birth, 1)

    gen1_family_id = next_family_id
    next_family_id += 1
    gen1_family_handle = gen_handle("FAMILY", gen1_family_id)

    gen1_father_name = random.choice(MALE_NAMES)
    gen1_father_surname = random.choice(SURNAMES)
    gen1_father_handle = gen_handle("PERSON", gen1_father_id)
    gen1_mother_name = random.choice(FEMALE_NAMES)
    gen1_mother_surname = random.choice(SURNAMES)
    gen1_mother_handle = gen_handle("PERSON", gen1_mother_id)

    # NOTE: the element lists filled during phases 1-2 are cleared and
    # rebuilt in phase 3; only the *_data_map records and all_families
    # survive until the document is written.
    gen1_father_generated = gen_person(
        gen1_father_id, gen1_father_name, gen1_father_surname, gen1_father_birth, gen1_father_death, "M",
        parentin_families=[gen1_family_handle], generation=1
    )
    gen1_mother_generated = gen_person(
        gen1_mother_id, gen1_mother_name, gen1_mother_surname, gen1_mother_birth, gen1_mother_death, "F",
        parentin_families=[gen1_family_handle], generation=1
    )
    record_person(gen1_father_id, gen1_father_name, gen1_father_surname, gen1_father_generated)
    record_person(gen1_mother_id, gen1_mother_name, gen1_mother_surname, gen1_mother_generated)

    # Store person data (parentin is already final for the root couple).
    person_data_map[gen1_father_id] = PersonData(
        handle=gen1_father_handle,
        name=gen1_father_name,
        surname=gen1_father_surname,
        birth=gen1_father_birth,
        death=gen1_father_death,
        gender="M",
        parentin=[gen1_family_handle],
        childof=[],
        generation=1
    )
    person_data_map[gen1_mother_id] = PersonData(
        handle=gen1_mother_handle,
        name=gen1_mother_name,
        surname=gen1_mother_surname,
        birth=gen1_mother_birth,
        death=gen1_mother_death,
        gender="F",
        parentin=[gen1_family_handle],
        childof=[],
        generation=1
    )

    # Generate children for Generation 1
    # NOTE(review): unlike later generations, these births are not checked
    # against the parents' death years or the marriage year (which is only
    # computed after this loop) - confirm whether that is intended.
    num_children_gen1 = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY)
    gen1_children_handles: List[str] = []
    gen1_children_data: List[PersonData] = []
    for i in range(num_children_gen1):
        child_id = next_person_id
        next_person_id += 1
        # Alternate genders so the next generation can be paired up.
        gender = "M" if i % 2 == 0 else "F"
        first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES)
        # Children typically take father's surname
        child_surname = gen1_father_surname
        birth_year = max(gen1_father_birth, gen1_mother_birth) + random.randint(20, 35)
        death_year = calculate_death_year(birth_year, 2)
        child_handle = gen_handle("PERSON", child_id)
        child_generated = gen_person(
            child_id, first_name, child_surname, birth_year, death_year, gender,
            childof_families=[gen1_family_handle], generation=2
        )
        record_person(child_id, first_name, child_surname, child_generated)
        gen1_children_handles.append(child_handle)
        child_data = PersonData(
            handle=child_handle,
            name=first_name,
            surname=child_surname,
            birth=birth_year,
            death=death_year,
            gender=gender,
            parentin=[],
            childof=[gen1_family_handle],
            generation=2
        )
        person_data_map[child_id] = child_data
        gen1_children_data.append(child_data)

    # Create Generation 1 family
    marriage_year = calculate_marriage_year(max(gen1_father_birth, gen1_mother_birth), "M")
    gen1_family, gen1_marriage_event = gen_family(
        gen1_family_id, gen1_father_handle, gen1_mother_handle, marriage_year, gen1_children_handles, 1
    )
    all_families.append(gen1_family)
    all_events.append(gen1_marriage_event)
    family_data_map[gen1_family_id] = FamilyData(
        handle=gen1_family_handle,
        father_handle=gen1_father_handle,
        mother_handle=gen1_mother_handle,
        children_handles=gen1_children_handles,
        marriage_year=marriage_year,
        marriage_handle=gen_handle("EVENT", gen1_family_id * FAMILY_ID_OFFSET),
        family_id=gen1_family_id,
        generation=1
    )

    # ------------------------------------------------------------------
    # Phase 2: subsequent generations (2..NUM_GENERATIONS)
    # ------------------------------------------------------------------
    current_generation_people = list(gen1_children_data)
    for generation in range(2, NUM_GENERATIONS + 1):
        print(f"Generating Generation {generation}...")
        next_generation_people: List[PersonData] = []

        # Pair up people from the current generation to create families.
        # Shuffling first avoids always pairing by creation order.
        males = [p for p in current_generation_people if p.gender == "M"]
        females = [p for p in current_generation_people if p.gender == "F"]
        random.shuffle(males)
        random.shuffle(females)

        # zip() stops at the shorter list, so surplus people of one gender
        # simply remain unmarried.
        for father_data, mother_data in zip(males, females):
            # Compute each spouse's marriage year exactly once so that the
            # survival check below is made against the year actually used.
            father_marriage_year = calculate_marriage_year(father_data.birth, "M")
            mother_marriage_year = calculate_marriage_year(mother_data.birth, "F")
            marriage_year = max(father_marriage_year, mother_marriage_year)

            # Don't create a family if either spouse dies before the
            # marriage. Because marriage_year >= each spouse's own marriage
            # year, this also covers "dies before reaching marriage age".
            if father_data.death and marriage_year > father_data.death:
                continue
            if mother_data.death and marriage_year > mother_data.death:
                continue

            # Generate parents and siblings for father if needed
            next_person_id, next_family_id, _ = generate_parents_and_siblings(
                father_data, person_data_map, all_people, all_events, all_media, all_families,
                family_data_map, next_person_id, next_family_id, generation
            )
            # Generate parents and siblings for mother if needed
            next_person_id, next_family_id, _ = generate_parents_and_siblings(
                mother_data, person_data_map, all_people, all_events, all_media, all_families,
                family_data_map, next_person_id, next_family_id, generation
            )

            # Reserve the family ID up front; children need the handle.
            family_id = next_family_id
            next_family_id += 1
            family_handle = gen_handle("FAMILY", family_id)

            # NOTE(review): children below are labelled with `generation`,
            # the same value passed to gen_family for their parents, and
            # Generation 1's children are already labelled 2. Confirm
            # whether `generation + 1` was intended for the children.
            num_children = random.randint(MIN_CHILDREN_PER_FAMILY, MAX_CHILDREN_PER_FAMILY)
            children_handles: List[str] = []
            for _ in range(num_children):
                child_id = next_person_id
                next_person_id += 1
                gender = "M" if random.random() < 0.5 else "F"  # Random gender
                first_name = random.choice(MALE_NAMES if gender == "M" else FEMALE_NAMES)
                # Children take father's surname
                child_surname = father_data.surname
                # Children born 1-25 years after marriage
                birth_year = marriage_year + random.randint(1, 25)
                # Don't create child if born after a parent's death
                if father_data.death and birth_year > father_data.death:
                    continue
                if mother_data.death and birth_year > mother_data.death:
                    continue
                death_year = calculate_death_year(birth_year, generation)
                child_handle = gen_handle("PERSON", child_id)
                child_generated = gen_person(
                    child_id, first_name, child_surname, birth_year, death_year, gender,
                    childof_families=[family_handle], generation=generation
                )
                record_person(child_id, first_name, child_surname, child_generated)
                children_handles.append(child_handle)
                child_data = PersonData(
                    handle=child_handle,
                    name=first_name,
                    surname=child_surname,
                    birth=birth_year,
                    death=death_year,
                    gender=gender,
                    parentin=[],
                    childof=[family_handle],
                    generation=generation
                )
                person_data_map[child_id] = child_data
                next_generation_people.append(child_data)

            # Only create family if there are children
            if children_handles:
                family_elem, marriage_event = gen_family(
                    family_id, father_data.handle, mother_data.handle, marriage_year, children_handles, generation
                )
                all_families.append(family_elem)
                all_events.append(marriage_event)
                # Record the new family on both parents; phase 3 reads this.
                if family_handle not in father_data.parentin:
                    father_data.parentin.append(family_handle)
                if family_handle not in mother_data.parentin:
                    mother_data.parentin.append(family_handle)
                family_data_map[family_id] = FamilyData(
                    handle=family_handle,
                    father_handle=father_data.handle,
                    mother_handle=mother_data.handle,
                    children_handles=children_handles,
                    marriage_year=marriage_year,
                    marriage_handle=gen_handle("EVENT", family_id * FAMILY_ID_OFFSET),
                    family_id=family_id,
                    generation=generation
                )

        current_generation_people = next_generation_people

    # ------------------------------------------------------------------
    # Phase 3: regenerate person elements with updated family references
    # ------------------------------------------------------------------
    print("Updating family references...")
    # parentin lists were extended after the person elements were first
    # built, so discard those elements and rebuild from person_data_map.
    all_people.clear()
    all_events.clear()
    all_media.clear()

    for person_id, person_data in person_data_map.items():
        regenerated = gen_person(
            person_id, person_data.name, person_data.surname, person_data.birth, person_data.death, person_data.gender,
            parentin_families=person_data.parentin if person_data.parentin else None,
            childof_families=person_data.childof if person_data.childof else None,
            generation=person_data.generation
        )
        record_person(person_id, person_data.name, person_data.surname, regenerated)

    # Marriage events were dropped by the clear() above; re-create one per
    # family under the handle stored in family_data_map.
    for family_id, family_data in family_data_map.items():
        marriage_event_data = EventData(
            handle=family_data.marriage_handle,
            event_type="Marriage",
            year=family_data.marriage_year,
            month=random.randint(MIN_MONTH, MAX_MONTH),
            day=random.randint(MIN_DAY, MAX_DAY),
            description="Marriage",
            event_id=family_id * FAMILY_ID_OFFSET
        )
        all_events.append(create_event_element(marriage_event_data))

    # ------------------------------------------------------------------
    # Phase 4: build and write the XML document
    # ------------------------------------------------------------------
    print("Creating Gramps XML document...")
    tree = create_gramps_xml_document(all_events, all_people, all_families, all_media)

    output_file = "medieval_german_family.gramps"
    tree.write(output_file, encoding='utf-8', xml_declaration=True)

    print(f"\nGenerated {output_file} with:")
    print(f" - {len(all_people)} people")
    print(f" - {len(all_families)} families")
    print(f" - {len(all_events)} events")
    print(f" - {len(all_media)} media objects")
    print(f" - {NUM_GENERATIONS} generations")
    print(f" - Time period: {GENERATION_START_YEAR}-{GENERATION_END_YEAR} AD")
# Script entry point: run the generator only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()