Source code for gws.ext.helper.alkis.data.adresse

import collections
import re

import gws
from gws.tools.console import ProgressIndicator

from . import resolver
from ..util import indexer
from ..util.connection import AlkisConnection

addr_index = 'idx_adresse'
gebs_index = 'idx_gebaeude'

MIN_GEBAEUDE_AREA = 0.5


[docs]def normalize_hausnummer(hn): # "12 a" -> "12a" return re.sub(r'\s+', '', hn.strip())
[docs]def int_hausnummer(hn): m = re.match(r'^(\d+)', str(hn or '').strip()) if not m: return None return int(m.group(1))
[docs]def street_name_key(s): s = s.strip().lower() s = s.replace(u'ä', 'ae') s = s.replace(u'ë', 'ee') s = s.replace(u'ö', 'oe') s = s.replace(u'ü', 'ue') s = s.replace(u'ß', 'ss') s = re.sub(r'\W+', ' ', s) # s = re.sub(r'(?<=\d)\s+', '', s) # s = re.sub(r'\s+(?=\d)', '', s) s = re.sub(r'\s?str\.$', '.strasse', s) s = re.sub(r'\s?pl\.$', '.platz', s) s = re.sub(r'\s?(strasse|allee|damm|gasse|pfad|platz|ring|steig|wall|weg|zeile)$', r'.\1', s) s = s.replace(' ', '.') return s
_lage_tables = ( 'ax_lagebezeichnungohnehausnummer', 'ax_lagebezeichnungmithausnummer', 'ax_lagebezeichnungmitpseudonummer') def _place_key(r): # schluesselgesamt should be equal to land+regierungsbezirk+kreis+gemeinde+lage # but sometimes it is not... so lets use our own key return r['land'], r['regierungsbezirk'], r['kreis'], r['gemeinde'], r['lage'] def _collect_gebs(conn): rs = conn.select_from_ax('ax_gebaeude', [ 'gml_id', 'gebaeudefunktion', 'weiteregebaeudefunktion', 'name', 'bauweise', 'anzahlderoberirdischengeschosse', 'anzahlderunterirdischengeschosse', 'hochhaus', 'objekthoehe', 'dachform', 'zustand', 'geschossflaeche', 'grundflaeche', 'umbauterraum', 'lagezurerdoberflaeche', 'dachart', 'dachgeschossausbau', 'description', 'art', 'individualname', 'baujahr', 'wkb_geometry' ]) gebs = [] for r in rs: r = gws.compact(r) r.update(resolver.attributes(conn, 'ax_gebaeude', r)) gebs.append({ 'gml_id': r.pop('gml_id'), 'geom': r.pop('wkb_geometry'), 'attributes': indexer.as_json(r) }) return gebs def _create_gebs_index(conn: AlkisConnection): fsx_temp = '_temp_geb_fsx' geb_temp = '_temp_geb_geb' dat = conn.data_schema idx = conn.index_schema gws.log.info('gebaeude: copying') conn.create_index_table(fsx_temp, f''' id SERIAL PRIMARY KEY, gml_id CHARACTER VARYING, isvalid BOOLEAN, geom geometry(GEOMETRY, {conn.srid}) ''') sql = conn.make_select_from_ax('ax_flurstueck', ['gml_id', 'wkb_geometry']) conn.exec(f'INSERT INTO {idx}.{fsx_temp} (gml_id, geom) {sql}') gebs = _collect_gebs(conn) conn.create_index_table(geb_temp, f''' id SERIAL PRIMARY KEY, gml_id CHARACTER VARYING, attributes CHARACTER VARYING, isvalid BOOLEAN, geom geometry(GEOMETRY, {conn.srid}) ''') conn.index_insert(geb_temp, gebs) conn.create_index_index(fsx_temp, 'geom', 'gist') conn.create_index_index(geb_temp, 'geom', 'gist') gws.log.info('gebaeude: validating') conn.validate_index_geoms(geb_temp) conn.validate_index_geoms(fsx_temp) cnt = conn.count(f'{idx}.{geb_temp}') step = 1000 conn.create_index_table(gebs_index, f''' id SERIAL PRIMARY KEY, gml_id CHARACTER VARYING, fs_id CHARACTER VARYING, attributes CHARACTER VARYING, area FLOAT, fs_geom geometry(GEOMETRY, {conn.srid}), gb_geom geometry(GEOMETRY, {conn.srid}) ''') with ProgressIndicator('gebaeude: search', cnt) as pi: for n in range(0, cnt, step): n1 = n + step conn.exec(f''' INSERT INTO {idx}.{gebs_index} (gml_id, fs_id, attributes, fs_geom, gb_geom) SELECT gb.gml_id, fs.gml_id, gb.attributes, fs.geom, gb.geom FROM {idx}.{geb_temp} AS gb, {idx}.{fsx_temp} AS fs WHERE {n} < gb.id AND gb.id <= {n1} AND ST_Intersects(gb.geom, fs.geom) ''') pi.update(step) cnt = conn.count(f'{idx}.{gebs_index}') step = 1000 with ProgressIndicator('gebaeude: areas', cnt) as pi: for n in range(0, cnt, step): n1 = n + step conn.exec(f''' UPDATE {idx}.{gebs_index} SET area = ST_Area(ST_Intersection(fs_geom, gb_geom)) WHERE {n} < id AND id <= {n1} ''') pi.update(step) gws.log.info('gebaeude: cleaning up') conn.exec(f'DELETE FROM {idx}.{gebs_index} WHERE area < %s', [MIN_GEBAEUDE_AREA]) conn.exec(f'DROP TABLE {idx}.{fsx_temp} CASCADE') conn.exec(f'DROP TABLE {idx}.{geb_temp} CASCADE') conn.mark_index_table(gebs_index) def _create_addr_index(conn: AlkisConnection): dat = conn.data_schema idx = conn.index_schema gws.log.info('adresse: reading') rs = conn.select_from_ax('ax_lagebezeichnungkatalogeintrag') lage_catalog = {} for r in rs: lage_catalog[_place_key(r)] = [r['gml_id'], r['schluesselgesamt'], r['bezeichnung']] lage = {} gws.log.info('adresse: collecting') for tab in _lage_tables: rs = conn.select_from_ax(tab) for r in rs: if r['unverschluesselt']: r['lage_id'] = '' r['lage_schluesselgesamt'] = '' r['strasse'] = r['unverschluesselt'] else: lg = lage_catalog.get(_place_key(r)) if lg: r['lage_id'] = lg[0] r['lage_schluesselgesamt'] = lg[1] r['strasse'] = lg[2] if 'strasse' not in r or r['strasse'] == 'ohne Lage': continue for hnr in 'hausnummer', 'pseudonummer', 'laufendenummer': if r.get(hnr): r['hausnummer'] = normalize_hausnummer(r[hnr]) r['hausnummer_type'] = hnr break if not r.get('hausnummer'): r['hausnummer'] = '' r['hausnummer_type'] = '' lage[r['gml_id']] = r rs = conn.select_from_ax('ax_flurstueck', [ 'gml_id', 'weistauf', 'zeigtauf', 'land', 'gemarkungsnummer', 'gemeinde', 'regierungsbezirk', 'kreis', 'ST_X(ST_Centroid(wkb_geometry)) AS x', 'ST_Y(ST_Centroid(wkb_geometry)) AS y' ]) total = conn.count(f'{dat}.ax_flurstueck') with ProgressIndicator('adresse: index', total) as pi: for fs in rs: fs_id = fs.pop('gml_id') for lage_id in (fs['zeigtauf'] or []) + (fs['weistauf'] or []): if lage_id not in lage: continue la = lage[lage_id] if 'fs_ids' not in la: la['fs_ids'] = [] la['fs_ids'].append(fs_id) if not la.get('gemarkungsnummer'): la.update(fs) la['x'] = fs['x'] la['y'] = fs['y'] la['xysrc'] = 'fs' pi.update(1) gws.log.info('adresse: coordinates') rs = conn.select_from_ax( 'ap_pto', [ 'dientzurdarstellungvon', 'ST_X(ST_Centroid(wkb_geometry)) AS x', 'ST_Y(ST_Centroid(wkb_geometry)) AS y' ], conditions={ 'art': "?? ='HNR'", 'endet': "?? IS NULL" } ) for r in rs: for lage_id in (r['dientzurdarstellungvon'] or []): if lage_id in lage: lage[lage_id]['x'] = r['x'] lage[lage_id]['y'] = r['y'] lage[lage_id]['xysrc'] = 'pto' gws.log.info('adresse: normalize gemarkung') gg = collections.defaultdict(set) for la in lage.values(): la.update(resolver.places(conn, la)) if 'gemarkung' in la: gg[la['gemarkung']].add(la['gemeinde']) gu = {} for gemarkung, gemeinde_list in gg.items(): if len(gemeinde_list) < 2: continue for gemeinde in gemeinde_list: gu[gemarkung, gemeinde] = '%s (%s)' % (gemarkung, gemeinde.replace('Stadt ', '')) for la in lage.values(): if 'gemarkung' in la: k = la['gemarkung'], la['gemeinde'] if k in gu: la['gemarkung_v'] = gu[k] else: la['gemarkung_v'] = la['gemarkung'] gws.log.info('adresse: normalize strasse') for la in lage.values(): if 'strasse' in la: la['strasse'] = re.sub(r'\s+', ' ', la['strasse'].strip()) la['strasse_k'] = street_name_key(la['strasse']) la['hausnummer_n'] = int_hausnummer(la.get('hausnummer')) la_buf = [] for la in lage.values(): if 'fs_ids' in la: for fs_id in la['fs_ids']: d = dict(la) d['fs_id'] = fs_id la_buf.append(d) gws.log.info(f'adresse: writing ({len(la_buf)})') conn.create_index_table(addr_index, f''' gml_id CHARACTER(16) NOT NULL, fs_id CHARACTER(16), land CHARACTER VARYING, land_id CHARACTER VARYING, regierungsbezirk CHARACTER VARYING, regierungsbezirk_id CHARACTER VARYING, kreis CHARACTER VARYING, kreis_id CHARACTER VARYING, gemeinde CHARACTER VARYING, gemeinde_id CHARACTER VARYING, gemarkung CHARACTER VARYING, gemarkung_v CHARACTER VARYING, gemarkung_id CHARACTER VARYING, strasse CHARACTER VARYING, strasse_k CHARACTER VARYING, hausnummer CHARACTER VARYING, hausnummer_n INTEGER, lage_id CHARACTER(16), lage_schluesselgesamt CHARACTER VARYING, x FLOAT, y FLOAT, xysrc CHARACTER VARYING ''') conn.index_insert(addr_index, la_buf) conn.mark_index_table(addr_index)
[docs]def create_index(conn: AlkisConnection): if not indexer.check_version(conn, gebs_index): _create_gebs_index(conn) if not indexer.check_version(conn, addr_index): _create_addr_index(conn)
[docs]def index_ok(conn: AlkisConnection): return indexer.check_version(conn, gebs_index) and indexer.check_version(conn, addr_index)
_DEFAULT_LIMIT = 100
[docs]def find(conn: AlkisConnection, query): where = [] parms = [] for k, v in query.items(): if k in ('land', 'regierungsbezirk', 'kreis', 'gemeinde', 'gemarkung'): where.append('AD.' + k + ' = %s') parms.append(v) elif k in ('landUid', 'regierungsbezirkUid', 'kreisUid', 'gemeindeUid', 'gemarkungUid'): where.append('AD.' + (k.replace('Uid', '_id')) + ' = %s') parms.append(v) elif k == 'strasse': where.append('AD.strasse_k = %s') parms.append(street_name_key(v)) hnr = query.get('hausnummer') if hnr == '*': where.append('AD.hausnummer IS NOT NULL') elif hnr: where.append('AD.hausnummer = %s') parms.append(normalize_hausnummer(hnr)) elif query.get('hausnummerNotNull'): where.append('AD.hausnummer IS NOT NULL') where = ('WHERE ' + ' AND '.join(where)) if where else '' limit = 'LIMIT %d' % (query.get('limit', _DEFAULT_LIMIT)) tables = f'{conn.index_schema}.{addr_index} AS AD' count_sql = f'SELECT COUNT(DISTINCT AD.*) FROM {tables} {where}' count = conn.select_value(count_sql, parms) data_sql = f'SELECT DISTINCT AD.* FROM {tables} {where} {limit}' gws.log.debug(f'sql={data_sql!r} parms={parms!r}') data = conn.select(data_sql, parms) return count, list(data)