UNIHAN - cihai.data.unihan#

Bootstrapping#

Fetch + extract + transform + load UNIHAN dataset to Cihai.

cihai.data.unihan.bootstrap.bootstrap_unihan(engine, metadata, options=None)[source]#

UNIHAN bootstrap script (download from web, import to database).

Return type:

None

cihai.data.unihan.bootstrap.is_bootstrapped(metadata)[source]#

Return True if cihai is correctly bootstrapped.

Return type:

bool

cihai.data.unihan.bootstrap.create_unihan_table(columns, metadata)[source]#

Create table and return sqlalchemy.sql.schema.Table.

Return type:

Table

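Parameters:

columns (list of str) – column names for the table, e.g. ['kDefinition', 'kCantonese']

metadata (sqlalchemy.schema.MetaData) – SQLAlchemy metadata instance

Returns: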

Newly created table with columns and index.

Return type:

sqlalchemy.schema.Table

class cihai.data.unihan.dataset.Unihan[source]#

Bases: Dataset, SQLAlchemyMixin

UNIHAN Dataset for cihai.

char: str#
kDefinition: str#
kTraditionalVariant: str#
kSimplifiedVariant: str#
tagged_vars: Callable[[str], ParsedVars]#
untagged_vars: Callable[[str], UntaggedVars]#
bootstrap(options=None)[source]#

Fetch, extract, import UNIHAN to DB, and initialize DB mapping.

Return type:

None

lookup_char(char)[source]#

Return character information from datasets.

Return type:

Query[Unihan]

Parameters:

char (str) – character / string to lookup

Returns:

list of matches

Return type:

sqlalchemy.orm.query.Query

reverse_char(hints)[source]#

Return SQLAlchemy query of reverse-lookup results for the given hints.

Return type:

Query[Unihan]

Parameters:

hints (list of str) – strings to lookup

Returns:

reverse matches

Return type:

sqlalchemy.orm.query.Query

with_fields(fields)[source]#

Return list of characters with information for certain fields.

Return type:

Query[Unihan]

Parameters:

fields (list of str) – fields for which information should be available

Returns:

list of matches

Return type:

sqlalchemy.orm.query.Query

property is_bootstrapped: bool#

Return True if UNIHAN and database are set up.

Returns:

True if Unihan application fixture data is installed.

Return type:

bool

add_plugin(_cls, namespace, bootstrap=True)[source]#

Add plugin for Cihai dataset.

Return type:

None

sql: Database#
engine: Engine#

sqlalchemy.engine.Engine instance.

metadata: MetaData#

sqlalchemy.schema.MetaData instance.

session: Session#

sqlalchemy.orm.session.Session instance.

base: AutomapBase#

sqlalchemy.ext.automap.AutomapBase instance.

Constants for UNIHAN cihai dataset.

cihai.data.unihan.constants.UNIHAN_FILES = ['Unihan_DictionaryLikeData.txt', 'Unihan_IRGSources.txt', 'Unihan_NumericValues.txt', 'Unihan_RadicalStrokeCounts.txt', 'Unihan_Readings.txt', 'Unihan_Variants.txt']#

List of files from unihan-etl (UNIHAN database)

cihai.data.unihan.constants.UNIHAN_FIELDS: List[str] = ['kAccountingNumeric', 'kCangjie', 'kCantonese', 'kCheungBauer', 'kCihaiT', 'kCompatibilityVariant', 'kDefinition', 'kFenn', 'kFourCornerCode', 'kFrequency', 'kGradeLevel', 'kHDZRadBreak', 'kHKGlyph', 'kHangul', 'kHanyuPinlu', 'kHanyuPinyin', 'kJapaneseKun', 'kJapaneseOn', 'kKorean', 'kMandarin', 'kOtherNumeric', 'kPhonetic', 'kPrimaryNumeric', 'kRSAdobe_Japan1_6', 'kRSUnicode', 'kSemanticVariant', 'kSimplifiedVariant', 'kSpecializedSemanticVariant', 'kTang', 'kTotalStrokes', 'kTraditionalVariant', 'kVietnamese', 'kXHC1983', 'kZVariant']#

List of field names from unihan-etl (UNIHAN database)

cihai.data.unihan.constants.UNIHAN_ETL_DEFAULT_OPTIONS = {'expand': False, 'fields': ['kAccountingNumeric', 'kCangjie', 'kCantonese', 'kCheungBauer', 'kCihaiT', 'kCompatibilityVariant', 'kDefinition', 'kFenn', 'kFourCornerCode', 'kFrequency', 'kGradeLevel', 'kHDZRadBreak', 'kHKGlyph', 'kHangul', 'kHanyuPinlu', 'kHanyuPinyin', 'kJapaneseKun', 'kJapaneseOn', 'kKorean', 'kMandarin', 'kOtherNumeric', 'kPhonetic', 'kPrimaryNumeric', 'kRSAdobe_Japan1_6', 'kRSUnicode', 'kSemanticVariant', 'kSimplifiedVariant', 'kSpecializedSemanticVariant', 'kTang', 'kTotalStrokes', 'kTraditionalVariant', 'kVietnamese', 'kXHC1983', 'kZVariant'], 'format': 'python', 'input_files': ['Unihan_DictionaryLikeData.txt', 'Unihan_IRGSources.txt', 'Unihan_NumericValues.txt', 'Unihan_RadicalStrokeCounts.txt', 'Unihan_Readings.txt', 'Unihan_Variants.txt']}#

Default settings passed to unihan-etl

Variants plugin#

class cihai.data.unihan.dataset.UnihanVariants[source]#

Bases: DatasetPlugin, SQLAlchemyMixin

Support for CJK Variant lookups through UNIHAN dataset.

bootstrap()[source]#

Map custom lookup for UNIHAN variants to Unihan SQLAlchemy table.

Return type:

None

sql: Database#
engine: Engine#

sqlalchemy.engine.Engine instance.

metadata: MetaData#

sqlalchemy.schema.MetaData instance.

session: Session#

sqlalchemy.orm.session.Session instance.

base: AutomapBase#

sqlalchemy.ext.automap.AutomapBase instance.