comicbox.transforms.pdf

[docs] package comicbox.transforms.pdf

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""Transform PDF formats to comicbox and back."""

from bidict import frozenbidict

from comicbox.schemas.comicbox import (
    GENRES_KEY,
    SCAN_INFO_KEY,
    TAGGER_KEY,
    TAGS_KEY,
    TITLE_KEY,
    UPDATED_AT_KEY,
)
from comicbox.schemas.pdf import MuPDFSchema, PDFXmlSchema
from comicbox.transforms.base import BaseTransform
from comicbox.transforms.comicbox.name_objs import (
    name_obj_from_cb,
    name_obj_to_cb,
)
from comicbox.transforms.pdf.credits import (
    authors_to_credits_transform_from_cb,
    authors_to_credits_transform_to_cb,
)
from comicbox.transforms.spec import (
    MetaSpec,
    create_specs_from_comicbox,
    create_specs_to_comicbox,
)

# Two-way lookup between PDF XML keys and the comicbox keys they are paired
# with. Declared as ordered (pdf key, comicbox key) pairs.
XMLPDF_SIMPLE_KEY_MAP = frozenbidict(
    (
        ("pdf:Creator", SCAN_INFO_KEY),  # original document creator
        ("pdf:Producer", TAGGER_KEY),
        ("pdf:ModDate", UPDATED_AT_KEY),
        ("pdf:Title", TITLE_KEY),
    )
)
# PDF XML keys that are handed to the name-object transforms rather than a
# plain MetaSpec.
XMLPDF_NAME_OBJ_KEY_MAP = frozenbidict(
    (
        ("pdf:Keywords", TAGS_KEY),
        ("pdf:Subject", GENRES_KEY),
    )
)


class PDFXmlTransform(BaseTransform):
    """Transform PDF XML metadata to comicbox and back.

    Declares the schema class plus one spec set per direction for the
    ``pdf:``-prefixed key layout. Both spec sets are rooted at
    ``PDFXmlSchema.ROOT_KEYPATH``.
    """

    SCHEMA_CLASS = PDFXmlSchema
    # PDF -> comicbox. The key maps are declared as pdf key -> comicbox key,
    # so this direction passes their inverse (comicbox key -> pdf key).
    # NOTE(review): presumably spec key maps are keyed by destination key;
    # confirm against comicbox.transforms.spec.
    SPECS_TO = create_specs_to_comicbox(
        MetaSpec(key_map=XMLPDF_SIMPLE_KEY_MAP.inverse),
        authors_to_credits_transform_to_cb("pdf:Author"),
        name_obj_to_cb(XMLPDF_NAME_OBJ_KEY_MAP.inverse),
        format_root_keypath=PDFXmlSchema.ROOT_KEYPATH,
    )
    # comicbox -> PDF. Mirrors SPECS_TO using the key maps as declared.
    SPECS_FROM = create_specs_from_comicbox(
        MetaSpec(key_map=XMLPDF_SIMPLE_KEY_MAP),
        authors_to_credits_transform_from_cb("pdf:Author"),
        name_obj_from_cb(XMLPDF_NAME_OBJ_KEY_MAP),
        format_root_keypath=PDFXmlSchema.ROOT_KEYPATH,
    )


# Two-way lookup between MuPDF metadata keys and the comicbox keys they are
# paired with. Declared as ordered (mupdf key, comicbox key) pairs.
MUPDF_SIMPLE_KEY_MAP = frozenbidict(
    (
        ("creator", SCAN_INFO_KEY),  # original document creator
        ("modDate", UPDATED_AT_KEY),
        ("producer", TAGGER_KEY),
        ("title", TITLE_KEY),
    )
)
# MuPDF keys that are handed to the name-object transforms rather than a
# plain MetaSpec.
MUPDF_NAME_OBJ_KEY_MAP = frozenbidict(
    (
        ("keywords", TAGS_KEY),
        ("subject", GENRES_KEY),
    )
)


class MuPDFTransform(PDFXmlTransform):
    """Transform MuPDF metadata to comicbox and back.

    Subclasses ``PDFXmlTransform`` and overrides its schema class and both
    spec sets to use the unprefixed MuPDF keys (``author``, ``title``, ...)
    rooted at ``MuPDFSchema.ROOT_KEYPATH``.
    """

    SCHEMA_CLASS = MuPDFSchema
    # MuPDF -> comicbox. The key maps are declared as mupdf key -> comicbox
    # key, so this direction passes their inverse.
    SPECS_TO = create_specs_to_comicbox(
        MetaSpec(key_map=MUPDF_SIMPLE_KEY_MAP.inverse),
        authors_to_credits_transform_to_cb("author"),
        name_obj_to_cb(MUPDF_NAME_OBJ_KEY_MAP.inverse),
        format_root_keypath=MuPDFSchema.ROOT_KEYPATH,
    )
    # comicbox -> MuPDF. Mirrors SPECS_TO using the key maps as declared.
    SPECS_FROM = create_specs_from_comicbox(
        MetaSpec(key_map=MUPDF_SIMPLE_KEY_MAP),
        authors_to_credits_transform_from_cb("author"),
        name_obj_from_cb(MUPDF_NAME_OBJ_KEY_MAP),
        format_root_keypath=MuPDFSchema.ROOT_KEYPATH,
    )