comicbox.schemas.pdf

[docs] module comicbox.schemas.pdf

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""Mimic comicbox.Comicbox functions for PDFs."""

from types import MappingProxyType

from marshmallow.fields import Constant, Nested

from comicbox.fields.collection_fields import (
    LegacyNestedMDStringSetField,
    StringSetField,
)
from comicbox.fields.fields import StringField
from comicbox.fields.pdf import PdfDateTimeField
from comicbox.fields.xml_fields import (
    XmlLegacyNestedMDStringSetField,
    XmlPdfDateTimeField,
    XmlStringField,
    XmlStringSetField,
)
from comicbox.schemas.base import BaseSubSchema
from comicbox.schemas.json_schemas import JsonSchema, JsonSubSchema
from comicbox.schemas.xml_schemas import (
    XmlSchema,
    XmlSubHeadSchema,
    XmlSubSchema,
    create_xml_headers,
)


class MuPDFSubSchema(JsonSubSchema):
    """Sub schema for the PDF document metadata dictionary exposed by MuPDF.

    The field names below are the metadata keys documented at the PyMuPDF
    link that follows; they are kept in the upstream camelCase spelling.
    """

    # https://pymupdf.readthedocs.io/en/latest/document.html#Document.metadata

    # NOTE(review): `as_string=True` presumably makes the set fields
    # (de)serialize as one delimited string rather than a list — confirm
    # against StringSetField.
    author = StringSetField(as_string=True)
    creator = StringField()
    # Keywords use the "legacy nested metadata" field type; the owning schema
    # points at this field through LEGACY_NESTED_MD_KEYPATH.
    keywords = LegacyNestedMDStringSetField(as_string=True)
    # camelCase is required to match the metadata key, hence the N815 waiver.
    modDate = PdfDateTimeField()  # noqa: N815
    producer = StringField()
    subject = StringSetField(as_string=True)
    title = StringField()


class MuPDFSchema(JsonSchema):
    """Top-level JSON schema wrapping MuPDF metadata under a ``MuPDF`` root key."""

    # Name of the single root key of the document.
    ROOT_TAG: str = "MuPDF"
    # Dotted path to the mapping that holds the metadata fields; for this
    # schema that is the root itself.
    ROOT_KEYPATH: str = ROOT_TAG
    # Dotted path to the keywords field declared with
    # LegacyNestedMDStringSetField in MuPDFSubSchema.
    LEGACY_NESTED_MD_KEYPATH: str = f"{ROOT_KEYPATH}.keywords"

    # The attribute name must stay equal to ROOT_TAG.
    MuPDF = Nested(MuPDFSubSchema)


class PDFSubSchema(BaseSubSchema):
    """Sub schema for the ``pdf:``-prefixed metadata properties.

    Declares the same seven properties as MuPDFSubSchema, using the Xml*
    field variants and ``pdf:``-prefixed key names.
    """

    class Meta(BaseSubSchema.Meta):
        """Schema options."""

        # The keys contain "@" and ":" and so cannot be written as class
        # attributes; they are supplied through marshmallow's `include`
        # option instead. MappingProxyType makes the mapping read-only.
        include = MappingProxyType(
            {
                # NOTE(review): "@"-prefixed keys look like XML attributes
                # (xmltodict convention) — confirm against the XML writer.
                "@xmlns:pdf": Constant("http://ns.adobe.com/pdf/1.3/"),
                "pdf:Author": XmlStringSetField(as_string=True),
                "pdf:Creator": XmlStringField(),
                # Referenced by PDFXmlSchema.LEGACY_NESTED_MD_KEYPATH.
                "pdf:Keywords": XmlLegacyNestedMDStringSetField(as_string=True),
                "pdf:ModDate": XmlPdfDateTimeField(),
                "pdf:Producer": XmlStringField(),
                "pdf:Subject": XmlStringSetField(as_string=True),
                "pdf:Title": XmlStringField(),
            }
        )


class PDFRDFDescriptionSchema(XmlSubSchema):
    """Schema for the ``rdf:RDF`` mapping that wraps ``rdf:Description``.

    Declares the ``rdf`` namespace and nests the PDF property sub schema
    under the ``rdf:Description`` key.
    """

    class Meta(XmlSubSchema.Meta):
        """Schema options."""

        # The keys contain "@" and ":" and so cannot be written as class
        # attributes; they are supplied through marshmallow's `include`
        # option instead. MappingProxyType makes the mapping read-only.
        include = MappingProxyType(
            {
                # The RDF syntax namespace URI ends with "#". Namespace names
                # are compared as exact strings, so omitting the "#" would
                # declare a different, unrelated namespace.
                "@xmlns:rdf": Constant("http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
                "rdf:Description": Nested(PDFSubSchema),
            },
        )


class PDFXMPMetaSchema(XmlSubHeadSchema):
    """Schema for the contents of the ``x:xmpmeta`` root element.

    Combines the generated XML header entries, a constant ``x:xmptk``
    attribute and the nested ``rdf:RDF`` schema.
    """

    class Meta(XmlSubHeadSchema.Meta):
        """Schema options."""

        # Namespace prefix and URIs handed to create_xml_headers() below.
        NS = "x"
        NS_URI = "adobe:ns:meta/"
        XSD_URI = "http://ns.adobe.com/pdf/1.3/"

        include = MappingProxyType(
            {
                # NOTE(review): presumably expands to the xmlns / schema
                # location attribute fields for this namespace — confirm in
                # comicbox.schemas.xml_schemas.create_xml_headers.
                **create_xml_headers(NS, NS_URI, XSD_URI),
                # Fixed toolkit identification string emitted as a constant.
                "@x:xmptk": Constant(
                    "Adobe XMP Core 5.6-c140 79.160451, 2017/05/06-01:08:21"
                ),
                "rdf:RDF": Nested(PDFRDFDescriptionSchema),
            }
        )


class PDFXmlSchema(XmlSchema):
    """Top-level XML schema for PDF metadata rooted at ``x:xmpmeta``."""

    # Name of the root element.
    ROOT_TAG: str = "x:xmpmeta"
    # Dotted path from the root down to the mapping that holds the pdf:*
    # properties: x:xmpmeta -> rdf:RDF -> rdf:Description.
    ROOT_KEYPATH: str = f"{ROOT_TAG}.rdf:RDF.rdf:Description"
    # Dotted path to the keywords property declared with
    # XmlLegacyNestedMDStringSetField in PDFSubSchema.
    LEGACY_NESTED_MD_KEYPATH: str = f"{ROOT_KEYPATH}.pdf:Keywords"

    class Meta(XmlSchema.Meta):
        """Schema options."""

        # The key must stay equal to ROOT_TAG; it is given via `include`
        # because "x:xmpmeta" is not a valid Python attribute name.
        include = MappingProxyType({"x:xmpmeta": Nested(PDFXMPMetaSchema)})