import mime from "mime";

import EXIF from "exif-js";
import { pdfjs } from "react-pdf";
import OFFICEPROPS from "officeprops";

/**
 * Builds a complete front-end metadata object for an uploaded file.
 *
 * The MIME type is looked up from the file name (empty string when unknown)
 * and decides which extractor runs: Office documents, JPEG images and PDFs
 * each have their own; every other type contributes no extracted values.
 * Properties the extractor did not supply keep their blank defaults, and
 * `format` is always the detected MIME type.
 *
 * @param {File} file - The file to inspect; only `name` is read here, the
 *   rest is left to the extractors.
 * @returns {Promise<object>} The metadata object with every property present.
 */
const extractMetadata = async (file) => {
  const mimeType = mime.getType(file.name) ?? "";
  const isOfficeDocument = Object.values(OFFICEPROPS.mimeTypes).includes(
    mimeType
  );

  let extracted = {};
  if (isOfficeDocument) {
    // May be null when the Office extractor gives up; spreading null is a no-op.
    extracted = await extractOffice(file);
  } else if (mimeType === "image/jpeg") {
    extracted = await extractExif(file);
  } else if (mimeType === "application/pdf") {
    extracted = await extractPdf(file);
  }

  return {
    title: "",
    description: "",
    format: "",
    source: "",
    language: "",
    rights: "",
    subjects: [],
    dates: [],
    types: [],
    coverages: [],
    creators: [],
    publishers: [],
    contributors: [],
    ...extracted,
    // The detected type wins over anything an extractor might have set.
    format: mimeType,
  };
};

/**
 * Reads the EXIF tags of an image and maps them onto metadata properties.
 *
 * `EXIF.getData` is callback based, so it is adapted to a promise here. Once
 * the callback fires, the tags are read from `file.exifdata`.
 *
 * @param {File} file - The image file to read.
 * @returns {Promise<{title: string, rights: string}>} The image description as
 *   title and the copyright notice as rights; absent tags become "".
 */
const extractExif = async (file) => {
  return new Promise((resolve) => {
    EXIF.getData(file, () => {
      const { ImageDescription, Copyright } = file.exifdata;
      const title = ImageDescription ?? "";
      const rights = Copyright ?? "";

      resolve({ title, rights });
    });
  });
};

/**
 * Extracts title, description and subjects from a PDF's document information.
 *
 * Mapping: `Title` becomes the title, `Keywords` the description, and
 * `Subject` is split on ";" into individual subjects.
 *
 * @param {File} file - The PDF file to read.
 * @returns {Promise<{title: string, description: string, subjects: string[]}>}
 *   Absent fields become "" or an empty list.
 * @throws Rejects with whatever PDF.js reports when the document cannot be
 *   loaded or its metadata cannot be read.
 */
const extractPdf = async (file) => {
  const buffer = await file.arrayBuffer();
  const loadingTask = pdfjs.getDocument(new Uint8Array(buffer));

  try {
    const doc = await loadingTask.promise;
    const { info } = await doc.getMetadata();

    // "".split(";") is [""], and "a; b;" leaves " b" and "", so trim every
    // entry and drop the blanks instead of reporting empty subjects.
    const subjects = (info.Subject ?? "")
      .split(";")
      .map((subject) => subject.trim())
      .filter((subject) => subject !== "");

    return {
      title: info.Title ?? "",
      description: info.Keywords ?? "",
      subjects,
    };
  } finally {
    // Only the metadata is needed; release the loaded document whether or
    // not reading it succeeded.
    await loadingTask.destroy();
  }
};

/**
 * Extracts title, description and subject from an Office document.
 *
 * Each property is read independently: a property that is missing or has no
 * value falls back to "" (or to no subjects) rather than discarding the
 * properties that are present.
 *
 * @param {File} file - The Office document to read.
 * @returns {Promise<{title: string, description: string, subjects: string[]}|null>}
 *   The extracted values, or null when the document could not be read at all.
 */
const extractOffice = async (file) => {
  try {
    const { editable } = await OFFICEPROPS.getData(file);
    const subject = editable.subject?.value;

    return {
      // Fall back to "" so an undefined value cannot override the blank
      // defaults when the caller merges this result.
      title: editable.title?.value ?? "",
      description: editable.description?.value ?? "",
      subjects: subject ? [subject] : [],
    };
  } catch {
    // Fails for empty Word docs.
    // Deliberately best-effort: skip metadata extraction instead of failing.
    return null;
  }
};

/**
 * Wraps a value in the `{ label, value }` option shape react-select works
 * with; the value doubles as its own label.
 *
 * @param {*} value - The value to wrap.
 * @returns {{label: *, value: *}} The option object.
 */
export const stringToSelect = (value) => {
  const option = { label: value, value: value };
  return option;
};

/**
 * Produces a copy of the metadata object in which every property has had its
 * JSON control characters removed. The input object is left untouched.
 *
 * @param {object} metadata - The metadata object to clean.
 * @returns {object} A new object with the same keys and stripped values.
 */
const stripMetadata = (metadata) => {
  const strippedEntries = Object.entries(metadata).map((entry) =>
    entryToStrippedEntry(entry)
  );

  return Object.fromEntries(strippedEntries);
};

/**
 * Strips a single `[key, property]` entry. Array properties are stripped
 * element by element; anything else is stripped as one value.
 *
 * @param {[string, *]} entry - The key/property pair.
 * @returns {[string, string|string[]]} The pair with its property stripped.
 */
const entryToStrippedEntry = ([key, property]) => {
  if (Array.isArray(property)) {
    return [key, property.map((item) => stripJsonCharacters(item))];
  }

  return [key, stripJsonCharacters(property)];
};

/**
 * Converts the value to a string and replaces every square bracket, curly
 * brace, colon, line feed, carriage return and double quote with "_".
 *
 * @param {*} string - The value to clean; coerced with `String()`.
 * @returns {string} The cleaned string.
 */
const stripJsonCharacters = (string) => {
  const text = String(string);

  return text.replace(/[[\]{}:\n\r"]/g, "_");
};

/**
 * Converts a front-end metadata object into the payload shape sent to the
 * back-end.
 *
 * Scalar properties are copied to their `dc*` counterparts. Each list property
 * is turned into objects pairing a value with the id found at the same index
 * of its companion `*Ids` list, or null when there is none. Creators,
 * publishers and contributors are merged into a single `actors` list.
 * `dcIdentifier` is always sent as an empty string.
 *
 * @param {string} fileName - Name of the file the metadata belongs to.
 * @param {object} metadata - The front-end metadata; every property is
 *   optional and falls back to the default shown in the signature.
 * @returns {object} The API payload.
 */
const formatMetadata = (
  fileName,
  {
    id = null,
    title = "",
    description = "",
    format = "",
    source = "",
    language = "",
    relation = "FA",
    rights = "",
    subjects = [],
    subjectIds = [],
    dates = [],
    dateIds = [],
    types = [],
    typeIds = [],
    coverages = [],
    coverageIds = [],
    creators = [],
    creatorIds = [],
    publishers = [],
    publisherIds = [],
    contributors = [],
    contributorIds = [],
  }
) => {
  // Pairs each value with the id at the same position, null when absent.
  const pairWithIds = (values, ids, field) =>
    values.map((value, position) => ({
      id: ids[position] ?? null,
      [field]: value,
    }));

  const actors = formatActors(
    creators,
    creatorIds,
    publishers,
    publisherIds,
    contributors,
    contributorIds
  );

  const payload = {
    id,
    fileName,
    dcIdentifier: "",
    dcTitle: title,
    dcDescription: description,
    dcFormat: format,
    dcSource: source,
    dcLanguage: language,
    dcRelation: relation,
    dcRights: rights,

    subjects: pairWithIds(subjects, subjectIds, "dcSubject"),
    dates: pairWithIds(dates, dateIds, "dcDate"),
    types: pairWithIds(types, typeIds, "dcType"),
    coverages: pairWithIds(coverages, coverageIds, "dcCoverage"),

    actors,
  };

  return payload;
};

/**
 * Merges creators, publishers and contributors into one list of actor
 * objects, in that order. Each actor carries its name, its type
 * ("dcCreator", "dcPublisher" or "dcContributor") and the id found at the
 * same index of the matching id list, or null when there is none.
 *
 * @param {string[]} creators - Creator names.
 * @param {Array} creatorIds - Ids aligned by index with `creators`.
 * @param {string[]} publishers - Publisher names.
 * @param {Array} publisherIds - Ids aligned by index with `publishers`.
 * @param {string[]} contributors - Contributor names.
 * @param {Array} contributorIds - Ids aligned by index with `contributors`.
 * @returns {Array<{id: *, actorName: string, actorType: string}>} All actors.
 */
const formatActors = (
  creators,
  creatorIds,
  publishers,
  publisherIds,
  contributors,
  contributorIds
) => {
  // Turns one group of names into actor objects of the given type.
  const toActors = (names, ids, actorType) =>
    names.map((actorName, position) => ({
      id: ids[position] ?? null,
      actorName,
      actorType,
    }));

  return [].concat(
    toActors(creators, creatorIds, "dcCreator"),
    toActors(publishers, publisherIds, "dcPublisher"),
    toActors(contributors, contributorIds, "dcContributor")
  );
};

/**
 * Converts an apiMetadata object, as received from the back-end, into the
 * metadata object used in the front-end.
 *
 * The front-end itself does not use the ids of the metadata and of its
 * properties (like coverages), but the back-end needs them to update existing
 * metadata. They are therefore carried along in separate `*Ids` properties,
 * index-aligned with their values, and stay unused until a request is made. A
 * metadata object that has an id is a sure sign that it is already stored in
 * the database.
 *
 * There is probably a more elegant way to store the ids, but this way most of
 * the application does not have to be redone.
 *
 * @param {object} apiMetadata - The metadata as returned by the back-end.
 * @returns {object} The front-end metadata object.
 */
const apiMetadataToMetadata = (apiMetadata) => {
  // Collects one field from every item of a list.
  const pluck = (items, field) => items.map((item) => item[field]);
  // Selects the actors of a single type, keeping their order.
  const actorsOfType = (actorType) =>
    apiMetadata.actors.filter((actor) => actor.actorType === actorType);

  return {
    id: apiMetadata.id,
    fileId: apiMetadata.fileId,
    title: apiMetadata.dcTitle,
    description: apiMetadata.dcDescription,
    format: apiMetadata.dcFormat,
    source: apiMetadata.dcSource,
    language: apiMetadata.dcLanguage,
    rights: apiMetadata.dcRights,
    subjects: pluck(apiMetadata.subjects, "dcSubject"),
    subjectIds: pluck(apiMetadata.subjects, "id"),
    dates: pluck(apiMetadata.dates, "dcDate"),
    dateIds: pluck(apiMetadata.dates, "id"),
    types: pluck(apiMetadata.types, "dcType"),
    typeIds: pluck(apiMetadata.types, "id"),
    coverages: pluck(apiMetadata.coverages, "dcCoverage"),
    coverageIds: pluck(apiMetadata.coverages, "id"),
    creators: pluck(actorsOfType("dcCreator"), "actorName"),
    creatorIds: pluck(actorsOfType("dcCreator"), "id"),
    publishers: pluck(actorsOfType("dcPublisher"), "actorName"),
    publisherIds: pluck(actorsOfType("dcPublisher"), "id"),
    contributors: pluck(actorsOfType("dcContributor"), "actorName"),
    contributorIds: pluck(actorsOfType("dcContributor"), "id"),
  };
};

/**
 * Default export of this module: the metadata helpers bundled into a single
 * service object. `stringToSelect` is additionally available as a named
 * export.
 */
const metadataService = {
  extractMetadata,
  formatMetadata,
  stripMetadata,
  stringToSelect,
  apiMetadataToMetadata,
};
export default metadataService;
