moldybreadpkg/fedora

Types

FedoraRequest = ref object
  base_url*: string
  results*: seq[string]
  client: HttpClient
  max_results*: int
  output_directory*: string
  dc_values: string
  pid_part: string
  terms: string
Type to Handle Fedora requests   Source Edit
Message = ref object
  errors*: seq[string]
  successes*: seq[string]
  attempts*: int
Type to handle messaging   Source Edit

Procs

proc initFedoraRequest(url: string = "http://localhost:8080";
                       auth = ("fedoraAdmin", "fedoraAdmin");
                       output_directory, dc_values, terms, pid_part = "";
                       max_results = 100): FedoraRequest {...}{.raises: [], tags: [].}

Initializes new Fedora Request.

Example with namespace / pid_part:

let fedora_connection = initFedoraRequest(pid_part="test")

Example with dc_values string:

let fedora_connection = initFedoraRequest(dc_values="title:Pencil;contributor:Wiley")
  Source Edit
proc process_versions(pids_and_versions: seq[(string, int)];
                      version_target: int; operation: string): seq[string] {...}{.
    raises: [], tags: [].}

Helper function to process pids and versions against user expectations.

Example:

var versions = @[("abc:1", 1), ("abc:2", 2), ("abc:3", 1)]
assert process_versions(versions, 2, "==") == @["abc:2"]
  Source Edit

Methods

method populate_results(this: FedoraRequest): seq[string] {...}{.base, raises: [
    ValueError, IOError, IOError, ValueError, RegexError, HttpRequestError,
    Exception, XmlError, SslError, TimeoutError, ProtocolError, KeyError],
    tags: [WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Populates results for a Fedora request.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
echo fedora_connection.populate_results()
  Source Edit
method harvest_datastream(this: FedoraRequest; datastream_id = "MODS"): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, InvalidPositionError], tags: [ReadEnvEffect,
    WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect, ReadDirEffect,
    WriteDirEffect].}

Harvests the specified datastream (MODS by default) for each object in a results set.

Examples:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.harvest_datastream("DC")
  Source Edit
method download_page_with_book_relationship(this: FedoraRequest;
    datastream_id: string): seq[(string, string, string)] {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, InvalidPositionError], tags: [
    ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect,
    ReadDirEffect, WriteDirEffect].}

Downloads parts of pages of a book and names them based on the relationship.

Example:

let fedora_connection = initFedoraRequest(pid_part="test", output_directory="output")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.download_page_with_book_relationship("OBJ")
  Source Edit
method harvest_datastream_no_pages(this: FedoraRequest; datastream_id = "MODS"): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, InvalidPositionError], tags: [ReadEnvEffect,
    WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect, ReadDirEffect,
    WriteDirEffect].}

Harvests metadata for matching objects unless their content model is a page.

This method requires a datastream_id and downloads the metadata record if the object does not have an isMemberOf relationship.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.harvest_datastream_no_pages("DC")
  Source Edit
method update_metadata(this: FedoraRequest; datastream_id, directory: string;
                       gsearch_auth: (string, string); clean_up = false): Message {...}{.
    base, raises: [OSError, ValueError, IOError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, XmlError, RegexError, InvalidPositionError], tags: [
    ReadDirEffect, ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect,
    TimeEffect].}

Updates metadata records based on files in a directory.

This method requires a datastream_id and a directory (use full paths for now). Files must follow the same naming convention as their PIDs and end with a .xml extension (e.g., test:1.xml).

Examples:

let fedora_connection = initFedoraRequest(pid_part="test")
discard fedora_connection.update_metadata("MODS", "/home/mark/nim_projects/moldybread/experiment", gsearch_auth=("gsearch_user", "gsearch_password"))
  Source Edit
method download_foxml(this: FedoraRequest): Message {...}{.base, raises: [ValueError,
    IOError, OSError, HttpRequestError, Exception, LibraryError, SslError,
    TimeoutError, ProtocolError, KeyError, InvalidPositionError], tags: [
    ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect,
    ReadDirEffect, WriteDirEffect].}

Downloads the FOXML record for each object in a results set.

This method downloads the foxml record for all matching objects.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/harrison/nim_projects/moldybread/output", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.download_foxml().successes
  Source Edit
method download_object_xml(this: FedoraRequest): Message {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, InvalidPositionError], tags: [
    ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect,
    ReadDirEffect, WriteDirEffect].}

Downloads the object XML for each object in a results set.

This method downloads the object xml record for all matching objects.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/harrison/nim_projects/moldybread/output", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.download_object_xml().successes
  Source Edit
method audit_responsibility(this: FedoraRequest; username: string): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, XmlError, RegexError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Looks for objects created or modified by a specific user.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.audit_responsibility("fedoraAdmin").successes
  Source Edit
method version_datastream(this: FedoraRequest; dsid: string; versionable: bool): Message {...}{.
    base, raises: [ValueError, IOError, OSError, KeyError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Makes a datastream versioned or not versioned.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
doAssert(typeOf(fedora_connection.version_datastream("MODS", false)) == Message)
  Source Edit
method change_object_state(this: FedoraRequest; state: string): Message {...}{.base, raises: [
    ValueError, IOError, OSError, KeyError, Exception, LibraryError, SslError,
    TimeoutError, ProtocolError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Changes the state of each object in a results set.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
doAssert(typeOf(fedora_connection.change_object_state("I")) == Message)
  Source Edit
method purge_old_versions_of_datastream(this: FedoraRequest; dsid: string): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, XmlError, RegexError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Purges all but the latest version of a datastream.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
doAssert(typeOf(fedora_connection.purge_old_versions_of_datastream("MODS")) == Message)
  Source Edit
method find_objects_missing_datastream(this: FedoraRequest; dsid: string): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Lists the objects missing a specific datastream as an error.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.find_objects_missing_datastream("RELS-INT").errors
  Source Edit
method get_datastreams(this: FedoraRequest; profiles = true;
                       as_of_date = getTime()): seq[(string, seq[TaintedString])] {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, XmlError, RegexError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Returns a sequence of tuples with the pid and a sequence of datastreams that belong to it.

Optionally, you can specify whether you want the entire datastream profile returned rather than just the datastream id (profiles, defaults to true), and a date on which to base the query (as_of_date, defaults to now). Use yyyy-MM-dd or yyyy-MM-ddTHH:mm:ssZ.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.get_datastreams(profiles=true)
  Source Edit
method get_datastream_history(this: FedoraRequest; dsid: string): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, InvalidPositionError], tags: [ReadEnvEffect,
    WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect, ReadDirEffect,
    WriteDirEffect].}

Serializes the history of a datastream for a results set to disk.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.get_datastream_history("MODS").successes
  Source Edit
method get_datastream_at_date(this: FedoraRequest; dsid: string; date: string): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, InvalidPositionError], tags: [ReadEnvEffect,
    WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect, ReadDirEffect,
    WriteDirEffect].}

Downloads the specified datastream at a specific date for all items in a result set.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.get_datastream_at_date("DC", "2019-12-25")
  Source Edit
method download_all_versions_of_datastream(this: FedoraRequest; dsid: string): Message {...}{.
    base, raises: [ValueError, IOError, OSError, HttpRequestError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   KeyError, XmlError, RegexError, InvalidPositionError], tags: [
    ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect,
    ReadDirEffect, WriteDirEffect].}

Downloads all versions of a specific datastream and names each file as pid-datetime.extension (test:223-2020-01-07T17:25:32.085Z.xml).

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.download_all_versions_of_datastream("MODS").successes
  Source Edit
method validate_checksums(this: FedoraRequest; dsid: string): Message {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, XmlError, RegexError,
    InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Checks if the current checksum of datastreams in a result set matches the checksum of the same datastream on ingest.

If so, the check is considered a success. If not, the check is an error. If a datastream is not found for an object, neither a success nor an error is registered.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.validate_checksums("MODS").successes
  Source Edit
method validate_checksums(this: FedoraRequest): Message {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, XmlError, RegexError,
    InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Checks if the current checksum of all datastreams belonging to a particular object matches the checksum of the datastream when it was ingested.

If the validation is confirmed, the pid and datastream are appended to the successes sequence of the result Message. If the validation fails, the pid and datastream are appended to the errors sequence of the result Message.

NOTE: By design, this method only checks the current version of the datastream and ignores previous versions.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.validate_checksums().errors
  Source Edit
method find_distinct_datastreams(this: FedoraRequest): seq[string] {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, XmlError, RegexError,
    InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Filters distinct datastreams from all objects in a result set.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.find_distinct_datastreams()
  Source Edit
method get_content_models(this: FedoraRequest): seq[(string, string)] {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, XmlError, RegexError,
    InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Returns a sequence of tuples, each pairing a pid with a human-readable version of its content model.

Example:

let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
echo fedora_connection.get_content_models()
  Source Edit
method update_solr_with_gsearch(this: FedoraRequest;
                                gsearch_auth: (string, string)): Message {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Updates solr records for objects with gsearch.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.update_solr_with_gsearch(gsearch_auth=("gsearch_user", "gsearch_password"))
  Source Edit
method count_versions_of_datastream(this: FedoraRequest; dsid: string): seq[
    (string, int)] {...}{.base, raises: [ValueError, IOError, OSError,
                                    HttpRequestError, Exception, LibraryError,
                                    SslError, TimeoutError, ProtocolError,
                                    KeyError, XmlError, RegexError,
                                    InvalidPositionError], tags: [ReadEnvEffect,
    WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Returns pids with the total number of versions that a specified datastream has.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.count_versions_of_datastream("MODS")
  Source Edit
method find_xacml_restrictions(this: FedoraRequest): seq[
    (string, seq[XACMLRule])] {...}{.base, raises: [ValueError, IOError, OSError,
    HttpRequestError, Exception, LibraryError, SslError, TimeoutError,
    ProtocolError, KeyError, XmlError, RegexError, InvalidPositionError], tags: [
    ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Returns all XACML rules for objects in a set.

Example:

let fedora_connection = initFedoraRequest(pid_part="test")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.find_xacml_restrictions()
  Source Edit
method purge_xacml_inheritance_relationships(this: FedoraRequest;
    inheritance_object: string): Message {...}{.base, raises: [ValueError, IOError,
    OSError, KeyError, Exception, LibraryError, SslError, TimeoutError,
    ProtocolError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Purges the XACML inheritance relationship to the given inheritance object for objects in a set and returns a Message reporting the outcome.

Example:

let fedora_connection = initFedoraRequest(pid_part="test", output_directory="output")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.purge_xacml_inheritance_relationships("islandora:test")
  Source Edit
method change_model_to_binary(this: FedoraRequest; old_model: string): Message {...}{.
    base, raises: [ValueError, IOError, OSError, KeyError, Exception,
                   LibraryError, SslError, TimeoutError, ProtocolError,
                   HttpRequestError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Changes objects in a set from its old content model to the binary content model.

Example:

let fedora_connection = initFedoraRequest(pid_part="test", output_directory="output")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.change_model_to_binary("sp%5fbasic%5fimage").successes
  Source Edit
method add_new_relationship(this: FedoraRequest; predicate: string; obj: string;
                            is_literal: bool): Message {...}{.base, raises: [
    ValueError, IOError, OSError, HttpRequestError, Exception, LibraryError,
    SslError, TimeoutError, ProtocolError, KeyError, InvalidPositionError],
    tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, RootEffect, TimeEffect].}

Add a new relationship with the digital object as subject for all objects in a set.

Example:

let fedora_connection = initFedoraRequest(pid_part="test", output_directory="output")
fedora_connection.results = fedora_connection.populate_results()
discard fedora_connection.add_new_relationship(predicate="info%3afedora%2ffedora%2dsystem%3adef%2fmodel%23hasModel", obj="info%3afedora%2fislandora%3abinaryObjectCModel", is_literal=false)
  Source Edit