# Source code for tokio.connectors.hpss

"""Connect to various outputs made available by HPSS
"""

import re
import copy
import datetime
from tokio.connectors.common import SubprocessOutputDict

# Matches a non-empty line made up solely of "=" and space characters
REX_HEADING_LINE = re.compile(r"^[= ]+$")
# Matches an empty line or one containing only whitespace
REX_EMPTY_LINE = re.compile(r"^\s*$")
# Matches durations written as <days>-<hours>:<minutes>:<seconds>, where each
# field is one or more digits; the four fields are captured as groups 1-4
REX_TIMEDELTA = re.compile(r"^(\d+)-(\d+):(\d+):(\d+)$")

# Lowercased column headings whose values are converted with float()
FLOAT_KEYS = {
    'io_gb',
    'write_gb',
    'read_gb',
    'copy_gb',
    'mig (gb)',
    'purge(gb)',
    'lock%',
}

# Lowercased column headings whose values are converted with int()
INT_KEYS = {
    'users',
    'ops',
    'w_ops',
    'r_ops',
    'c_ops',
    'migfiles',
    'purfiles',
    'count',
    'cleans',
    'locks',
    'mounts',
}

# Lowercased column headings holding d-HH:MM:SS durations; these are kept as
# text and additionally expressed in seconds under a "<heading>secs" key
DELTIM_KEYS = {
    'migtime',
    'purgetime',
    'availtime',
    'locktime',
    'mounttime',
}

# Maps a lowercased table title to the column whose value becomes the key of
# each record when that table's list of records is converted into a dict.
# Tables not listed here keep their records as a list.
REKEY_TABLES = {
    'io totals by client application': 'client',
    'io totals by client host': 'host',
    'io totals by hpss client gateway (ui) host': 'host',
#   'largest users': 'user', # degenerate users will occur if one user uses multiple client apps
    'migration purge report': 'sc',
#   'tape drive report': 'drivetyp',
}

class HpssDailyReport(SubprocessOutputDict):
    """Representation for the daily report that HPSS can generate

    Parsed tables are stored as ``self[system][title]`` where ``system`` and
    ``title`` are the lowercased halves of each table's ``System : Title``
    line.  Each entry is either a list of record dicts or, for the titles
    listed in ``REKEY_TABLES``, a dict of record dicts.

    Attributes:
        date (datetime.datetime or None): Date taken from the report's
            ``HPSS Report for Date`` line; None until a report is parsed.
    """
    def __init__(self, *args, **kwargs):
        super(HpssDailyReport, self).__init__(*args, **kwargs)
        self.date = None
        # load() is inherited; presumably it obtains the report text and
        # hands it to load_str() -- confirm against SubprocessOutputDict
        self.load()

    def load_str(self, input_str):
        """Parse the HPSS daily report text

        Args:
            input_str (str): Full text of an HPSS daily report

        Raises:
            IndexError: If no ``HPSS Report for Date`` line is found and no
                date has been set on this object
        """
        lines = input_str.splitlines()
        num_lines = len(lines)
        start_line = 0

        # Look for the header for the whole report to get the report date.
        # start_line is left pointing at the header so that table detection
        # below only considers the remainder of the report.
        for start_line, line in enumerate(lines):
            if line.startswith("HPSS Report for Date"):
                self.date = datetime.datetime.strptime(line.split()[-1], "%Y-%m-%d")
                break
        if not self.date:
            raise IndexError("No report date found")

        # Try to find tables encoded in the remainder of the report.  Every
        # line is tried as a potential table title.
        while start_line < num_lines:
            # The second return value is not needed here; the presence of
            # 'records' alone indicates that a table with at least one row
            # was parsed.
            parsed_table, _ = _parse_section(lines, start_line)

            if 'records' in parsed_table:
                system = parsed_table['system']
                title = parsed_table['title']
                if system not in self:
                    self[system] = {}

                # convert a list of records into a dict of indices
                if title in REKEY_TABLES:
                    parsed_table = _rekey_table(parsed_table, key=REKEY_TABLES[title])

                self[system][title] = parsed_table['records']

            start_line += 1
def _parse_section(lines, start_line=0):
    """Parse a single table of the HPSS daily report

    Converts a table from the HPSS daily report into a dictionary.  For example
    an example table may appear as::

        Archive : IO Totals by HPSS Client Gateway (UI) Host
        Host             Users      IO_GB       Ops
        ===============  =====  =========  ========
        heart               53   148740.6     27991
        dtn11                5    29538.6      1694
        Total               58   178279.2     29685
        HPSS ACCOUNTING:         224962.6

    which will return a dict of form::

        {
            "system": "archive",
            "title": "io totals by hpss client gateway (ui) host",
            "records": [
                {"host": "heart", "users": 53, "io_gb": 148740.6, "ops": 27991},
                {"host": "dtn11", "users": 5, "io_gb": 29538.6, "ops": 1694},
                {"host": "total", "users": 58, "io_gb": 178279.2, "ops": 29685},
            ]
        }

    Column names and values are lowercased.  Columns named in ``FLOAT_KEYS``
    and ``INT_KEYS`` are converted to numbers; columns named in
    ``DELTIM_KEYS`` are kept as text and also stored in seconds under
    ``<column>secs``.

    Lines that do not look like the start of a table (no ``:`` in the title
    line, no separator line two lines below it, or too few lines left in the
    input) yield an empty dict.  Within a table, an empty line or a line too
    short to span all columns is treated as the end of the table.

    Args:
        lines (list of str): Text of the HPSS report
        start_line (int): Index of ``lines`` defined such that, after any
            leading blank lines are skipped,

            * ``lines[start_line]`` is the table title
            * ``lines[start_line + 1]`` is the table heading row
            * ``lines[start_line + 2]`` is the line separating the table
              heading and the first row of data
            * ``lines[start_line + 3:]`` are the rows of the table

    Returns:
        tuple: Tuple of (dict, int) where

        * dict contains the parsed contents of the table; it is empty if no
          table was recognized and lacks ``records`` if the table has no rows
        * int is an index into ``lines``: the index of the last line of the
          table + 1, or the index at which parsing gave up if no table was
          recognized

    Raises:
        ValueError: If a value in a numeric column cannot be converted
    """
    results = {}

    # Skip any initial whitespace
    num_lines = len(lines)
    while start_line < num_lines and REX_EMPTY_LINE.match(lines[start_line]):
        start_line += 1

    # Did we skip past the end of the input data?
    if start_line >= num_lines:
        return results, start_line

    # Parse table title (if available).  This can pick up times (0:00:00) so do
    # not treat system, title as legitimate values until we also identify the
    # line below column headings.
    if ':' not in lines[start_line]:
        return results, start_line
    system, title = lines[start_line].split(':', 1)

    # A table needs a heading row and a separator row below its title, so a
    # candidate title within the last two lines of input cannot be a table
    if start_line + 2 >= num_lines:
        return results, start_line

    # Determine column delimiters
    col_extents = _find_columns(lines[start_line + 2])
    if not col_extents:
        return results, start_line

    # At this point, we are reasonably confident we have found a table.
    # Populate results so that this function returns some indicator of
    # success.
    results['system'] = system.strip().lower()
    results['title'] = title.strip().lower()

    # Determine column headers
    heading_line = lines[start_line + 1]
    headings = [
        heading_line[start_pos:start_pos + str_len].strip().lower()
        for start_pos, str_len in col_extents
    ]

    # Rows must reach (to within one character of) the end of the last column
    min_length = col_extents[-1][0] + col_extents[-1][1] - 1

    records = []
    finish_line = start_line + 3
    for line in lines[finish_line:]:
        # an empty line denotes end of table; a short line is malformed, which
        # happens for table summaries
        if REX_EMPTY_LINE.match(line) or len(line) < min_length:
            break
        records.append(_parse_record(line, headings, col_extents))
        finish_line += 1

    if records:
        results['records'] = records

    return (results, finish_line)


def _parse_record(line, headings, col_extents):
    """Slice one table row into a dict keyed by lowercased column heading

    Args:
        line (str): A single data row of a table
        headings (list of str): Lowercased column names, one per column
        col_extents (list of tuple): (start index, length) of each column

    Returns:
        dict: Column values keyed by column name, converted according to
        ``FLOAT_KEYS``, ``INT_KEYS``, and ``DELTIM_KEYS``
    """
    record = {}
    for col_name, (start_pos, str_len) in zip(headings, col_extents):
        col_val = line[start_pos:start_pos + str_len].lower().strip()
        if col_name in FLOAT_KEYS:
            record[col_name] = float(col_val)
        elif col_name in INT_KEYS:
            record[col_name] = int(col_val)
        elif col_name in DELTIM_KEYS:
            record[col_name] = col_val
            record[col_name + "secs"] = _hpss_timedelta_to_secs(col_val)
        else:
            record[col_name] = col_val
    return record
def _find_columns(line, sep="=", gap=' ', strict=False):
    """Determine the column start positions and widths from a separator line

    Takes a line separator such as the one denoted below::

        Host             Users      IO_GB
        ===============  =====  =========
        heart               53   148740.6

    and returns a list of (start index, length) values that can be used to
    slice table rows into column entries, e.g. ``row[start:start + length]``.

    Args:
        line (str): Text comprised of separator characters and spaces that
            define the extents of columns
        sep (str): The character used to draw the column lines
        gap (str): The character separating ``sep`` characters
        strict (bool): If true, restrict column extents to only include sep
            characters.  If false, each column also takes in the gap
            characters that precede its run of sep characters, so that
            consecutive columns cover the line without holes.

    Returns:
        list of tuples: One (start index, length) tuple per column, in order
        of appearance.  Empty if ``line`` contains any character other than
        ``sep`` and ``gap``, or contains no ``sep`` characters at all.
    """
    # if line is not comprised exclusively of separators and gaps, it is not a
    # valid heading line
    if line.strip(sep + gap):
        return []

    columns = []

    # col_start is None while looking for the start of a new column, which only
    # happens in strict mode; otherwise a column starts where the last ended
    col_start = None if strict else 0

    prev_char = None
    for index, char in enumerate(line):
        if strict and col_start is None and char == sep:
            col_start = index
        elif char == gap and prev_char == sep:
            # first gap character after a run of separators ends the column
            columns.append((col_start, index - col_start))
            col_start = None if strict else index
        prev_char = char

    # a column running up to the very end of the line has no gap to close it
    if line and line[-1] == sep and col_start is not None:
        columns.append((col_start, len(line) - col_start))

    return columns
def _rekey_table(table, key):
    """Turn a table's list of records into a dict of records

    Given a table as returned by _parse_section(), e.g.::

        {
            "records": [
                {"host": "heart", "io_gb": 148740.6, "ops": 27991},
                ...
            ]
        }

    and ``key="host"``, produces::

        {
            "records": {
                "heart": {"io_gb": 148740.6, "ops": 27991},
                ...
            }
        }

    The input table is left untouched; the result is built from a deep copy.
    Only tables in which ``key`` uniquely identifies each record can be
    re-keyed.

    Args:
        table (dict): Output of the _parse_section() function
        key (str): Name of the field removed from each record and used as
            that record's key in the result

    Returns:
        dict: Copy of ``table`` whose ``records`` is a dict instead of a list

    Raises:
        KeyError: If two records share the same value for ``key``, or if a
            record does not contain ``key``
    """
    rekeyed = copy.deepcopy(table)

    by_key = {}
    for entry in rekeyed['records']:
        ident = entry.pop(key)
        if ident not in by_key:
            by_key[ident] = entry
            continue
        raise KeyError("Degenerate key %s=%s" % (key, ident))

    rekeyed['records'] = by_key
    return rekeyed
def _hpss_timedelta_to_secs(timedelta_str):
    """Express an HPSS-style duration string as a number of seconds

    Args:
        timedelta_str (str): Duration written as d-HH:MM:SS, i.e. days,
            a dash, then hours, minutes, and seconds separated by colons

    Returns:
        int: Total seconds represented by ``timedelta_str``, or -1 if the
        string is not in the expected form
    """
    parsed = REX_TIMEDELTA.match(timedelta_str)
    if not parsed:
        return -1

    days, hours, minutes, secs = (int(field) for field in parsed.groups())
    return ((days * 24 + hours) * 60 + minutes) * 60 + secs