Commit ef2397f4 authored by Vincent Stehlé's avatar Vincent Stehlé
Browse files

parser.py: identify the sequence file



We add a database of the sequence files we know about with their sha256,
and we try to identify the input sequence file among those.

A `--seq-db' option is added to allow specifying the database.

Update documentation accordingly. While at it, fix spacing.
Signed-off-by: Vincent Stehlé <vincent.stehle@arm.com>
parent 126803b3
...@@ -265,13 +265,29 @@ override the result of some tests with the following ones: ...@@ -265,13 +265,29 @@ override the result of some tests with the following ones:
filesystem implementation limitations and they do filesystem implementation limitations and they do
not prevent an OS to boot. not prevent an OS to boot.
`KNOWN ACS LIMITATION` Genuine bugs, which are fixed in a more recent version `KNOWN ACS LIMITATION` Genuine bugs, which are fixed in a more recent
of the ACS or which must ultimately be fixed and which version of the ACS or which must ultimately be fixed
we know about. and which we know about.
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
Some of the rules just add a `comments` field with some help text. Some of the rules just add a `comments` field with some help text.
### Database of sequence files
The `seq.db` file contains a list of known sequence files, which allows the
input sequence file to be identified.
This database file contains lines describing each known sequence file in turn,
in the following format:
```
sha256 description
```
Everything appearing after a '#' sign is treated as a comment and ignored.
The database filename can be specified with the `--seq-db` option.
## Notes ## Notes
### Known Issues: ### Known Issues:
* "comment" is currently not implemented, as formatting is not currently consistent, should reflect the comments from the test. * "comment" is currently not implemented, as formatting is not currently consistent, should reflect the comments from the test.
......
...@@ -8,6 +8,8 @@ import csv ...@@ -8,6 +8,8 @@ import csv
import logging import logging
import json import json
import re import re
import hashlib
import os
try: try:
from packaging import version from packaging import version
...@@ -600,6 +602,50 @@ def combine_dbs(db1, db2): ...@@ -600,6 +602,50 @@ def combine_dbs(db1, db2):
return cross_check return cross_check
# Load the database of known sequence files.
def load_known_seq(seq_db):
    """Parse the known sequence files database into a dict.

    Each useful line of the database has the form `<hex hash> <description>`.
    Everything after a '#' is a comment. Lines which do not match that form
    (blank lines, comment-only lines, a hash without a description) are
    skipped.

    Args:
        seq_db: Filename of the database to read.

    Returns:
        A dict mapping each hash, normalized to lowercase, to its
        description.

    Raises:
        OSError: If the database file cannot be opened.
        ValueError: If the same hash is listed more than once.
    """
    entry_re = re.compile(r'\s*([0-9a-fA-F]+)\s+(.*)')
    known_seqs = {}

    with open(seq_db, 'r', encoding='utf-8') as f:
        for line in f:
            # Remove the comment first and only then the surrounding
            # whitespace, so that blanks preceding a trailing comment do not
            # end up in the description.
            line = line.split('#', 1)[0].strip()
            m = entry_re.match(line)

            if not m:
                continue

            # hexdigest() yields lowercase digits; normalize the key so that
            # an entry written in uppercase can still be matched.
            kh = m.group(1).lower()
            d = m.group(2)

            # Explicit check rather than an assert, which would be stripped
            # when running with -O.
            if kh in known_seqs:
                raise ValueError(
                    f"Duplicate hash {kh} in `{seq_db}'")

            logging.debug(f'{kh} {d}')
            known_seqs[kh] = d

    logging.debug(f'{len(known_seqs)} known seq file(s)')
    return known_seqs
# Try to identify the .seq file in a list of known versions using its sha256.
def ident_seq(seq_file, seq_db):
    """Identify a sequence file among the known ones, using its sha256.

    The outcome is logged: at info level when the file is identified, at
    debug level when it is not.

    Args:
        seq_file: Filename of the sequence file to identify.
        seq_db: Filename of the known sequence files database.

    Returns:
        The description of the matching database entry, or None when the
        file is not in the database.

    Raises:
        OSError: If either file cannot be opened.
    """
    known_seqs = load_known_seq(seq_db)

    # Hash seq file, in chunks to avoid holding all of it in memory at once.
    hm = 'sha256'
    hl = hashlib.new(hm)

    with open(seq_file, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            hl.update(chunk)

    h = hl.hexdigest()
    logging.debug(f'{hm} {h} {seq_file}')

    # Try to identify the seq file
    desc = known_seqs.get(h)

    if desc is not None:
        logging.info(f"""Identified `{seq_file}' as "{desc}".""")
    else:
        logging.debug(f"Could not identify `{seq_file}'...")

    return desc
# Read the .ekl log file and the .seq file and combine them into a single # Read the .ekl log file and the .seq file and combine them into a single
# database, which we return. # database, which we return.
def read_log_and_seq(log_file, seq_file): def read_log_and_seq(log_file, seq_file):
...@@ -717,6 +763,8 @@ def read_md(input_md): ...@@ -717,6 +763,8 @@ def read_md(input_md):
if __name__ == '__main__': if __name__ == '__main__':
me = os.path.realpath(__file__)
here = os.path.dirname(me)
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='Process SCT results.' description='Process SCT results.'
' This program takes the SCT summary and sequence files,' ' This program takes the SCT summary and sequence files,'
...@@ -743,6 +791,9 @@ if __name__ == '__main__': ...@@ -743,6 +791,9 @@ if __name__ == '__main__':
parser.add_argument( parser.add_argument(
'--print', action='store_true', help='Print results to stdout') '--print', action='store_true', help='Print results to stdout')
parser.add_argument('--input-md', help='Input .md filename') parser.add_argument('--input-md', help='Input .md filename')
parser.add_argument(
'--seq-db', help='Known sequence files database filename',
default=f'{here}/seq.db')
parser.add_argument('log_file', help='Input .ekl filename') parser.add_argument('log_file', help='Input .ekl filename')
parser.add_argument('seq_file', help='Input .seq filename') parser.add_argument('seq_file', help='Input .seq filename')
parser.add_argument('find_key', nargs='?', help='Search key') parser.add_argument('find_key', nargs='?', help='Search key')
...@@ -768,6 +819,10 @@ if __name__ == '__main__': ...@@ -768,6 +819,10 @@ if __name__ == '__main__':
else: else:
# Command line argument 1 is the ekl file to open. # Command line argument 1 is the ekl file to open.
# Command line argument 2 is the seq file to open. # Command line argument 2 is the seq file to open.
# Try to identify the sequence file
ident_seq(args.seq_file, args.seq_db)
# Read both and combine them into a single cross_check database. # Read both and combine them into a single cross_check database.
cross_check = read_log_and_seq(args.log_file, args.seq_file) cross_check = read_log_and_seq(args.log_file, args.seq_file)
......
# List of known sequence files and their sha256
6a381192057c511b2b69282c58d6107c1daeaf0b95038605d4c58383eb5cc88b Test sample.seq
6ae5a92f0b83b3a1469c89f6a02b0771ae3818a424e7118a28bba053be394078 Supplied SBBR.seq
6b83dbfbd1f07fc61a918297f02f449591a72131b64ac746f969a4210f97aee8 ACS-IR v21.05_0.8_BETA-0 EBBR.seq
c06684b3f8b35871e37b9447f609f9aab6070a7ca1c4ba63a52e029c018c9b73 ACS-IR v21.07_0.9_BETA EBBR.seq
ec730fd81eada415278b39533fe7cc21147b39183447dc11fa77ad4419d13969 Supplied EBBR.seq
f7793d53c10106c1c275a4992e1710ce9863e210dd07581a3d783c4f4cf2312b ACS-IR v21.07_0.9_BETA EBBR_manual.seq
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment