1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401 |
- #!/usr/bin/env python
- from hh_reader import read_result
- from copy import deepcopy
- from pdbx.reader.PdbxReader import PdbxReader
- from pdbx.writer.PdbxWriter import PdbxWriter
- import re, os, sys, tempfile, glob
- from operator import itemgetter # hzhu
- from itertools import groupby # hzhu
- EMPTY = '*'
- GAP = '-'
- DEBUG_MODE = False
- class Gap:
- """ A gap is a continuous stretch of indels.
- It is defined by a opening position and a size/length
- """
- def __init__(self, open_pos, size):
- self.open_pos = open_pos # gap opening position
- self.size = size # num of indels in the gap
- def __repr__(self):
- return 'Gap opening pos = %d, size = %d' % (self.open_pos, self.size)
-
- class Grid:
- """
- Implementation of 2D grid of cells
- Includes boundary handling
- """
-
- def __init__(self, grid_height, grid_width):
- """
- Initializes grid to be empty, take height and width of grid as parameters
- Indexed by rows (left to right), then by columns (top to bottom)
- """
-
- self._grid_height = grid_height
- self._grid_width = grid_width
- self._cells = [ [ EMPTY for dummy_col in range(self._grid_width) ]
- for dummy_row in range(self._grid_height)]
-
- def __str__(self):
- """ Return multi-line string represenation for grid """
-
- ans = ''
- for row in range(self._grid_height):
- ans += ''.join(self._cells[row])
- ans += '\n'
- return ans
- def clear(self):
- """ Clears grid to be empty """
-
- self._cells = [[EMPTY for dummy_col in range(self._grid_width)]
- for dummy_row in range(self._grid_height)]
-
- def get_grid_height(self):
- """ Return the height of the grid """
-
- return self._grid_height
- def get_grid_width(self):
- """ Return the width of the grid """
-
- return self._grid_width
-
- def get_cell(self, row, col):
- return self._cells[row][col]
- def get_seq_start(self, row):
- """ Returns the start position of the sequence """
-
- index = 0
- for pos in self._cells[row]:
- if pos != EMPTY:
- return index
- index += 1
- return None
- def get_seq_end(self, row):
- """ Returns the end position of the sequence """
-
- index = 0
- for pos in reversed(self._cells[row]):
- if pos != EMPTY:
- return self.get_grid_width() - index
- index += 1
- return None
- def get_gaps(self, row):
- """ Return the position of gaps in a row """
-
- gaps = list()
- index = 0
- for pos in self._cells[row]:
- if pos == GAP:
- gaps.append(index)
- index += 1
- return gaps
- def get_gaps_ref_gapless(self, row):
- """ Return the pos of gaps in a row.
- The opening positions of the gaps are wrt. the gapless seq
- """
- # get all the indels
- indels = self.get_gaps(row)
- gaps = []
- # combine continuous indels into a gap
- for k,i in groupby( enumerate(indels), lambda x: x[0]-x[1] ):
- g = list(map(itemgetter(1), i))
- gaps.append( Gap(g[0], len(g)) )
- # offset the gap opening positions
- for i in range(1, len(gaps)):
- # offset by total gap number before
- gaps[i].open_pos -= sum([gaps[j].size for j in range(i)])
-
- return gaps # a list of Gap instances
-
- def get_seq_indeces(self, row):
- seq = list()
- for pos, res in enumerate(self._cells[row]):
- if res != EMPTY and res != GAP:
- seq.append(pos)
- return seq
-
- ## def get_gap_list(self): # hzhu commented this out. wrote a new version
- ## """ Returns a list of list of all gap positions in the sequence grid. """
- ## gap_pos = set()
- ## for row in range(self.get_grid_height()):
- ## for gap in self.get_gaps(row):
- ## gap_pos.add(gap)
- ## gap_pos = list(sorted(gap_pos))
- ## boundaries = [ (x + 1) for x, y in zip(gap_pos, gap_pos[1:]) if y - x != 1 ]
-
- ## gap_list = list()
- ## prev = 0
-
- ## for boundary in boundaries:
- ## sub_list = [ pos for pos in gap_pos[prev:] if pos < boundary ]
- ## gap_list.append(sub_list)
- ## prev += len(sub_list)
-
- ## gap_list.append([ x for x in gap_pos[prev:]])
-
- ## return gap_list
- def get_gap_list(self):
- """ Returns a list of Gap instances for all rows in the grid
- """
- gap_dict = dict() # each position should occur as gap at most once
- # keys are gap openning positions
- # values are Gap instances
- gap_list = []
- for row in range(self.get_grid_height()):
- gap_pos = []
- gaps = self.get_gaps_ref_gapless(row)
-
- for g in gaps:
- if g.open_pos in gap_dict: # if there is already gaps at this open pos
- if g.size > gap_dict[g.open_pos].size: # if new gap is bigger
- gap_dict[g.open_pos] = g # keep the larger gap as they overlap
- else:
- gap_dict[g.open_pos] = g
-
- gap_list = sorted(list(gap_dict.values()), key=lambda x: x.open_pos) # sort according to start position
- return gap_list # a list of Gap instances
-
- def set_gap(self, row, col):
- """ Set cell with index (row, col) to be a gap """
-
- self._cells[row][col] = GAP
- def set_empty(self, row, col):
- """ Set cell with index (row, col) to be a gap """
-
- self._cells[row][col] = EMPTY
-
- def set_cell(self, row, col, res):
- """ Set cell with index (row, col) to be full """
-
- self._cells[row][col] = res
- def is_empty(self, row, col):
- """ Checks whether cell with index (row, col) is empty """
-
- return self._cells[row][col] == EMPTY
- def is_gap(self, row, col):
- """ Checks whetehr cell with indxex (row, col) is a gap """
-
- return self._cells[row][col] == GAP
- def insert_gaps(self, cols):
- """ Inserts a gaps into a column of the template grid """
-
- for col in cols:
- for row in range(self._grid_height):
- if col >= self.get_seq_start(row) and col < self.get_seq_end(row):
- self._cells[row].insert(col, GAP)
- else:
- self._cells[row].insert(col, EMPTY)
-
- self._grid_width += 1
- def insert_gaps_row(self, cols, row):
- """ Intert gaps into cols only for certain row"""
- for col in cols:
- if col >= self.get_seq_start(row) and col < self.get_seq_end(row):
- self._cells[row].insert(col, GAP)
- else:
- self._cells[row].insert(col, EMPTY)
- # NOTE: grid_with should not be changed after every row is updated.
- #self._grid_width += 1
- def clean_trail_empty(self):
- """ Remove all trailing EMPTY and pad grid to same width"""
- # first find out the max length (exluding trailing EMPTY)
- max_width = 0
- for row in range(self._grid_height):
- for i in range(len(self._cells[row])-1, -1, -1):
- if self._cells[row][i] != EMPTY:
- break
- if i+1 > max_width:
- max_width = i+1
-
- # delete excessive EMPTY
- for row in range(self._grid_height):
- del self._cells[row][max_width:]
- # then pad all rows to the same length
- [self._cells[row].append( EMPTY * (max_width-len(self._cells[row])) ) \
- for row in range(self._grid_height) if len(self._cells[row]) < max_width]
- self._grid_width = max_width
- return
-
- def remove_gaps(self, keep_width=True): # hzhu add keep_width option
- """ Removes all gaps from the grid. """
- for row in range(self.get_grid_height()):
- not_gap = list()
- for col in range(self.get_grid_width()):
- if not self.is_gap(row, col):
- not_gap.append(col)
- self._cells[row] = [ self._cells[row][col] for col in not_gap ]
- if keep_width: # hzhu only pad to original width if desired
- for del_pos in range(self._grid_width - len(not_gap)):
- self._cells[row].append(EMPTY)
- if not keep_width: # hzhu if width is not kept, make sure width is consistent
- self.clean_trail_empty()
-
- return
-
- class QueryGrid(Grid):
- def __init__(self, grid_height, grid_width):
- Grid.__init__(self, grid_height, grid_width)
-
- def get_query_start(self, row):
- """ Returns the query start position """
- return self.get_seq_start(row) + 1
- def get_query_end(self, row):
- """ Returns the query end postion """
-
- return self.get_seq_end(row) - len(self.get_gaps(row))
- def get_col_residue(self, col):
- """ Tries to find a the query residue in a given column. Used by derive_global_seq() to
- identify the global query sequence """
-
- for row in range(self.get_grid_height()):
- if not self.is_empty(row, col):
- return self._cells[row][col]
- return GAP
- class TemplateGrid(Grid):
- def __init__(self, grid_height, grid_width):
- Grid.__init__(self, grid_height, grid_width)
- self._start = list()
- self._end = list()
- self._pdb_code = list()
- self._chain = list()
- self._organism = list()
- self._resolution = list()
-
- def display(self):
- """ Return multi-line string represenation for grid """
-
- ans = ''
- for row in range(self._grid_height):
- ans += '>P1;{p}\nstructure:{p}:{s}:{c}:{e}:{c}::{o}:{r}:\n{a}*\n'.format(
- p = self._pdb_code[row],
- s = add_white_space_end(self.get_template_start(row), 4),
- e = add_white_space_end(self.get_template_end(row), 4),
- c = self._chain[row],
- o = self._organism[row],
- r = self._resolution[row],
- a = ''.join(self._cells[row]).replace(EMPTY, GAP).replace('#', GAP))
- return ans
- def debug(self, row):
- """ Return multi-line string represenation for grid, for debugging purposes """
- ans = '{p}\nInternal: {s}, {e} Query: {qs}, {qe} Gaps ({g1}): {g2}\n{seq}\n'.format(
- p = self._pdb_code[row],
- s = self.get_seq_start(row),
- e = self.get_seq_end(row),
- qs = self.get_template_start(row),
- qe = self.get_template_end(row),
- g1 = len(self.get_gaps(row)),
- g2 = ', '.join([str(gap) for gap in self.get_gaps(row)]),
- seq = ''.join(self._cells[row]))
- return ans
- def set_metadata(self, row, start, end, pdb_code, chain, organism, resolution):
- """ Used by create_template_grid() to setup metadata of pir template """
-
- self._start.append(start)
- self._end.append(end)
- self._pdb_code.append(pdb_code)
- self._chain.append(chain)
- self._organism.append(organism)
- self._resolution.append(resolution)
- def set_map(self, row, start, end):
-
- self._start[row] = start
- self._end[row] = end
- def get_template_start(self, row):
- """ Returns the template start position """
-
- return self._start[row]
- def get_template_end(self, row):
- """ Return sthe template end position """
-
- return self._end[row]
- def del_row(self, row):
- """ Removes a complete template entry from the grid """
-
- del self._cells[row]
- del self._start[row]
- del self._end[row]
- del self._pdb_code[row]
- del self._chain[row]
- del self._organism[row]
- del self._resolution[row]
- self._grid_height -= 1
- # Helper functions
- def add_white_space_end(string, length):
- """ Adds whitespaces to a string until it has the wished length"""
-
- edited_string = str(string)
-
- if len(edited_string) >= length:
- return string
- else:
- while len(edited_string) != length:
- edited_string += ' '
-
- return edited_string
- def convert_aa_code(three_letter, convert):
- """
- Assumes a string that contains a three letter aminoacid code and
- returns the corresponding one letter code.
- """
-
- aa_code = {
- 'CYS': 'C',
- 'ASP': 'D',
- 'SER': 'S',
- 'GLN': 'Q',
- 'LYS': 'K',
- 'ILE': 'I',
- 'PRO': 'P',
- 'THR': 'T',
- 'PHE': 'F',
- 'ASN': 'N',
- 'GLY': 'G',
- 'HIS': 'H',
- 'LEU': 'L',
- 'ARG': 'R',
- 'TRP': 'W',
- 'ALA': 'A',
- 'VAL': 'V',
- 'GLU': 'E',
- 'TYR': 'Y',
- 'MET': 'M',
- }
- non_canonical = {
- 'MSE': 1,
- 'HYP': 2,
- 'MLY': 3,
- 'SEP': 4,
- 'TPO': 5,
- 'CSO': 6,
- 'PTR': 7,
- 'KCX': 8,
- 'CME': 9,
- 'CSD': 10,
- 'CAS': 11,
- 'MLE': 12,
- 'DAL': 13,
- 'CGU': 14,
- 'DLE': 15,
- 'FME': 16,
- 'DVA': 17,
- 'OCS': 18,
- 'DPR': 19,
- 'MVA': 20,
- 'TYS': 21,
- 'M3L': 22,
- 'SMC': 23,
- 'ALY': 24,
- 'CSX': 25,
- 'DCY': 26,
- 'NLE': 27,
- 'DGL': 28,
- 'DSN': 29,
- 'CSS': 30,
- 'DLY': 31,
- 'MLZ': 32,
- 'DPN': 33,
- 'DAR': 34,
- 'PHI': 35,
- 'IAS': 36,
- 'DAS': 37,
- 'HIC': 38,
- 'MP8': 39,
- 'DTH': 40,
- 'DIL': 41,
- 'MEN': 42,
- 'DTY': 43,
- 'CXM': 44,
- 'DGN': 45,
- 'DTR': 46,
- 'SAC': 47,
- 'DSG': 48,
- 'MME': 49,
- 'MAA': 50,
- 'YOF': 51,
- 'FP9': 52,
- 'FVA': 53,
- 'MLU': 54,
- 'OMY': 55,
- 'FGA': 56,
- 'MEA': 57,
- 'CMH': 58,
- 'DHI': 59,
- 'SEC': 60,
- 'OMZ': 61,
- 'SCY': 62,
- 'MHO': 63,
- 'MED': 64,
- 'CAF': 65,
- 'NIY': 66,
- 'OAS': 67,
- 'SCH': 68,
- 'MK8': 69,
- 'SME': 70,
- 'LYZ': 71
- }
- if three_letter in aa_code.keys():
- return aa_code[three_letter]
- elif convert and (three_letter in non_canonical.keys()):
- return non_canonical[three_letter]
- else:
- return '-'
- def get_query_name(hhr_file):
- with open(hhr_file) as fh:
- for line in fh:
- if line.startswith('Query'):
- # match the PDB Code
- m = re.search('(\d[A-Z0-9]{3})_(\S)', line)
- if m:
- pdb_code = m.group(1)
- chain = m.group(2)
- else:
- pdb_code = 'UKNP'
- chain = 'A'
- # raise ValueError('Input HHR-File Does not seem to be a PDB-Structure')
- break
- return pdb_code, chain
- def get_cif_files(folder):
- """ Gets all cif files located in folder. """
-
- return glob(os.path.join(folder, '*.cif'))
- def open_cif(cif_file):
- """ Assumes a mmCif file and returns a data block used for subsequent procedures """
- # The "usual" procedure to open a mmCIF with pdbX/mmCIF
- with open(cif_file) as cif_fh:
- data = []
- reader = PdbxReader(cif_fh)
- reader.read(data)
- block = data[0]
-
- return block
- def get_pdb_entry_id(block):
- """ Extracts the PDB entry information of a cif file and returns it as a string """
- entry = block.getObj('entry')
- entry_id = entry.getValue('id')
-
- return entry_id
- def template_id_to_pdb(template_id):
- """
- Extracts PDB ID and chain name from the provided template id
- """
- # match PDBID without chain (8fab, 1a01)
- m = re.match(r'/^(\d[A-Za-z0-9]{3})$', template_id)
- if m:
- return m.group(1).upper(), 'A'
-
- # PDB CODE with chain Identifier
- m = re.match(r'^(\d[A-Za-z0-9]{3})_(\S)$', template_id)
- if m:
- return m.group(1).upper(), m.group(2).upper()
- # Match DALI ID
- m = re.match(r'^(\d[A-Za-z0-9]{3})([A-Za-z0-9]?)_\d+$', template_id)
- if m:
- return m.group(1).upper(), m.group(2).upper()
-
- # No PDB code and chain identified
- return None, None
- def create_template_grid(hhr_data):
- """ Creates a template grid """
- total_seq = len(hhr_data)
- templ_max = max( [ hhr.start[0] + len(to_seq(hhr.template_ali)) for hhr in hhr_data ] ) - 1
- template_grid = TemplateGrid(total_seq, templ_max)
- for row, template in enumerate(hhr_data):
- seq_start = template.start[0] - 1
- templatealignment = to_seq(template.template_ali)
- seq_end = seq_start + len(templatealignment)
- # Load Meta Data
- start = template.start[1]
- end = template.end[1]
- # Get pdb_code and chain identifier of template
- pdb_code, chain = template_id_to_pdb(template.template_id)
- m = re.search("(\d+.\d+)A", template.template_info) # try to extract resolution of the structure
-
- if m:
- resolution = m.group(1)
- else:
- resolution = ""
- m = re.search("\{(.*)\}", template.template_info) # try to extract the organism
- if m:
- organism = m.group(1).replace(":", " ") # make sure that no colons are in the organism
- else:
- organism = ""
- template_grid.set_metadata(row, start, end, pdb_code, chain, organism, resolution)
- # Write sequence into the grid
- for pos, col in enumerate(range(seq_start, seq_end)):
- template_grid.set_cell(row, col, templatealignment[pos])
- return template_grid
- def to_seq(ali):
- if isinstance(ali, list):
- return ''.join(ali)
- else:
- return ali
-
- def create_query_grid(hhr_data):
- """ Creates a Query Grid """
-
- total_seq = len(hhr_data)
- query_max = max( [ hhr.start[0] + len(to_seq(hhr.query_ali)) for hhr in hhr_data ] ) - 1
- query_grid = QueryGrid(total_seq, query_max)
- for row, query in enumerate(hhr_data):
- queryalignment = to_seq(query.query_ali)
- query_start = query.start[0] - 1
- query_end = query_start + len(queryalignment)
- for pos, col in enumerate(range(query_start, query_end)):
- if queryalignment[pos] not in ['Z', 'U', 'O', 'J', 'X', 'B']: # CAUTION
- query_grid.set_cell(row, col, queryalignment[pos])
- return query_grid
- def create_gapless_grid(grid):
- """ Returns a gapless grid """
- gapless = deepcopy(grid)
- gapless.remove_gaps(keep_width=False) # hzhu: shrink grid
- return gapless
- def process_query_grid(query_grid, gapless_grid):
- """ Processes a query grid sucht that it contains all gaps
- """
- gaplist = query_grid.get_gap_list()
- off_set = 0
-
- for g in gaplist:
- gapless_grid.insert_gaps([ p + off_set for p in range(g.open_pos, g.open_pos+g.size) ])
- off_set += g.size
-
- return gapless_grid
- def derive_global_seq(processed_query_grid, query_name, query_chain):
- global_seq = list()
- for col in range(processed_query_grid.get_grid_width()):
- global_seq.append(processed_query_grid.get_col_residue(col))
- # this is the query entry
- header = '>P1;{q}\nsequence:{q}:1 :{c}:{l} :{c}::::\n'.format(
- q = query_name,
- l = len(global_seq),
- c = query_chain)
- return header + ''.join(global_seq) + '*'
- def process_template_grid(query_grid, template_grid):
- """ Insertes Gaps into the template grid
- Only add gaps from **other** query_grids into template grid (NOT gapless)
- """
- gaplist = query_grid.get_gap_list() # use this to keep the offset
-
- for row in range(template_grid.get_grid_height()):
- # do NOT consider gaps in current query row
- gaplist_row = query_grid.get_gaps_ref_gapless(row)
- gapdict_row = dict(zip([g.open_pos for g in gaplist_row],
- [g.size for g in gaplist_row]))
- off_set = 0
- for g in gaplist:
- # if there is a gap with same opening position in the current row,
- # only consider g if it is larger than the on in the current row
- if g.open_pos in gapdict_row:
- if g.size > gapdict_row[g.open_pos]:
- template_grid.insert_gaps_row([ p + off_set for p in range(g.open_pos,
- g.open_pos+g.size-gapdict_row[g.open_pos]) ], row)
- else:
- template_grid.insert_gaps_row([ p + off_set for p in range(g.open_pos, g.open_pos+g.size) ], row)
-
- off_set += g.size # even if the gaps are not inserted, the offset should be adjusted
- template_grid.clean_trail_empty() # clean the redundant trailing EMPTY char
-
- return template_grid
- def compare_with_cifs(template_grid, folder, output_path, convert, threshold):
- """
- Compare the PIR Alignment with Atomsection of a mmCIF file. To make the ATOM-Section of
- a mmCIF file compatible with MODELLER, each residue has in the ATOM-Section has to match
- corresponding positions in the PIR-Alignment
- """
- # glob the mmCif files from given directory and map the PDB identifier to the path
- cif_files = glob.glob(os.path.join(folder, '*.cif'))
- cif_paths = { path.split('/')[-1].split('.')[0].upper() : path for path in cif_files }
- cif_edits = dict()
-
- # create the path where renumbered cifs are saved to
- if not os.path.exists(output_path):
- os.mkdir(output_path)
-
- # if the cif does not contain any residue of the por alignment we delete it
- del_row = list()
- for row in range(template_grid.get_grid_height()):
- # get the pdb code and strand id from the current template
- pdb_code = template_grid._pdb_code[row]
- chain = template_grid._chain[row] # hhr users pdb chain ID
- # load mmCif file accordingly
- if pdb_code in cif_edits.keys():
- block = cif_edits[pdb_code]
- else:
- try:
- block = open_cif(cif_paths[pdb_code])
- except KeyError:
- del_row.append(row)
- print ('! Did not find the mmCIF file for {pdb}. Removing it from the alignment.'.format(
- pdb = pdb_code))
- continue
- # Create a mapping of the atom site
- atom_site = block.getObj('atom_site')
- ########################################################################
- ## Get the mapping of the residues in the atom section ##
- ########################################################################
- cif_seq = dict()
- # For the case that we have to rename a chain
- cif_chains = set([])
- # Iterate through the atomsection of the cif file
- for atom_row in range(0, atom_site.getRowCount()):
-
- try:
- if atom_site.getValue('label_comp_id', atom_row) == 'HOH':
- continue
- cif_chain = atom_site.getValue('label_asym_id', atom_row)
- pdb_chain = atom_site.getValue('auth_asym_id', atom_row) # use PDB chain ID
- except IndexError:
- pass
- cif_chains.add(cif_chain)
- # We do not care about the residues apart from the chain
- #if cif_chain != chain: # hzhu
- if pdb_chain != chain: # hhr uses PDB chain, not the cif chain! hzhu
- continue
- # and update the chain id from pdb_chain to cif_chain
- if atom_site.getValue('group_PDB', atom_row).startswith('ATOM'): # hzhu in case HETATM ruins ch id
- template_grid._chain[row] = cif_chain
-
- # get the residue and the residue number
- try:
- res_num = int(atom_site.getValue("label_seq_id", atom_row))
- except ValueError:
- continue
- residue = atom_site.getValue('label_comp_id', atom_row)
- residue = convert_aa_code(residue, convert)
- if res_num not in cif_seq.keys():
- cif_seq[res_num] = residue
- elif res_num in cif_seq.keys() and cif_seq[res_num] == residue:
- continue
- elif res_num in cif_seq.keys() and cif_seq[res_num] != residue:
- cif_seq[res_num] = '-'
-
- if DEBUG_MODE:
- print ('! {p} {c}: mmCIF contains a residue position that is assigned {cr} to two residues. Removing it.'.format(
- p = pdb_code,
- c = chain,
- cr = res_num))
- ########################################################################
- ## Rename chain if necessary ##
- ########################################################################
- chain_idx = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- if len(template_grid._chain[row]) != 1:
- i = 0
- new_chain = 0
-
- while i < len(chain_idx):
- if chain_idx[i] in cif_chains:
- if DEBUG_MODE:
- print ('! {p} {c}: Chain identifier {i} is already taken.'.format(
- p = pdb_code,
- c = chain,
- i = chain_idx[i]))
- i += 1
- else:
- new_chain = chain_idx[i]
- break
- if new_chain == 0:
- if DEBUG_MODE:
- print ('! {p} {c}: Could not use {p}. The chain identifier {c} is not compatible with MODELLER (2 letters) and could not be renanmed.'.format(
- p = pdb_code,
- c = chain))
-
- del_row.append(row)
- continue
- if new_chain != 0:
- print ('Selected new chain name {c}'.format(c = new_chain))
- #TODO
- ########################################################################
- ## Compare cif positions with the atom positions ##
- ########################################################################
- del_pos = list()
- mod_pos = dict()
- mapping = dict()
- for pos_cif, pos_tem in zip(range(template_grid.get_template_start(row),
- template_grid.get_template_end(row) + 1), template_grid.get_seq_indeces(row)):
- res_tem = template_grid.get_cell(row, pos_tem)
- try:
- res_cif = cif_seq[pos_cif]
- except KeyError:
- res_cif = -1
-
- match = True if res_tem == res_cif else False
- if not match:
- if res_cif == 1 and res_tem == 'M':
- mod_pos[pos_cif] = 1
- mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
- elif res_cif == 2 and res_tem == 'P':
- mod_pos[pos_cif] = 2
- mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
- elif res_cif == 3 and res_tem == 'K':
- mod_pos[pos_cif] = 3
- mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
- elif res_cif == 4 and res_tem == 'S':
- mod_pos[pos_cif] = 4
- mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
- elif res_cif == 5 and res_tem == 'T':
- mod_pos[pos_cif] = 5
- mapping[(pos_tem, res_tem)] = (pos_cif, 'T')
- elif res_cif == 6 and res_tem == 'C':
- mod_pos[pos_cif] = 6
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 7 and res_tem == 'Y':
- mod_pos[pos_cif] = 7
- mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
- elif res_cif == 8 and res_tem == 'K':
- mod_pos[pos_cif] = 8
- mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
- elif res_cif == 9 and res_tem == 'C':
- mod_pos[pos_cif] = 9
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 10 and res_tem == 'A':
- mod_pos[pos_cif] = 10
- mapping[(pos_tem, res_tem)] = (pos_cif, 'A')
- elif res_cif == 11 and res_tem == 'C':
- mod_pos[pos_cif] = 11
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 12 and res_tem == 'L':
- mod_pos[pos_cif] = 12
- mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
- elif res_cif == 13 and res_tem == 'A':
- mod_pos[pos_cif] = 13
- mapping[(pos_tem, res_tem)] = (pos_cif, 'A')
- elif res_cif == 14 and res_tem == 'E':
- mod_pos[pos_cif] = 14
- mapping[(pos_tem, res_tem)] = (pos_cif, 'E')
- elif res_cif == 15 and res_tem == 'L':
- mod_pos[pos_cif] = 15
- mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
- elif res_cif == 16 and res_tem == 'M':
- mod_pos[pos_cif] = 16
- mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
- elif res_cif == 17 and res_tem == 'V':
- mod_pos[pos_cif] = 17
- mapping[(pos_tem, res_tem)] = (pos_cif, 'V')
- elif res_cif == 18 and res_tem == 'C':
- mod_pos[pos_cif] = 18
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 19 and res_tem == 'P':
- mod_pos[pos_cif] = 19
- mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
- elif res_cif == 20 and res_tem == 'V':
- mod_pos[pos_cif] = 20
- mapping[(pos_tem, res_tem)] = (pos_cif, 'V')
- elif res_cif == 21 and res_tem == 'Y':
- mod_pos[pos_cif] = 21
- mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
- elif res_cif == 22 and res_tem == 'K':
- mod_pos[pos_cif] = 22
- mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
- elif res_cif == 23 and res_tem == 'C':
- mod_pos[pos_cif] = 23
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 24 and res_tem == 'K':
- mod_pos[pos_cif] = 24
- mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
- elif res_cif == 25 and res_tem == 'C':
- mod_pos[pos_cif] = 25
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 26 and res_tem == 'C':
- mod_pos[pos_cif] = 26
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 27 and res_tem == 'L':
- mod_pos[pos_cif] = 27
- mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
- elif res_cif == 28 and res_tem == 'E':
- mod_pos[pos_cif] = 28
- mapping[(pos_tem, res_tem)] = (pos_cif, 'E')
- elif res_cif == 29 and res_tem == 'S':
- mod_pos[pos_cif] = 29
- mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
- elif res_cif == 30 and res_tem == 'C':
- mod_pos[pos_cif] = 30
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 31 and res_tem == 'K':
- mod_pos[pos_cif] = 31
- mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
- elif res_cif == 32 and res_tem == 'K':
- mod_pos[pos_cif] = 32
- mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
- elif res_cif == 33 and res_tem == 'F':
- mod_pos[pos_cif] = 33
- mapping[(pos_tem, res_tem)] = (pos_cif, 'F')
- elif res_cif == 34 and res_tem == 'R':
- mod_pos[pos_cif] = 34
- mapping[(pos_tem, res_tem)] = (pos_cif, 'R')
- elif res_cif == 35 and res_tem == 'F':
- mod_pos[pos_cif] = 35
- mapping[(pos_tem, res_tem)] = (pos_cif, 'F')
- elif res_cif == 36 and res_tem == 'D':
- mod_pos[pos_cif] = 36
- mapping[(pos_tem, res_tem)] = (pos_cif, 'D')
- elif res_cif == 37 and res_tem == 'D':
- mod_pos[pos_cif] = 37
- mapping[(pos_tem, res_tem)] = (pos_cif, 'D')
- elif res_cif == 38 and res_tem == 'H':
- mod_pos[pos_cif] = 38
- mapping[(pos_tem, res_tem)] = (pos_cif, 'H')
- elif res_cif == 39 and res_tem == 'P':
- mod_pos[pos_cif] = 39
- mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
- elif res_cif == 40 and res_tem == 'T':
- mod_pos[pos_cif] = 40
- mapping[(pos_tem, res_tem)] = (pos_cif, 'T')
- elif res_cif == 41 and res_tem == 'I':
- mod_pos[pos_cif] = 41
- mapping[(pos_tem, res_tem)] = (pos_cif, 'I')
- elif res_cif == 42 and res_tem == 'N':
- mod_pos[pos_cif] = 42
- mapping[(pos_tem, res_tem)] = (pos_cif, 'N')
- elif res_cif == 43 and res_tem == 'Y':
- mod_pos[pos_cif] = 43
- mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
- elif res_cif == 44 and res_tem == 'M':
- mod_pos[pos_cif] = 44
- mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
- elif res_cif == 45 and res_tem == 'G':
- mod_pos[pos_cif] = 45
- mapping[(pos_tem, res_tem)] = (pos_cif, 'G')
- elif res_cif == 46 and res_tem == 'W':
- mod_pos[pos_cif] = 46
- mapping[(pos_tem, res_tem)] = (pos_cif, 'W')
- elif res_cif == 47 and res_tem == 'S':
- mod_pos[pos_cif] = 47
- mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
- elif res_cif == 48 and res_tem == 'N':
- mod_pos[pos_cif] = 48
- mapping[(pos_tem, res_tem)] = (pos_cif, 'N')
- elif res_cif == 49 and res_tem == 'M':
- mod_pos[pos_cif] = 49
- mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
- elif res_cif == 50 and res_tem == 'A':
- mod_pos[pos_cif] = 50
- mapping[(pos_tem, res_tem)] = (pos_cif, 'A')
- elif res_cif == 51 and res_tem == 'Y':
- mod_pos[pos_cif] = 51
- mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
- elif res_cif == 52 and res_tem == 'P':
- mod_pos[pos_cif] = 52
- mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
- elif res_cif == 53 and res_tem == 'V':
- mod_pos[pos_cif] = 53
- mapping[(pos_tem, res_tem)] = (pos_cif, 'V')
- elif res_cif == 54 and res_tem == 'L':
- mod_pos[pos_cif] = 54
- mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
- elif res_cif == 55 and res_tem == 'Y':
- mod_pos[pos_cif] = 55
- mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
- elif res_cif == 56 and res_tem == 'E':
- mod_pos[pos_cif] = 56
- mapping[(pos_tem, res_tem)] = (pos_cif, 'E')
-
- elif res_cif == 57 and res_tem == 'F':
- mod_pos[pos_cif] = 57
- mapping[(pos_tem, res_tem)] = (pos_cif, 'F')
- elif res_cif == 58 and res_tem == 'C':
- mod_pos[pos_cif] = 58
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 59 and res_tem == 'H':
- mod_pos[pos_cif] = 59
- mapping[(pos_tem, res_tem)] = (pos_cif, 'H')
- elif res_cif == 60 and res_tem == 'C':
- mod_pos[pos_cif] = 60
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 61 and res_tem == 'Y':
- mod_pos[pos_cif] = 61
- mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
- elif res_cif == 62 and res_tem == 'C':
- mod_pos[pos_cif] = 62
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 63 and res_tem == 'M':
- mod_pos[pos_cif] = 63
- mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
- elif res_cif == 64 and res_tem == 'M':
- mod_pos[pos_cif] = 64
- mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
- elif res_cif == 65 and res_tem == 'C':
- mod_pos[pos_cif] = 65
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 66 and res_tem == 'Y':
- mod_pos[pos_cif] = 66
- mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
- elif res_cif == 67 and res_tem == 'S':
- mod_pos[pos_cif] = 67
- mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
- elif res_cif == 68 and res_tem == 'C':
- mod_pos[pos_cif] = 68
- mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
- elif res_cif == 69 and res_tem == 'L':
- mod_pos[pos_cif] = 69
- mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
- elif res_cif == 70 and res_tem == 'M':
- mod_pos[pos_cif] = 70
- mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
- elif res_cif == 71 and res_tem == 'K':
- mod_pos[pos_cif] = 71
- mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
- else:
- # insert a gap
- template_grid.set_empty(row, pos_tem)
- mapping[(pos_tem, res_tem)] = (pos_cif, res_cif)
-
- if DEBUG_MODE:
- print ('! {p} {c}: template pos {pt} ({rt}) does not match cif pos {pc} ({rc}). Replacing with gap.'.format(
- p = pdb_code,
- c = chain,
- pt = pos_tem,
- rt = res_tem,
- pc = pos_cif,
- rc = res_cif if res_cif != -1 else 'DNE'))
-
- if res_cif != -1:
- del_pos.append(pos_cif)
- else:
- mapping[(pos_tem, res_tem)] = (pos_cif, res_cif)
- # adjust template start and end positions
- correct_mapping = { key:value for key, value in mapping.items() if key[1] == value[1] }
- try:
- tstart = correct_mapping[sorted(correct_mapping.keys())[0]][0]
- tend = correct_mapping[sorted(correct_mapping.keys())[-1]][0]
- template_grid.set_map(row, tstart, tend)
- except IndexError:
- # This exception handles cases in which all residues were deleted
- if DEBUG_MODE:
- print ('! {p} {c}: Removing {p} from alignment. No residues matched the alignment sequence.'.format(
- p = pdb_code,
- c = chain))
- del_row.append(row)
- continue
- ########################################################################
- ## Delete rows from the PIR Alignment if the residue ratio is to low ##
- ########################################################################
- if threshold > 0:
-
- gaps = 0
- res = 0
- for col in range(template_grid.get_grid_width()):
- if template_grid.is_empty(row, col):
- template_grid.set_gap(row, col)
- if template_grid.is_gap(row, col):
- gaps += 1
- else:
- res += 1
- ratio = res/float(gaps + res)
-
- if ratio > threshold:
- print ('! Template {p} successfully passed residue ratio ({r:.2f} / {t}).'.format(
- p = pdb_code,
- r = ratio,
- t = threshold ))
- else:
- print ('! Template {p} did not passed residue ratio ({r:.2f} / {t}). Removing it from pir Alignment.'.format(
- p = pdb_code,
- r = ratio,
- t = threshold ))
-
- if row not in del_row:
- del_row.append(row)
- continue
- ########################################################################
- ## Edit cif files ##
- ########################################################################
- rem_row = list() # verbosity: saves information about removed residues
- mod_row = list() # verbosity: saves information about modified residues
- cha_row = list() # verbosity: saves any other changes
- for atom_row in reversed(range(0, atom_site.getRowCount())):
-
- try:
- cif_chain = atom_site.getValue('label_asym_id', atom_row)
- except IndexError:
- pass
- # We do not care about the residues apart from the chain
- if cif_chain != chain:
- continue
- # get the residue number
- try:
- res_num = int(atom_site.getValue("label_seq_id", atom_row))
- except ValueError:
- continue
- # pdb_PDB_model_num has to be set to 1
- try:
- model_num = int(atom_site.getValue('pdbx_PDB_model_num', atom_row))
- except IndexError:
- model_num = 1 # if we cannot extract, assume that it is alright
- try:
- ins_code = atom_site.getValue('pdbx_PDB_ins_code', atom_row)
- except IndexError:
- ins_code = '?' # assume it has no insertion code
- group_PDB = atom_site.getValue('group_PDB', atom_row)
- residue = atom_site.getValue('label_comp_id', atom_row)
- residue = convert_aa_code(residue, convert)
- # MODELLER accepts only structures if pdbx_PDB_model_num is set to 1
- if model_num != 1:
- if (res_num, residue, 'model_num') not in cha_row:
- cha_row.append((res_num, residue, 'model_num'))
-
- atom_site.setValue(1, "pdbx_PDB_model_num", atom_row)
- if ins_code != '?':
-
- if (res_num, residue, 'ins_code') not in cha_row:
- cha_row.append((res_num, residue, 'ins_code'))
-
- atom_site.setValue('?', "pdbx_PDB_ins_code", atom_row)
- if group_PDB != 'ATOM':
- if (res_num, residue, 'group_PDB') not in cha_row:
- cha_row.append((res_num, residue, 'group_PDB'))
- atom_site.setValue('ATOM', 'group_PDB', atom_row)
- ########################################################################
- ## Delete residues ##
- ########################################################################
- if res_num in del_pos:
- if (res_num, residue) not in rem_row:
- rem_row.append((res_num, residue))
-
- atom_site.removeRow(atom_row)
- ########################################################################
- ## Modify residues ##
- ########################################################################
- if res_num in mod_pos.keys():
-
- # Get the data
- type_symbol = atom_site.getValue('type_symbol', atom_row)
- label_atom_id = atom_site.getValue('label_atom_id', atom_row)
- auth_atom_id = atom_site.getValue('auth_atom_id', atom_row)
- if mod_pos[res_num] == 1: # try to convert MSE to M
-
- atom_site.setValue('MET', 'label_comp_id', atom_row)
-
- try:
- atom_site.setValue('MET', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if type_symbol == 'SE':
- atom_site.setValue('S', 'type_symbol', atom_row)
- if label_atom_id == 'SE':
- atom_site.setValue('S', 'label_atom_id', atom_row)
- if auth_atom_id == 'SE':
- atom_site.setValue('S', 'auth_atom_id', atom_row)
- if (res_num, residue, 'MSE -> MET') not in mod_row:
- mod_row.append((res_num, residue, 'MSE -> MET'))
- elif mod_pos[res_num] == 2: # try to convert HYP to PRO
- # apparently it is enough to rename the label_comp_id to PRO to get
- # MODELLER working with Hydroxyprolines (HYP)
- atom_site.setValue('PRO', 'label_comp_id', atom_row)
-
- try:
- atom_site.setValue('PRO', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'HYP -> PRO') not in mod_row:
- mod_row.append((res_num, residue, 'HYP -> PRO'))
- elif mod_pos[res_num] == 3: # try to convert MLY to LYS
- atom_site.setValue('LYS', 'label_comp_id', atom_row)
-
- try:
- atom_site.setValue('LYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MLY -> LYS') not in mod_row:
- mod_row.append((res_num, residue, 'MLY -> LYS'))
- elif mod_pos[res_num] == 4: # converts Phosphoserine to Serine
- atom_site.setValue('SER', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('SER', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'SEP -> SER') not in mod_row:
- mod_row.append((res_num, residue, 'SEP -> SER'))
- elif mod_pos[res_num] == 5: # converts Phosphothreonine to Threonine
- atom_site.setValue('THR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('THR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'TPO -> THR') not in mod_row:
- mod_row.append((res_num, residue, 'TPO -> THR'))
- elif mod_pos[res_num] == 6: # converts S-HYDROXYCYSTEINE to Cysteine
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CSO -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'CSO -> CYS'))
- elif mod_pos[res_num] == 7: # converts O-PHOSPHOTYROSINE to Tyrosine
- atom_site.setValue('TYR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TYR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'PTR -> TYR') not in mod_row:
- mod_row.append((res_num, residue, 'PTR -> TYR'))
- elif mod_pos[res_num] == 8: # converts LYSINE NZ-CARBOXYLIC ACID to Lysine
- atom_site.setValue('LYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'KCX -> LYS') not in mod_row:
- mod_row.append((res_num, residue, 'KCX -> LYS'))
- elif mod_pos[res_num] == 9: # converts S,S-(2-HYDROXYETHYL)THIOCYSTEINE to Cysteine
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CME -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'CME -> CYS'))
- elif mod_pos[res_num] == 10: # converts 3-SULFINOALANINE to Alanine
- atom_site.setValue('ALA', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ALA', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CSD -> ALA') not in mod_row:
- mod_row.append((res_num, residue, 'CSD -> ALA'))
- elif mod_pos[res_num] == 11: # converts S-(DIMETHYLARSENIC)CYSTEINE to Cysteine
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CAS -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'CAS -> CYS'))
- elif mod_pos[res_num] == 12: # converts N-METHYLLEUCINE (MLE) to Leucine
- atom_site.setValue('LEU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LEU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MLE -> LEU') not in mod_row:
- mod_row.append((res_num, residue, 'MLE -> LEU'))
- elif mod_pos[res_num] == 13: # converts D-ALANINE (DAL) to ALA
- atom_site.setValue('ALA', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ALA', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DAL -> ALA') not in mod_row:
- mod_row.append((res_num, residue, 'DAL -> ALA'))
- elif mod_pos[res_num] == 14: # converts GAMMA-CARBOXY-GLUTAMIC ACID (CGU) to GLU
- atom_site.setValue('GLU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('GLU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CGU -> GLU') not in mod_row:
- mod_row.append((res_num, residue, 'CGU -> GLU'))
- elif mod_pos[res_num] == 15: # converts D-LEUCINE (DLE) to LEU
- atom_site.setValue('LEU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LEU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DLE -> LEU') not in mod_row:
- mod_row.append((res_num, residue, 'DLE -> LEU'))
- elif mod_pos[res_num] == 16: # converts N-FORMYLMETHIONINE (FME) to MET
- atom_site.setValue('MET', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('MET', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'FME -> MET') not in mod_row:
- mod_row.append((res_num, residue, 'FME -> MET'))
- elif mod_pos[res_num] == 17: # converts D-VAL (DVA) to VAL
- atom_site.setValue('VAL', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('VAL', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DVA -> VAL') not in mod_row:
- mod_row.append((res_num, residue, 'DVA -> VAL'))
- elif mod_pos[res_num] == 18: # converts CYSTEINESULFONIC ACID (OCS) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'OCS -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'OCS -> CYS'))
- elif mod_pos[res_num] == 19: # converts D-PROLINE (DPR) to PRO
- atom_site.setValue('PRO', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('PRO', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DPR -> PRO') not in mod_row:
- mod_row.append((res_num, residue, 'DPR -> PRO'))
- elif mod_pos[res_num] == 20: # converts N-METHYLVALINE (MVA) to VAL
- atom_site.setValue('VAL', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('VAL', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MVA -> VAL') not in mod_row:
- mod_row.append((res_num, residue, 'MVA -> VAL'))
- elif mod_pos[res_num] == 21: # converts O-SULFO-L-TYROSINE (TYS) to VAL
- atom_site.setValue('TYR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TYR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'TYS -> TYR') not in mod_row:
- mod_row.append((res_num, residue, 'TYS -> TYR'))
- elif mod_pos[res_num] == 22: # converts N-TRIMETHYLLYSINE (M3L) to LYS
- atom_site.setValue('LYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'M3L -> LYS') not in mod_row:
- mod_row.append((res_num, residue, 'M3L -> LYS'))
- elif mod_pos[res_num] == 23: # converts S-METHYLCYSTEINE (SMC) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'SMC -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'SMC -> CYS'))
- elif mod_pos[res_num] == 24: # converts N(6)-ACETYLLYSINE (ALY) to LYS
- atom_site.setValue('LYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'ALY -> LYS') not in mod_row:
- mod_row.append((res_num, residue, 'ALY -> LYS'))
- elif mod_pos[res_num] == 25: # converts S-OXY CYSTEINE (CSX) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CSX -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'CSX -> CYS'))
- elif mod_pos[res_num] == 26: # converts D-CYSTEINE (DCY) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DCY -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'DCY -> CYS'))
- elif mod_pos[res_num] == 27: # converts NORLEUCINE (NLE) to LEU
- atom_site.setValue('LEU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LEU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'NLE -> LEU') not in mod_row:
- mod_row.append((res_num, residue, 'NLE -> LEU'))
- elif mod_pos[res_num] == 28: # converts D-GLUTAMIC ACID (DGL) to GLU
- atom_site.setValue('GLU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('GLU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DGL -> GLU') not in mod_row:
- mod_row.append((res_num, residue, 'DGL -> GLU'))
- elif mod_pos[res_num] == 29: # converts D-SERINE (DSN) to SER
- atom_site.setValue('SER', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('SER', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DSN -> SER') not in mod_row:
- mod_row.append((res_num, residue, 'DSN -> SER'))
- elif mod_pos[res_num] == 30: # converts S-MERCAPTOCYSTEINE (CSS) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CSS -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'CSS -> CYS'))
- elif mod_pos[res_num] == 31: # converts D-LYSINE (DLY) to LYS
- atom_site.setValue('LYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DLY -> LYS') not in mod_row:
- mod_row.append((res_num, residue, 'DLY -> LYS'))
- elif mod_pos[res_num] == 32: # converts N-METHYL-LYSINE (MLZ) to LYS
- atom_site.setValue('LYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MLZ -> LYS') not in mod_row:
- mod_row.append((res_num, residue, 'MLZ -> LYS'))
- elif mod_pos[res_num] == 33: # converts D-PHENYLALANINE (DPN) to PHE
- atom_site.setValue('PHE', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('PHE', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DPN -> PHE') not in mod_row:
- mod_row.append((res_num, residue, 'DPN -> PHE'))
- elif mod_pos[res_num] == 34: # converts D-ARGININE (DAR) to ARG
- atom_site.setValue('ARG', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ARG', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DAR -> ARG') not in mod_row:
- mod_row.append((res_num, residue, 'DAR -> ARG'))
- elif mod_pos[res_num] == 35: # converts IODO-PHENYLALANINE (PHI) to PHE
- atom_site.setValue('PHE', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('PHE', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'PHI -> PHE') not in mod_row:
- mod_row.append((res_num, residue, 'PHI -> PHE'))
- elif mod_pos[res_num] == 36: # converts BETA-L-ASPARTIC ACID (IAS) to ASP
- atom_site.setValue('ASP', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ASP', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'IAS -> ASP') not in mod_row:
- mod_row.append((res_num, residue, 'IAS -> ASP'))
- elif mod_pos[res_num] == 37: # converts D-ASPARTIC ACID (DAS) to ASP
- atom_site.setValue('ASP', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ASP', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DAS -> ASP') not in mod_row:
- mod_row.append((res_num, residue, 'DAS -> ASP'))
- elif mod_pos[res_num] == 38: # converts 4-METHYL-HISTIDINE (HIC) to HIS
- atom_site.setValue('HIS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('HIS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'HIC -> HIS') not in mod_row:
- mod_row.append((res_num, residue, 'HIC -> HIS'))
- elif mod_pos[res_num] == 39: # converts (4R)-4-methyl-L-proline (MP8) to PRO
- atom_site.setValue('PRO', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('PRO', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MP8 -> PRO') not in mod_row:
- mod_row.append((res_num, residue, 'MP8 -> PRO'))
- elif mod_pos[res_num] == 40: # converts D-THREONINE (DTH) to THR
- atom_site.setValue('THR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('THR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DTH -> THR') not in mod_row:
- mod_row.append((res_num, residue, 'DTH -> THR'))
- elif mod_pos[res_num] == 41: # converts D-ISOLEUCINE (DIL) to ILE
- atom_site.setValue('ILE', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ILE', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DIL -> ILE') not in mod_row:
- mod_row.append((res_num, residue, 'DIL -> ILE'))
- elif mod_pos[res_num] == 42: # converts N-METHYL ASPARAGINE (MEN) to ASN
- atom_site.setValue('ASN', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ASN', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MEN -> ASN') not in mod_row:
- mod_row.append((res_num, residue, 'MEN -> ASN'))
- elif mod_pos[res_num] == 43: # converts D-TYROSINE (DTY) to TYR
- atom_site.setValue('TYR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TYR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DTY -> TYR') not in mod_row:
- mod_row.append((res_num, residue, 'DTY -> TYR'))
- elif mod_pos[res_num] == 44: # converts N-CARBOXYMETHIONINE (CXM) to MET
- atom_site.setValue('MET', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('MET', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CXM -> MET') not in mod_row:
- mod_row.append((res_num, residue, 'CXM -> MET'))
- elif mod_pos[res_num] == 45: # converts D-GLUTAMINE (DGN) to MET
- atom_site.setValue('GLN', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('GLN', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DGN -> GLN') not in mod_row:
- mod_row.append((res_num, residue, 'DGN -> GLN'))
- elif mod_pos[res_num] == 46: # converts D-TRYPTOPHAN (DTR) to TRP
- atom_site.setValue('TRP', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TRP', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DTR -> TRP') not in mod_row:
- mod_row.append((res_num, residue, 'DTR -> TRP'))
- elif mod_pos[res_num] == 47: # converts N-ACETYL-SERINE (SAC) to SER
- atom_site.setValue('SER', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('SER', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'SAC -> SER') not in mod_row:
- mod_row.append((res_num, residue, 'SAC -> SER'))
- elif mod_pos[res_num] == 48: # converts D-ASPARAGINE (DSG) to ASN
- atom_site.setValue('ASN', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ASN', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DSG -> ASN') not in mod_row:
- mod_row.append((res_num, residue, 'DSG -> ASN'))
- elif mod_pos[res_num] == 49: # converts N-METHYL METHIONINE (MME) to MET
- atom_site.setValue('MET', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('MET', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MME -> MET') not in mod_row:
- mod_row.append((res_num, residue, 'MME -> MET'))
- elif mod_pos[res_num] == 50: # converts N-methyl-L-alanine (MAA) to ALA
- atom_site.setValue('ALA', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('ALA', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MAA -> ALA') not in mod_row:
- mod_row.append((res_num, residue, 'MAA -> ALA'))
- elif mod_pos[res_num] == 51: # converts 3-FLUOROTYROSINE (YOF) to TYR
- atom_site.setValue('TYR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TYR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'YOF -> TYR') not in mod_row:
- mod_row.append((res_num, residue, 'YOF -> TYR'))
- elif mod_pos[res_num] == 52: # converts (4R)-4-fluoro-L-proline (FP9) to PRO
- atom_site.setValue('PRO', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('PRO', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'FP9 -> PRO') not in mod_row:
- mod_row.append((res_num, residue, 'FP9 -> PRO'))
- elif mod_pos[res_num] == 53: # converts N-formyl-L-valine (FVA) to VAL
- atom_site.setValue('VAL', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('VAL', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'FVA -> VAL') not in mod_row:
- mod_row.append((res_num, residue, 'FVA -> VAL'))
- elif mod_pos[res_num] == 54: # converts N-methyl-D-leucine (MLU) to LEU
- atom_site.setValue('LEU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LEU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MLU -> LEU') not in mod_row:
- mod_row.append((res_num, residue, 'MLU -> LEU'))
- elif mod_pos[res_num] == 55: # converts (betaR)-3-chloro-beta-hydroxy-L-tyrosine (OMY) to TYR
- atom_site.setValue('TYR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TYR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'OMY -> TYR') not in mod_row:
- mod_row.append((res_num, residue, 'OMY -> TYR'))
- elif mod_pos[res_num] == 56: # converts GAMMA-D-GLUTAMIC ACID (FGA) to GLU
- atom_site.setValue('GLU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('GLU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'FGA -> GLU') not in mod_row:
- mod_row.append((res_num, residue, 'FGA -> GLU'))
- elif mod_pos[res_num] == 57: # converts N-METHYLPHENYLALANINE (MEA) to PHE
- atom_site.setValue('PHE', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('PHE', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MEA -> PHE') not in mod_row:
- mod_row.append((res_num, residue, 'MEA -> PHE'))
- elif mod_pos[res_num] == 58: # converts S-(METHYLMERCURY)-L-CYSTEINE (CMH) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CMH -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'CMH -> CYS'))
- elif mod_pos[res_num] == 59: # converts D-HISTIDINE (DHI) to HIS
- atom_site.setValue('HIS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('HIS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'DHI -> HIS') not in mod_row:
- mod_row.append((res_num, residue, 'DHI -> HIS'))
- elif mod_pos[res_num] == 60: # converts SELENOCYSTEINE (SEC) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'SEC -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'SEC -> CYS'))
- elif mod_pos[res_num] == 61: # converts (betaR)-3-CHLORO-BETA-HYDROXY-D-TYROSINE (OMZ) to TYR
- atom_site.setValue('TYR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TYR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'OMZ -> TYR') not in mod_row:
- mod_row.append((res_num, residue, 'OMZ -> TYR'))
- elif mod_pos[res_num] == 62: # converts S-ACETYL-CYSTEINE (SCY) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'SCY -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'SCY -> CYS'))
- elif mod_pos[res_num] == 63: # converts S-OXYMETHIONINE (MHO) to MET
- atom_site.setValue('MET', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('MET', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MHO -> MET') not in mod_row:
- mod_row.append((res_num, residue, 'MHO -> MET'))
- elif mod_pos[res_num] == 64: # converts D-METHIONINE (MED) to MET
- atom_site.setValue('MET', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('MET', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MED -> MET') not in mod_row:
- mod_row.append((res_num, residue, 'MED -> MET'))
- elif mod_pos[res_num] == 65: # converts S-DIMETHYLARSINOYL-CYSTEINE (CAF) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'CAF -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'CAF -> CYS'))
- elif mod_pos[res_num] == 66: # converts META-NITRO-TYROSINE (NIY) to TYR
- atom_site.setValue('TYR', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('TYR', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'NIY -> TYR') not in mod_row:
- mod_row.append((res_num, residue, 'NIY -> TYR'))
- elif mod_pos[res_num] == 67: # converts O-ACETYLSERINE (OAS) to SER
- atom_site.setValue('SER', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('SER', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'OAS -> SER') not in mod_row:
- mod_row.append((res_num, residue, 'OAS -> SER'))
- elif mod_pos[res_num] == 68: # converts S-METHYL-THIO-CYSTEINE (SCH) to CYS
- atom_site.setValue('CYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('CYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'SCH -> CYS') not in mod_row:
- mod_row.append((res_num, residue, 'SCH -> CYS'))
- elif mod_pos[res_num] == 69: # converts 2-methyl-L-norleucine (MK8) to LEU
- atom_site.setValue('LEU', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LEU', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'MK8 -> LEU') not in mod_row:
- mod_row.append((res_num, residue, 'MK8 -> LEU'))
- elif mod_pos[res_num] == 70: # converts METHIONINE SULFOXIDE (SME) to MET
- atom_site.setValue('MET', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('MET', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'SME -> MET') not in mod_row:
- mod_row.append((res_num, residue, 'SME -> MET'))
- elif mod_pos[res_num] == 71: # converts 5-HYDROXYLYSINE (LYZ) to LYS
- atom_site.setValue('LYS', 'label_comp_id', atom_row)
- try:
- atom_site.setValue('LYS', 'auth_comp_id', atom_row)
- except IndexError:
- pass
- if (res_num, residue, 'LYZ -> LYS') not in mod_row:
- mod_row.append((res_num, residue, 'LYZ -> LYS'))
- ########################################################################
- ## Notify user about modification made to cif data ##
- ########################################################################
- if DEBUG_MODE:
- mod_model_num = len([ msg for msg in cha_row if msg[2] == 'model_num' ])
- mod_ins_code = len([ msg for msg in cha_row if msg[2] == 'ins_code' ])
- mod_group_PDB = len([ msg for msg in cha_row if msg[2] == 'group_PDB' ])
-
- if mod_model_num != 0:
- print ('! {p} {c}: modified atom_site.pdbx_PDB_model_num for {cr} residues to 1.'.format(
- p = pdb_code,
- c = chain,
- cr = mod_model_num))
- if mod_ins_code != 0:
- print ('! {p} {c}: modified atom_site.pdbx_PDB_ins_code for {cr} residues to "?".'.format(
- p = pdb_code,
- c = chain,
- cr = mod_ins_code))
- if mod_group_PDB != 0:
- print ('! {p} {c}: modified atom_site.group_PDB for {cr} residues to "ATOM".'.format(
- p = pdb_code,
- c = chain,
- cr = mod_group_PDB))
- for residue in reversed(mod_row):
- print ('! {p} {c}: modified cif pos {cr} ({nr}).'.format(
- p = pdb_code,
- c = chain,
- cr = residue[0],
- ca = residue[1],
- nr = residue[2]))
- for residue in reversed(rem_row):
- print ('! {p} {c}: removed cif pos {cr} ({ca})'.format(
- p = pdb_code,
- c = chain,
- cr = residue[0],
- ca = residue[1]))
- cif_edits[pdb_code] = block
- # write modified pir to disk
- for pdb_code in cif_edits:
- out = open(os.path.join(output_path, pdb_code + '.cif'), 'w')
- writer = PdbxWriter(out)
- writer.writeContainer(cif_edits[pdb_code])
- # Delete missing entries from the last template sequence to the first
- for row in reversed(del_row):
- template_grid.del_row(row)
- return template_grid
- def remove_self_alignment(template_grid, query_name):
- """ Removes a self alignment from the final pir alignment to prevent clashes with MODELLER """
-
- to_delete = list()
-
- for row in range(template_grid.get_grid_height()):
- if template_grid._pdb_code[row] == query_name:
- to_delete.append(row)
- for row in reversed(to_delete):
- template_grid.del_row(row)
- return True
- def write_to_file(line_list, fname):
- """ Writes the final pir file """
-
- with open(fname, 'w+') as fout:
- for line in line_list:
- fout.write(line + "\n")
- def arg():
- import argparse
- description = """Creates a MODELLER alignment (*.pir) from a HHSearch results file (*.hhr)."""
- epilog= '2016 Harald Voehringer.'
- # Initiate a ArgumentParser Class
- parser = argparse.ArgumentParser(description = description, epilog = epilog)
-
- # Call add_options to the parser
- parser.add_argument('input', help = 'results file from HHsearch with hit list and alignment', metavar = 'FILE')
- parser.add_argument('cifs', help = 'path to the folder containing cif files', metavar = 'DIR')
- parser.add_argument('pir', help = 'output file (PIR-formatted multiple alignment)', metavar = 'FILE')
- parser.add_argument('output', help = 'path to the folder where modified cif files should be written to', metavar = 'DIR')
- parser.add_argument('-v', '--verbose', action = 'store_true', help = 'verbose mode')
- parser.add_argument('-m', nargs = '+', help = 'pick hits with specified indices (e.g. -m 2 5)', metavar = 'INT')
- parser.add_argument('-e', type = float, help = 'maximum E-Value threshold (e.g. -e 0.001)', metavar = 'FLOAT')
- parser.add_argument('-r', type = float, help = 'residue ratio (filter alignments that have contribute at least residues according to the specified ratio).',
- default = 0, metavar = 'FLOAT')
- parser.add_argument('-c', help = 'convert non-canonical residues (default = True)', action = 'store_true', default = True)
-
- return parser
- def main():
- import sys
- parser = arg()
- args = parser.parse_args(sys.argv[1:])
-
- global DEBUG_MODE
-
- if args.verbose:
- DEBUG_MODE = True
- query_name, query_chain = get_query_name(args.input)
- data = read_result(args.input)
- selected_templates = list()
- if args.m and not args.e:
- selection = map(lambda x: int(x), args.m)
- print ('Selected templates {st}.'.format(st = ', '.join(args.m)))
-
- for i in selection:
- tmp_info = str(data[i - 1].template_info.split('>')[1])
- print ('{i}: {t}'.format(
- i = i,
- t = tmp_info[0:80]))
- selected_templates.append(data[i - 1])
- elif args.e and not args.m:
- print ('Selected templates satisfying E-val <= {e}'.format(e = args.e))
-
- e_values = { float(j.evalue):i for i, j in enumerate(data) }
- selection = sorted([ val for key, val in e_values.items() if key <= args.e ])
- for i in selection:
- tmp_info = str(data[i - 1].template_info.split('>')[1])
- print ('{i}: {t}'.format(
- i = i + 1,
- t = tmp_info[0:80]))
- selected_templates.append(data[i - 1])
- elif args.m and args.e:
- print ('! Please do not use option -m and -e at the same time ! Exiting.')
- sys.exit()
- else:
- selected_templates = data
-
- print ('Creating pir file using all templates ({n})'.format(
- n = len(selected_templates)))
- query_grid = create_query_grid(selected_templates) # load query grid
- print ('query_grid')
- print(query_grid)
- gapless_query_grid = create_gapless_grid(query_grid) # remove gaps
- print ('gapless_query_grid')
- print(gapless_query_grid)
- processed_query_grid = process_query_grid(query_grid, gapless_query_grid) # insert gaps
- ##processed_query_grid = process_query_grid(query_grid, query_grid) # insert gaps
- print ('processed_query_grid')
- print (processed_query_grid)
- glob_seq = derive_global_seq(processed_query_grid, query_name, query_chain) # derive query sequence
- template_grid = create_template_grid(selected_templates) # create template grid
- print ('template_grid')
- print (template_grid)
- processed_template_grid = process_template_grid(query_grid, template_grid) # insert gaps to template sequnces
- print ('processed_query_grid')
- print (processed_query_grid)
- print ('hzhu processed_template_grid')
- print (processed_template_grid)
- final_grid = compare_with_cifs(processed_template_grid, args.cifs, args.output, args.c, args.r) # compare with atom section of cifs
- remove_self_alignment(final_grid, query_name) # remove self alignment if any
- write_to_file([glob_seq, final_grid.display()], args.pir)
- if __name__ == "__main__":
- main()
|