Posts Generation of Biological Assembly and related information
Post
Cancel

Generation of Biological Assembly and related information

Get Info from mmCIF

1
2
3
4
5
6
7
8
9
# mmCIF data items from the `_pdbx_struct_assembly_gen` category.
assg_cols = (
    '_pdbx_struct_assembly_gen.asym_id_list',
    '_pdbx_struct_assembly_gen.oper_expression',
    '_pdbx_struct_assembly_gen.assembly_id',
)

# mmCIF data items from the `_pdbx_struct_oper_list` category.
oper_cols = (
    '_pdbx_struct_oper_list.id',
    '_pdbx_struct_oper_list.symmetry_operation',
)

# Both column tuples are concatenated and handed to MMCIF2DictPlus together
# with the open text-mode handle.
# NOTE(review): presumably MMCIF2DictPlus only collects the listed items --
# confirm against its definition.
with Path("demo.cif").open('rt') as handle:
    mmcif_dict = MMCIF2DictPlus(handle, assg_cols + oper_cols)

MMCIF2DictPlus

Output

2Q4N

1
2
3
4
5
6
{"_pdbx_struct_assembly_gen.assembly_id": ["1"],
 "_pdbx_struct_assembly_gen.asym_id_list": ["A,B"],
 "_pdbx_struct_assembly_gen.oper_expression": ["1,2"],
 "_pdbx_struct_oper_list.id": ["1", "2"],
 "_pdbx_struct_oper_list.symmetry_operation": ["x,y,z", "-x,-y+2,z"],
 "data_": "2Q4N"}
Asymmetric unit of 2q4n Biological assembly 1 of 2q4n

3HL2

1
2
3
4
5
6
7
8
9
10
11
{"_pdbx_struct_assembly_gen.assembly_id": ["1", "1", "2", "2"],
 "_pdbx_struct_assembly_gen.asym_id_list": ["A,B,F,G,H,I,J,K,Q,R",
                                            "A,B,E,F,G,H,I,J,K,Q,R,U",
                                            "C,D,L,M,N,O,P,S,T",
                                            "C,D,E,L,M,N,O,P,S,T,U"],
 "_pdbx_struct_assembly_gen.oper_expression": ["1", "2", "3", "2"],
 "_pdbx_struct_oper_list.id": ["1", "2", "3"],
 "_pdbx_struct_oper_list.symmetry_operation": ["x,x-y-1,-z",
                                               "x,y,z",
                                               "-x+y,y,-z+1/3"],
 "data_": "3HL2"}
Asymmetric unit of 3hl2 Biological assembly 1 of 3hl2 Biological assembly 2 of 3hl2

1M11

1
2
3
4
5
6
7
8
{
    "data_": "1M11",
    "_pdbx_struct_assembly_gen.asym_id_list": ["A,B,C,D", "A,B,C,D", "A,B,C,D", "A,B,C,D", "A,B,C,D"],
    "_pdbx_struct_assembly_gen.assembly_id": ["1", "2", "3", "4", "5"],
    "_pdbx_struct_assembly_gen.oper_expression": ["(1-60)", "1", "(1-5)", "(1,2,6,10,23,24)", "P"],
    "_pdbx_struct_oper_list.id": ["P", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60"],
    "_pdbx_struct_oper_list.symmetry_operation": ["?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"]
}
Asymmetric unit of 1m11 Biological assembly 1 of 1m11 Biological assembly 2 of 1m11
Biological assembly 3 of 1m11 Biological assembly 4 of 1m11 Biological assembly 5 of 1m11

1M4X

1
2
3
4
5
6
7
8
{
    "data_": "1M4X",
    "_pdbx_struct_assembly_gen.asym_id_list": ["A,B,C", "A,B,C", "A,B,C", "A,B,C", "A,B,C", "A,B,C", "A,B,C"],
    "_pdbx_struct_assembly_gen.assembly_id": ["1", "2", "3", "4", "5", "6", "7"],
    "_pdbx_struct_assembly_gen.oper_expression": ["(1-60)(61-88)", "(61-88)", "(1-5)(61-88)", "(1,2,6,10,23,24)(61-88)", "(1-5)(63-68)", "(1,10,23)(61,62,69-88)", "(P)(61-88)"],
    "_pdbx_struct_oper_list.id": ["P", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88"],
    "_pdbx_struct_oper_list.symmetry_operation": ["?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"]
}

1M4X example from https://github.com/biojava/biojava/issues/801#issue-356603172

Asymmetric unit of 1m4x Biological assembly 1 of 1m4x Biological assembly 2 of 1m4x Biological assembly 3 of 1m4x
Biological assembly 4 of 1m4x Biological assembly 5 of 1m4x Biological assembly 6 of 1m4x Biological assembly 7 of 1m4x

How molstar handles assembly info

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
 * Code from https://github.com/molstar/molstar/
 * src/mol-model-formats/structure/property/assembly.ts
 */

/**
 * Parse an operator expression into groups of operator ids.
 *
 * '(X0)(1-5)' becomes [['X0'], ['1', '2', '3', '4', '5']]
 * kudos to Glen van Ginkel.
 *
 * Each parenthesised section (or a bare, unparenthesised list) yields one
 * group. Inside a group, comma-separated tokens are handled as follows:
 *  - `from-to` with numeric bounds expands to every integer in [from, to];
 *    a descending range (from > to) contributes nothing.
 *  - a hyphenated token whose bounds are not numeric (e.g. 'A-B') is kept
 *    as a literal id instead of being silently dropped.
 *  - anything else (including a token starting with '-') is kept, trimmed.
 *
 * @param value The operator expression, e.g. '(1-60)(61-88)' or '1,2'.
 * @returns One array of operator ids per group, in order of appearance.
 */
function parseOperatorList(value: string): string[][] {
    // Captures the content between one optional pair of parentheses. The
    // trailing `]*` is retained unchanged from the original expression; it
    // only swallows stray ']' characters that directly follow a group.
    const oeRegex = /\(?([^\(\)]+)\)?]*/g;
    const ret: string[][] = [];

    let match: RegExpExecArray | null;
    while ((match = oeRegex.exec(value)) !== null) {
        const group: string[] = [];
        for (const token of match[1].split(',')) {
            const dashIndex = token.indexOf('-');
            if (dashIndex > 0) {
                const from = parseInt(token.substring(0, dashIndex), 10);
                const to = parseInt(token.substring(dashIndex + 1), 10);
                if (Number.isNaN(from) || Number.isNaN(to)) {
                    // Not a numeric range: treat the whole token as an id.
                    group.push(token.trim());
                } else {
                    for (let i = from; i <= to; i++) group.push(i.toString());
                }
            } else {
                group.push(token.trim());
            }
        }
        ret.push(group);
    }

    return ret;
}

/**
 * Expand grouped operator ids into every combination that takes one id
 * from each group.
 *
 * @param operatorList Groups of operator ids, one inner array per group.
 * @returns The combinations collected by `expandOperators1`; each entry has
 *          one slot per group, in the same order as `operatorList`.
 */
function expandOperators(operatorList: string[][]) {
    const combinations: string[][] = [];
    // Scratch buffer with one empty-string slot per group; the recursive
    // helper overwrites the slots and copies the buffer into the result.
    const scratch: string[] = new Array<string>(operatorList.length).fill('');
    const lastGroup = operatorList.length - 1;
    expandOperators1(operatorList, combinations, lastGroup, scratch);
    return combinations;
}

/**
 * Recursive worker: fills slot `i` of `current` with each id of group `i`
 * in turn, then recurses into group `i - 1`. Once `i` drops below zero a
 * copy of `current` is appended to `list`.
 *
 * @param operatorNames Groups of operator ids.
 * @param list Output array that receives one copy of `current` per combination.
 * @param i Index of the group to fill next; recursion counts down from the last group.
 * @param current Scratch buffer that is mutated in place.
 */
function expandOperators1(operatorNames: string[][], list: string[][], i: number, current: string[]) {
    if (i >= 0) {
        for (const name of operatorNames[i]) {
            current[i] = name;
            expandOperators1(operatorNames, list, i - 1, current);
        }
        return;
    }

    // Every slot is assigned: store a snapshot, since `current` keeps changing.
    list.push(current.slice());
}

//[In]
// Parse the expression into groups first, then print every combination.
const demoGroups = parseOperatorList("(X0)(1-4)");
console.log(expandOperators(demoGroups));
//[Out]
[ [ "X0", "1" ], [ "X0", "2" ], [ "X0", "3" ], [ "X0", "4" ] ] 
This post is licensed under CC BY 4.0 by the author.