|
6 | 6 |
|
7 | 7 | from .omop_xref import xref_to_omop_standard_concept, omop_map_to_standard, omop_map_from_standard, \ |
8 | 8 | xref_from_omop_standard_concept, xref_from_omop_local, xref_to_omop_local |
9 | | -from .cohd_utilities import ln_ratio_ci, rel_freq_ci, log_odds |
| 9 | +from .cohd_utilities import ln_ratio_ci, rel_freq_ci, log_odds, clip |
10 | 10 | from .app import cache |
11 | 11 |
|
12 | 12 | # Configuration |
@@ -1206,7 +1206,7 @@ def query_association(method, concept_id_1, concept_id_2=None, dataset_id=None, |
1206 | 1206 |
|
1207 | 1207 |
|
1208 | 1208 | @cache.memoize(timeout=86400) |
1209 | | -def query_trapi(concept_id_1, concept_id_2=None, dataset_id=None, domain_id=None, concept_class_id=None, |
| 1209 | +def query_trapi_old(concept_id_1, concept_id_2=None, dataset_id=None, domain_id=None, concept_class_id=None, |
1210 | 1210 | ln_ratio_sign=0, confidence=DEFAULT_CONFIDENCE): |
1211 | 1211 | """ Query for TRAPI. Performs the calculations for all association methods |
1212 | 1212 |
|
@@ -1425,6 +1425,281 @@ def query_trapi(concept_id_1, concept_id_2=None, dataset_id=None, domain_id=None |
1425 | 1425 | return json_return |
1426 | 1426 |
|
1427 | 1427 |
|
| 1428 | +def get_total_pair_counts(dataset_id: int) -> int: |
| 1429 | + """ Returns total number of pairs of concepts in the given dataset |
| 1430 | +
|
| 1431 | + Params |
| 1432 | + ------ |
| 1433 | + dataset_id: (int) dataset id |
| 1434 | +
|
| 1435 | + Returns |
| 1436 | + ------- |
| 1437 | + (int) total number of pairs of concepts |
| 1438 | + """ |
| 1439 | + if not get_total_pair_counts.total_pair_counts: |
| 1440 | + # Get the total pair counts and store locally for faster future retrieval |
| 1441 | + get_total_pair_counts.total_pair_counts = dict() |
| 1442 | + conn = sql_connection() |
| 1443 | + cur = conn.cursor() |
| 1444 | + sql = '''SELECT dataset_id, SUM(count) AS pair_count |
| 1445 | + FROM domain_pair_concept_counts |
| 1446 | + GROUP BY dataset_id;''' |
| 1447 | + cur.execute(sql) |
| 1448 | + results = cur.fetchall() |
| 1449 | + for result in results: |
| 1450 | + get_total_pair_counts.total_pair_counts[result['dataset_id']] = int(result['pair_count']) |
| 1451 | + |
| 1452 | + return get_total_pair_counts.total_pair_counts[dataset_id] |
| 1453 | +get_total_pair_counts.total_pair_counts = None |
| 1454 | + |
| 1455 | + |
| 1456 | +@cache.memoize(timeout=86400) |
| 1457 | +def query_trapi(concept_id_1, concept_id_2=None, dataset_id=None, domain_id=None, concept_class_id=None, |
| 1458 | + ln_ratio_sign=0, confidence=DEFAULT_CONFIDENCE): |
| 1459 | + """ Query for TRAPI. Performs the calculations for all association methods |
| 1460 | +
|
| 1461 | + Parameters |
| 1462 | + ---------- |
| 1463 | + concept_id_1: String - OMOP concept ID |
| 1464 | + concept_id_2: (optional) String - OMOP concept ID |
| 1465 | + dataset_id: (optional) String - COHD dataset ID |
| 1466 | + domain_id: (optional) String - OMOP domain ID |
| 1467 | + concept_class_id: (optional) String - OMOP concept class ID |
| 1468 | + ln_ratio_sign: (optional) Int - 1: positive ln_ratio only; -1: negative ln_ratio only; 0: any ln_ratio |
| 1469 | + confidence: (optional) Float - Confidence level |
| 1470 | +
|
| 1471 | + Returns |
| 1472 | + ------- |
| 1473 | + Dict results |
| 1474 | + """ |
| 1475 | + assert concept_id_1 is not None and str(concept_id_1), \ |
| 1476 | + 'query_cohd_mysql.py::query_trapi() - Bad input. concept_id_1={concept_id_1}'.format( |
| 1477 | + concept_id_1=str(concept_id_1) |
| 1478 | + ) |
| 1479 | + |
| 1480 | + # Connect to MYSQL database |
| 1481 | + conn = sql_connection() |
| 1482 | + cur = conn.cursor() |
| 1483 | + |
| 1484 | + # Get the total number of pairs for Bonferonni adjustment |
| 1485 | + pair_count = get_total_pair_counts(dataset_id) |
| 1486 | + |
| 1487 | + # Filter ln ratio |
| 1488 | + if ln_ratio_sign == 0: |
| 1489 | + ln_ratio_filter = '' |
| 1490 | + elif ln_ratio_sign > 0: |
| 1491 | + ln_ratio_filter = 'AND log(cp.concept_count * pc.count / (c1.concept_count * c2.concept_count + 0E0)) > 0' |
| 1492 | + elif ln_ratio_sign < 0: |
| 1493 | + ln_ratio_filter = 'AND log(cp.concept_count * pc.count / (c1.concept_count * c2.concept_count + 0E0)) < 0' |
| 1494 | + |
| 1495 | + if concept_id_2 is not None: |
| 1496 | + # concept_id_2 is specified, only return the results for the pair (concept_id_1, concept_id_2) |
| 1497 | + concept_id_2 = int(concept_id_2) |
| 1498 | + order = concept_id_1 < concept_id_2 |
| 1499 | + sql = '''SELECT |
| 1500 | + cp.dataset_id, |
| 1501 | + cp.concept_id_1 AS {rename_id_1}, |
| 1502 | + cp.concept_id_2 AS {rename_id_2}, |
| 1503 | + c1.concept_count AS {rename_count_1}, |
| 1504 | + c2.concept_count AS {rename_count_2}, |
| 1505 | + cp.concept_count AS concept_pair_count, |
| 1506 | + c1.concept_count * c2.concept_count / (pc.count + 0E0) AS expected_count, |
| 1507 | + p_value, |
| 1508 | + ln_ratio, |
| 1509 | + ln_ratio_ci_low, |
| 1510 | + ln_ratio_ci_high, |
| 1511 | + cp.concept_count / (c1.concept_count + 0E0) AS {rename_rf1}, |
| 1512 | + cp.pair_count_ci_lo / (c1.ci_hi + 0E0) AS {rename_rf1_ci_lo}, |
| 1513 | + cp.pair_count_ci_hi / (c1.ci_lo + 0E0) AS {rename_rf1_ci_hi}, |
| 1514 | + cp.concept_count / (c2.concept_count + 0E0) AS {rename_rf2}, |
| 1515 | + cp.pair_count_ci_lo / (c2.ci_hi + 0E0) AS {rename_rf2_ci_lo}, |
| 1516 | + cp.pair_count_ci_hi / (c2.ci_lo + 0E0) AS {rename_rf2_ci_hi}, |
| 1517 | + log_odds, |
| 1518 | + log_odds_ci_lo, |
| 1519 | + log_odds_ci_hi, |
| 1520 | + pc.count AS patient_count |
| 1521 | + FROM cohd.concept_pair_counts cp |
| 1522 | + JOIN cohd.concept_counts c1 ON cp.concept_id_1 = c1.concept_id |
| 1523 | + JOIN cohd.concept_counts c2 ON cp.concept_id_2 = c2.concept_id |
| 1524 | + JOIN cohd.patient_count pc ON cp.dataset_id = pc.dataset_id |
| 1525 | + WHERE cp.dataset_id = %(dataset_id)s |
| 1526 | + AND c1.dataset_id = %(dataset_id)s |
| 1527 | + AND c2.dataset_id = %(dataset_id)s |
| 1528 | + AND cp.concept_id_1 = %(concept_id_1)s |
| 1529 | + AND cp.concept_id_2 = %(concept_id_2)s |
| 1530 | + {ln_ratio_filter} |
| 1531 | + ;''' |
| 1532 | + params = { |
| 1533 | + 'dataset_id': dataset_id, |
| 1534 | + 'concept_id_1': concept_id_1 if order else concept_id_2, |
| 1535 | + 'concept_id_2': concept_id_2 if order else concept_id_1 |
| 1536 | + } |
| 1537 | + rename_id_1 = 'concept_id_1' |
| 1538 | + rename_id_2 = 'concept_id_2' |
| 1539 | + rename_count_1 = 'concept_1_count' |
| 1540 | + rename_count_2 = 'concept_2_count' |
| 1541 | + rename_rf1 = 'relative_frequency_1' |
| 1542 | + rename_rf2 = 'relative_frequency_2' |
| 1543 | + rename_rf1_ci_lo = 'rf1_ci_lo' |
| 1544 | + rename_rf1_ci_hi = 'rf1_ci_hi' |
| 1545 | + rename_rf2_ci_lo = 'rf2_ci_lo' |
| 1546 | + rename_rf2_ci_hi = 'rf2_ci_hi' |
| 1547 | + |
| 1548 | + if not order: |
| 1549 | + rename_id_1 = 'concept_id_2' |
| 1550 | + rename_id_2 = 'concept_id_1' |
| 1551 | + rename_count_1 = 'concept_2_count' |
| 1552 | + rename_count_2 = 'concept_1_count' |
| 1553 | + rename_rf1 = 'relative_frequency_2' |
| 1554 | + rename_rf2 = 'relative_frequency_1' |
| 1555 | + rename_rf1_ci_lo = 'relative_frequency_2_ci_lo' |
| 1556 | + rename_rf1_ci_hi = 'relative_frequency_2_ci_hi' |
| 1557 | + rename_rf2_ci_lo = 'relative_frequency_1_ci_lo' |
| 1558 | + rename_rf2_ci_hi = 'relative_frequency_1_ci_hi' |
| 1559 | + sql = sql.format(rename_id_1=rename_id_1, rename_id_2=rename_id_2, rename_count_1=rename_count_1, |
| 1560 | + rename_count_2=rename_count_2, rename_rf1=rename_rf1, rename_rf2=rename_rf2, |
| 1561 | + ln_ratio_filter=ln_ratio_filter, |
| 1562 | + rename_rf1_ci_lo=rename_rf1_ci_lo, rename_rf1_ci_hi=rename_rf1_ci_hi, |
| 1563 | + rename_rf2_ci_lo=rename_rf2_ci_lo, rename_rf2_ci_hi=rename_rf2_ci_hi) |
| 1564 | + |
| 1565 | + else: |
| 1566 | + # If concept_id_2 is not specified, get results for all pairs that include concept_id_1 |
| 1567 | + sql = '''SELECT * |
| 1568 | + FROM |
| 1569 | + ((SELECT |
| 1570 | + cp.dataset_id, |
| 1571 | + cp.concept_id_1, |
| 1572 | + cp.concept_id_2, |
| 1573 | + c1.concept_count AS concept_1_count, |
| 1574 | + c2.concept_count AS concept_2_count, |
| 1575 | + cp.concept_count AS concept_pair_count, |
| 1576 | + c1.concept_count * c2.concept_count / (pc.count + 0E0) AS expected_count, |
| 1577 | + p_value, |
| 1578 | + ln_ratio, |
| 1579 | + ln_ratio_ci_lo, |
| 1580 | + ln_ratio_ci_hi, |
| 1581 | + cp.concept_count / (c1.concept_count + 0E0) AS relative_frequency_1, |
| 1582 | + cp.pair_count_ci_lo / (c1.ci_hi + 0E0) AS rf1_ci_lo, |
| 1583 | + cp.pair_count_ci_hi / (c1.ci_lo + 0E0) AS rf1_ci_hi, |
| 1584 | + cp.concept_count / (c2.concept_count + 0E0) AS relative_frequency_2, |
| 1585 | + cp.pair_count_ci_lo / (c2.ci_hi + 0E0) AS rf2_ci_lo, |
| 1586 | + cp.pair_count_ci_hi / (c2.ci_lo + 0E0) AS rf2_ci_hi, |
| 1587 | + log_odds, |
| 1588 | + log_odds_ci_lo, |
| 1589 | + log_odds_ci_hi, |
| 1590 | + c.concept_name AS concept_2_name, |
| 1591 | + c.domain_id AS concept_2_domain, |
| 1592 | + c.concept_class_id AS concept_2_class_id, |
| 1593 | + pc.count AS patient_count |
| 1594 | + FROM cohd.concept_pair_counts cp |
| 1595 | + JOIN cohd.concept_counts c1 ON cp.concept_id_1 = c1.concept_id |
| 1596 | + JOIN cohd.concept_counts c2 ON cp.concept_id_2 = c2.concept_id |
| 1597 | + JOIN cohd.patient_count pc ON cp.dataset_id = pc.dataset_id |
| 1598 | + JOIN cohd.concept c ON cp.concept_id_2 = c.concept_id |
| 1599 | + WHERE cp.dataset_id = %(dataset_id)s |
| 1600 | + AND c1.dataset_id = %(dataset_id)s |
| 1601 | + AND c2.dataset_id = %(dataset_id)s |
| 1602 | + AND cp.concept_id_1 = %(concept_id_1)s |
| 1603 | + {domain_filter} |
| 1604 | + {concept_class_filter} |
| 1605 | + {ln_ratio_filter}) |
| 1606 | + UNION |
| 1607 | + (SELECT |
| 1608 | + cp.dataset_id, |
| 1609 | + cp.concept_id_2 AS concept_id_1, |
| 1610 | + cp.concept_id_1 AS concept_id_2, |
| 1611 | + c2.concept_count AS concept_1_count, |
| 1612 | + c1.concept_count AS concept_2_count, |
| 1613 | + cp.concept_count AS concept_pair_count, |
| 1614 | + c1.concept_count * c2.concept_count / (pc.count + 0E0) AS expected_count, |
| 1615 | + p_value, |
| 1616 | + ln_ratio, |
| 1617 | + ln_ratio_ci_lo, |
| 1618 | + ln_ratio_ci_hi, |
| 1619 | + cp.concept_count / (c2.concept_count + 0E0) AS relative_frequency_1, |
| 1620 | + cp.pair_count_ci_lo / (c2.ci_hi + 0E0) AS rf1_ci_lo, |
| 1621 | + cp.pair_count_ci_hi / (c2.ci_lo + 0E0) AS rf1_ci_hi, |
| 1622 | + cp.concept_count / (c1.concept_count + 0E0) AS relative_frequency_2, |
| 1623 | + cp.pair_count_ci_lo / (c1.ci_hi + 0E0) AS rf2_ci_lo, |
| 1624 | + cp.pair_count_ci_hi / (c1.ci_lo + 0E0) AS rf2_ci_hi, |
| 1625 | + log_odds, |
| 1626 | + log_odds_ci_lo, |
| 1627 | + log_odds_ci_hi, |
| 1628 | + c.concept_name AS concept_2_name, |
| 1629 | + c.domain_id AS concept_2_domain, |
| 1630 | + c.concept_class_id AS concept_2_class_id, |
| 1631 | + pc.count AS patient_count |
| 1632 | + FROM cohd.concept_pair_counts cp |
| 1633 | + JOIN cohd.concept_counts c1 ON cp.concept_id_1 = c1.concept_id |
| 1634 | + JOIN cohd.concept_counts c2 ON cp.concept_id_2 = c2.concept_id |
| 1635 | + JOIN cohd.patient_count pc ON cp.dataset_id = pc.dataset_id |
| 1636 | + JOIN cohd.concept c ON cp.concept_id_1 = c.concept_id |
| 1637 | + WHERE cp.dataset_id = %(dataset_id)s |
| 1638 | + AND c1.dataset_id = %(dataset_id)s |
| 1639 | + AND c2.dataset_id = %(dataset_id)s |
| 1640 | + AND cp.concept_id_2 = %(concept_id_1)s |
| 1641 | + {domain_filter} |
| 1642 | + {concept_class_filter} |
| 1643 | + {ln_ratio_filter})) x |
| 1644 | + ORDER BY ABS(ln_ratio) DESC;''' |
| 1645 | + params = { |
| 1646 | + 'dataset_id': dataset_id, |
| 1647 | + 'concept_id_1': concept_id_1, |
| 1648 | + } |
| 1649 | + |
| 1650 | + if domain_id is not None and not domain_id == ['']: |
| 1651 | + # Restrict the associated concept by domain |
| 1652 | + domain_filter = 'AND c.domain_id = %(domain_id)s' |
| 1653 | + params['domain_id'] = domain_id |
| 1654 | + else: |
| 1655 | + # Unrestricted domain |
| 1656 | + domain_filter = '' |
| 1657 | + |
| 1658 | + # Filter concepts by concept_class |
| 1659 | + if concept_class_id is None or not concept_class_id or concept_class_id == [''] or \ |
| 1660 | + concept_class_id.isspace(): |
| 1661 | + concept_class_filter = '' |
| 1662 | + else: |
| 1663 | + concept_class_filter = 'AND concept_class_id = %(concept_class_id)s' |
| 1664 | + params['concept_class_id'] = concept_class_id |
| 1665 | + |
| 1666 | + sql = sql.format(domain_filter=domain_filter, concept_class_filter=concept_class_filter, |
| 1667 | + ln_ratio_filter=ln_ratio_filter) |
| 1668 | + |
| 1669 | + cur.execute(sql, params) |
| 1670 | + json_return = cur.fetchall() |
| 1671 | + |
| 1672 | + # Perform calculations for results |
| 1673 | + for row in json_return: |
| 1674 | + cpc = row['concept_pair_count'] |
| 1675 | + c1 = row['concept_1_count'] |
| 1676 | + c2 = row['concept_2_count'] |
| 1677 | + |
| 1678 | + # Confidence interval for obsExpRatio |
| 1679 | + # The CI bounds may hit Inf, which causes issues with JSON serialization. Limit it to 999 |
| 1680 | + row['ln_ratio_ci'] = (clip(row['ln_ratio_ci_lo'], JSON_INFINITY_REPLACEMENT), |
| 1681 | + clip(row['ln_ratio_ci_hi'], JSON_INFINITY_REPLACEMENT)) |
| 1682 | + |
| 1683 | + # Confidence intervals for relative frequencies |
| 1684 | + row['relative_frequency_1_ci'] = row['rf1_ci_lo'], min(row['rf1_ci_hi'], JSON_INFINITY_REPLACEMENT) |
| 1685 | + row['relative_frequency_2_ci'] = row['rf2_ci_lo'], min(row['rf2_ci_hi'], JSON_INFINITY_REPLACEMENT) |
| 1686 | + |
| 1687 | + # Chi-square |
| 1688 | + p_value = row['p_value'] |
| 1689 | + row['chi_square_p-value'] = max(p_value, MIN_P) |
| 1690 | + row['chi_square_p-value_adjusted'] = max(min(p_value * pair_count, 1.0), MIN_P) # Bonferonni adjustment |
| 1691 | + |
| 1692 | + # Log-odds |
| 1693 | + row['log_odds_ci'] = [clip(row['log_odds_ci_lo'], JSON_INFINITY_REPLACEMENT), |
| 1694 | + clip(row['log_odds_ci_hi'], JSON_INFINITY_REPLACEMENT)] |
| 1695 | + |
| 1696 | + cur.close() |
| 1697 | + conn.close() |
| 1698 | + |
| 1699 | + json_return = {"results": json_return} |
| 1700 | + return json_return |
| 1701 | + |
| 1702 | + |
1428 | 1703 | def health(): |
1429 | 1704 | """ Quick health check of MySQL database |
1430 | 1705 | Simply queries the dataset table |
|
0 commit comments