3737from crowdgit .models .service_execution import ServiceExecution
3838from crowdgit .services .base .base_service import BaseService
3939from crowdgit .services .maintainer .bedrock import invoke_bedrock
40+ from crowdgit .services .maintainer .section_extractor import SectionExtractor
4041from crowdgit .services .utils import run_shell_command
4142from crowdgit .settings import MAINTAINER_RETRY_INTERVAL_DAYS , MAINTAINER_UPDATE_INTERVAL_HOURS
4243
@@ -93,6 +94,7 @@ class MaintainerService(BaseService):
9394 "code_owners" ,
9495 "emeritus" ,
9596 "workgroup" ,
97+ "readme" ,
9698 }
9799
98100 VALID_EXTENSIONS = {
@@ -132,6 +134,12 @@ class MaintainerService(BaseService):
132134 STEM_MATCH_SCORE = 50
133135 PARTIAL_STEM_SCORE = 25
134136
137+ # Lowercased basenames of KNOWN_PATHS files that still need section filtering (they contain non-governance content)
138+ SECTION_FILTERED_PATHS = {"readme.md" , "governance.md" }
139+ SCORING_KEYWORDS_SET = frozenset (SCORING_KEYWORDS )
140+
141+ _section_extractor = SectionExtractor ()
142+
135143 def make_role (self , title : str ):
136144 title = title .lower ()
137145 title = (
@@ -360,8 +368,10 @@ async def process_chunk(chunk_index: int, chunk: str):
360368 self .get_extraction_prompt (maintainer_filename , content ),
361369 pydantic_model = MaintainerInfo ,
362370 )
363- self .logger .info ("Maintainers file content analyzed by AI" )
364- self .logger .info (f"Maintainers response: { maintainer_info } " )
371+ info_count = len (maintainer_info .output .info ) if maintainer_info .output .info else 0
372+ self .logger .info (
373+ f"Maintainers file content analyzed by AI (found={ info_count } , cost={ maintainer_info .cost :.4f} )"
374+ )
365375 if maintainer_info .output .info is not None :
366376 return AggregatedMaintainerInfo (
367377 output = AggregatedMaintainerInfoItems (info = maintainer_info .output .info ),
@@ -373,7 +383,7 @@ async def process_chunk(chunk_index: int, chunk: str):
373383 )
374384 else :
375385 self .logger .error (
376- f"Expected a list of maintainer info or an error message, got: { str ( maintainer_info ) } "
386+ f"Expected a list of maintainer info or an error message, got error= { maintainer_info . output . error } "
377387 )
378388 raise MaintanerAnalysisError (
379389 error_message = "Unexpected response from AI for Maintainers analysis" ,
@@ -586,6 +596,16 @@ async def analyze_and_build_result(self, filename: str, content: str) -> Maintai
586596 f"Skipping README file '{ filename } ': no governance keyword found in content"
587597 )
588598 raise MaintanerAnalysisError (error_code = ErrorCode .NO_MAINTAINER_FOUND )
599+
600+ fname = os .path .basename (filename ).lower ()
601+ if fname not in self .KNOWN_PATHS or fname in self .SECTION_FILTERED_PATHS :
602+ extracted = self ._section_extractor .extract (fname , content , self .SCORING_KEYWORDS_SET )
603+ if extracted :
604+ self .logger .info (f"Using extracted sections for '{ filename } '" )
605+ content = extracted
606+ else :
607+ self .logger .debug (f"No sections extracted for '{ filename } ', using full content" )
608+
589609 result = await self .analyze_file_content (filename , content )
590610
591611 if not result .output .info :
@@ -664,12 +684,6 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
664684 root_candidates , subdir_candidates = await self .find_candidate_files (repo_path )
665685 all_candidates = root_candidates + subdir_candidates
666686 candidate_files = [(path , score ) for path , _ , score in all_candidates ][:100 ]
667- self .logger .debug (
668- f"Detection step 2: { len (root_candidates )} root candidate(s), "
669- f"{ len (subdir_candidates )} subdir candidate(s); "
670- f"root={ [p for p , _ , _ in root_candidates ]} , "
671- f"subdir_top={ [p for p , _ , _ in subdir_candidates [:3 ]]} "
672- )
673687
674688 # Step 3: Try root-level files first (in score order), then top subdirectory file
675689 failed_candidates : set [str ] = set ()
@@ -757,7 +771,6 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
757771 f"Passing { len (ai_input_files )} files to AI for maintainer file detection "
758772 f"(total repo files: { len (file_names )} )"
759773 )
760- self .logger .debug (f"AI input files: { [f for f , _ in ai_input_files ]} " )
761774 ai_file_name , ai_cost = await self .find_maintainer_file_with_ai (ai_input_files )
762775 ai_suggested_file = ai_file_name
763776 total_cost += ai_cost
0 commit comments