<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "https://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1" specific-use="sps-1.9" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
	<front>
		<journal-meta>
			<journal-id journal-id-type="publisher-id">rfing</journal-id>
			<journal-title-group>
				<journal-title>Revista Facultad de Ingeniería</journal-title>
				<abbrev-journal-title abbrev-type="publisher">Rev. Fac. Ing.</abbrev-journal-title>
			</journal-title-group>
			<issn pub-type="ppub">0121-1129</issn>
			<publisher>
				<publisher-name>Universidad Pedagógica y Tecnológica de Colombia</publisher-name>
			</publisher>
		</journal-meta>
		<article-meta>
			<article-id pub-id-type="doi">10.19053/01211129.v34.n73.2025.20194</article-id>
			<article-id pub-id-type="other">5</article-id>
			<article-categories>
				<subj-group subj-group-type="heading">
					<subject>Articles</subject>
				</subj-group>
			</article-categories>
			<title-group>
				<article-title>EFFORT ESTIMATION IN SOFTWARE DEVELOPMENT PROJECTS USING SUPERVISED MACHINE LEARNING TECHNIQUES</article-title>
				<trans-title-group xml:lang="es">
					<trans-title>Estimación de esfuerzo en proyectos de desarrollo de software utilizando técnicas de aprendizaje automático</trans-title>
				</trans-title-group>
				<trans-title-group xml:lang="pt">
					<trans-title>ESTIMATIVA DE ESFORÇO EM PROJETOS DE DESENVOLVIMENTO DE SOFTWARE UTILIZANDO TÉCNICAS DE APRENDIZADO DE MÁQUINA</trans-title>
				</trans-title-group>
			</title-group>
			<contrib-group>
				<contrib contrib-type="author">
					<contrib-id contrib-id-type="orcid">0000-0002-4492-1915</contrib-id>
					<name>
						<surname>Getial-Barragán</surname>
						<given-names>Jesús</given-names>
					</name>
					<xref ref-type="aff" rid="aff1">1</xref>
				</contrib>
				<contrib contrib-type="author">
					<contrib-id contrib-id-type="orcid">0000-0002-0006-6654</contrib-id>
					<name>
						<surname>Timarán-Pereira</surname>
						<given-names>Ricardo</given-names>
					</name>
					<xref ref-type="aff" rid="aff2">2</xref>
				</contrib>
				<contrib contrib-type="author">
					<contrib-id contrib-id-type="orcid">0009-0009-4007-8420</contrib-id>
					<name>
						<surname>Bastidas-Torres</surname>
						<given-names>David-Ramiro</given-names>
					</name>
					<xref ref-type="aff" rid="aff3">3</xref>
				</contrib>
			</contrib-group>
			<aff id="aff1">
				<label>1</label>
				<institution content-type="original"> Universidad de Nariño (Pasto-Nariño, Colombia). jesusgetial@udenar.edu.co</institution>
				<institution content-type="normalized">Universidad de Nariño</institution>
				<institution content-type="orgname">Universidad de Nariño</institution>
				<addr-line>
					<city>Pasto</city>
					<state>Nariño</state>
				</addr-line>
				<country country="CO">Colombia</country>
				<email>jesusgetial@udenar.edu.co</email>
			</aff>
			<aff id="aff2">
				<label>2</label>
				<institution content-type="original"> Universidad de Nariño (Pasto-Nariño, Colombia). ritimar@udenar.edu.co</institution>
				<institution content-type="normalized">Universidad de Nariño</institution>
				<institution content-type="orgname">Universidad de Nariño</institution>
				<addr-line>
					<city>Pasto</city>
					<state>Nariño</state>
				</addr-line>
				<country country="CO">Colombia</country>
				<email>ritimar@udenar.edu.co</email>
			</aff>
			<aff id="aff3">
				<label>3</label>
				<institution content-type="original"> Pontificia Universidad Javeriana (Cali-Valle del Cauca, Colombia). davba@javerianacali.edu.co</institution>
				<institution content-type="normalized">Pontificia Universidad Javeriana</institution>
				<institution content-type="orgname">Pontificia Universidad Javeriana</institution>
				<addr-line>
					<city>Cali</city>
					<state>Valle del Cauca</state>
				</addr-line>
				<country country="CO">Colombia</country>
				<email>davba@javerianacali.edu.co</email>
			</aff>
			<author-notes>
				<fn fn-type="equal" id="fn2">
					<label>AUTHORS’ CONTRIBUTION</label>
					<p><bold>Jesús-Alberto Getial-Barragán:</bold> Research, Data curation, Software, Writing original draft. <bold>Ricardo Timarán-Pereira:</bold> Conceptualization, Supervision, Validation, Writing - review &amp; editing. <bold>David-Ramiro Bastidas-Torres:</bold> Formal analysis, Writing - review &amp; editing.</p>
				</fn>
				<fn fn-type="conflict" id="fn3">
					<label>CONFLICT OF INTEREST</label>
					<p> The authors declare no conflict of interest.</p>
				</fn>
			</author-notes>
			<pub-date date-type="pub" publication-format="electronic">
				<day>17</day>
				<month>10</month>
				<year>2025</year>
			</pub-date>
			<pub-date date-type="collection" publication-format="electronic">
				<season>Jul-Sep</season>
				<year>2025</year>
			</pub-date>
			<volume>34</volume>
			<issue>73</issue>
			<elocation-id>a5</elocation-id>
			<history>
				<date date-type="received">
					<day>24</day>
					<month>06</month>
					<year>2025</year>
				</date>
				<date date-type="accepted">
					<day>09</day>
					<month>09</month>
					<year>2025</year>
				</date>
				<date date-type="pub">
					<day>30</day>
					<month>09</month>
					<year>2025</year>
				</date>
			</history>
			<permissions>
				<license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/" xml:lang="en">
					<license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License</license-p>
				</license>
			</permissions>
			<abstract>
				<title>ABSTRACT</title>
				<p>This paper presents the results of a research project focused on effort estimation in software development using supervised machine learning techniques. To structure the analysis process, the CRISP-DM methodology was adopted, given that it is recognized for its comprehensive approach and wide acceptance in data mining. The study was based on a dataset provided by the International Software Benchmarking Standards Group (ISBSG), to which rigorous cleaning, transformation, and variable selection procedures were applied. Four effort categories were defined, and key variables for their classification were identified, including the functional size of the software, team productivity, programming language, and the implementation platform. Eight predictive models were developed using representative supervised learning algorithms: AdaBoost, Decision Trees, Random Forests, SVM, Multilayer Perceptron, KNN, Naive Bayes, and Logistic Regression. Their evaluation was carried out using metrics such as the F1-score, MCC, ROC-AUC, Gini index, accuracy, and standard deviations to assess performance and stability. The results show that tree-based models, particularly Random Forest, offer superior performance, achieving an accuracy of 80%. It is concluded that having systematized and high-quality data is fundamental for building reliable predictive models. As future work, the study proposes examining additional ensemble configurations, incorporating new algorithms, and using updated versions of the ISBSG repository.</p>
			</abstract>
			<trans-abstract xml:lang="es">
				<title>RESUMEN</title>
				<p>Este artículo presenta los resultados de un proyecto de investigación orientado a estimar el esfuerzo en proyectos de desarrollo de software mediante técnicas supervisadas de aprendizaje automático. El análisis se desarrolló siguiendo la metodología CRISP-DM, reconocida por su enfoque estructurado y amplia aceptación en minería de datos. El estudio utilizó el conjunto de datos del International Software Benchmarking Standards Group (ISBSG), al cual se aplicaron rigurosos procesos de limpieza, transformación y selección de variables. Se definieron cuatro categorías de esfuerzo y se identificaron variables clave para su clasificación, como el tamaño funcional del software, la productividad del equipo, el lenguaje de programación y la plataforma de implementación. Se construyeron ocho modelos predictivos empleando algoritmos representativos del aprendizaje supervisado: AdaBoost, Árboles de Decisión, Bosques Aleatorios, SVM, Perceptrón Multicapa, KNN, Naive Bayes y Regresión Logística. Su desempeño se evaluó mediante métricas como F1-score, MCC, ROC-AUC, índice de Gini, exactitud y desviaciones estándar, con el propósito de medir rendimiento y estabilidad. Los resultados muestran que los modelos basados en árboles, especialmente el Bosque Aleatorio, alcanzan el mejor desempeño, con una exactitud del 80 %. El estudio destaca contar con datos sistematizados y de alta calidad para construir modelos predictivos confiables. Como trabajo futuro se propone explorar configuraciones de ensamble, incorporar nuevos algoritmos y usar versiones actualizadas del ISBSG.</p>
			</trans-abstract>
			<trans-abstract xml:lang="pt">
				<title>RESUMO</title>
				<p>Este artigo apresenta os resultados de um projeto de pesquisa com o objetivo de estimar o esforço em projetos de desenvolvimento de software utilizando técnicas de aprendizado de máquina supervisionado. A análise foi desenvolvida seguindo a metodologia CRISP-DM, reconhecida por sua abordagem estruturada e ampla aceitação em mineração de dados. O estudo utilizou o conjunto de dados do International Software Benchmarking Standards Group (ISBSG), ao qual foram aplicados processos rigorosos de limpeza, transformação e seleção de variáveis. Quatro categorias de esforço foram definidas e variáveis-chave para sua classificação foram identificadas, como o tamanho funcional do software, a produtividade da equipe, a linguagem de programação e a plataforma de implementação. Oito modelos preditivos foram construídos utilizando algoritmos representativos de aprendizado supervisionado: AdaBoost, Árvores de Decisão, Florestas Aleatórias, SVM, Perceptron Multicamadas, KNN, Naive Bayes e Regressão Logística. Seu desempenho foi avaliado utilizando métricas como F1-score, MCC, ROC-AUC, índice de Gini, acurácia e desvios padrão, com o objetivo de mensurar o desempenho e a estabilidade. Os resultados mostram que os modelos baseados em árvores, especialmente a Floresta Aleatória, alcançam o melhor desempenho, com uma precisão de 80%. O estudo destaca a importância de se ter dados sistematizados e de alta qualidade para a construção de modelos preditivos confiáveis. Trabalhos futuros propõem explorar configurações de ensemble, incorporar novos algoritmos e utilizar versões atualizadas do ISBSG.</p>
			</trans-abstract>
			<kwd-group xml:lang="en">
				<title>Keywords:</title>
				<kwd>decision tree</kwd>
				<kwd>effort estimation</kwd>
				<kwd>machine learning</kwd>
				<kwd>random forest</kwd>
				<kwd>software development</kwd>
			</kwd-group>
			<kwd-group xml:lang="es">
				<title>Palabras clave:</title>
				<kwd>aprendizaje automático</kwd>
				<kwd>árbol de decisión</kwd>
				<kwd>bosque aleatorio</kwd>
				<kwd>desarrollo de software</kwd>
				<kwd>estimación de esfuerzo</kwd>
			</kwd-group>
			<kwd-group xml:lang="pt">
				<title>Palavras-chave:</title>
				<kwd>aprendizado de máquina</kwd>
				<kwd>árvore de decisão</kwd>
				<kwd>floresta aleatória</kwd>
				<kwd>desenvolvimento de software</kwd>
				<kwd>estimativa de esforço</kwd>
			</kwd-group>
			<counts>
				<fig-count count="2"/>
				<table-count count="5"/>
				<equation-count count="0"/>
				<ref-count count="18"/>
				<page-count count="8"/>
			</counts>
		</article-meta>
	</front>
	<body>
		<sec sec-type="intro">
			<title>1. INTRODUCTION</title>
			<p>Effort estimation in software projects is critical for resource allocation, scheduling, and cost control <sup>[</sup><xref ref-type="bibr" rid="B1"><sup>1</sup></xref><sup>,</sup><xref ref-type="bibr" rid="B2"><sup>2</sup></xref><sup>]</sup>. However, it remains difficult to predict accurately in dynamic and heterogeneous contexts <sup>[</sup><xref ref-type="bibr" rid="B3"><sup>3</sup></xref><sup>,</sup><xref ref-type="bibr" rid="B4"><sup>4</sup></xref><sup>]</sup>. Traditional approaches, such as parametric models or function points, often fail to capture project complexity or adapt to agile development practices <sup>[</sup><xref ref-type="bibr" rid="B5"><sup>5</sup></xref><sup>]</sup>. Machine learning techniques have been explored as alternatives; however, many studies rely on outdated datasets or evaluate only a few algorithms. Few studies have systematically assessed supervised models using recent ISBSG releases <sup>[</sup><xref ref-type="bibr" rid="B6"><sup>6</sup></xref><sup>,</sup><xref ref-type="bibr" rid="B7"><sup>7</sup></xref><sup>]</sup>. Existing evidence shows gains in accuracy and generalization <sup>[</sup><xref ref-type="bibr" rid="B1"><sup>1</sup></xref><sup>]</sup> through models such as decision trees <sup>[</sup><xref ref-type="bibr" rid="B8"><sup>8</sup></xref><sup>]</sup>, ensemble methods <sup>[</sup><xref ref-type="bibr" rid="B9"><sup>9</sup></xref><sup>,</sup><xref ref-type="bibr" rid="B10"><sup>10</sup></xref><sup>]</sup>, neural networks <sup>[</sup><xref ref-type="bibr" rid="B11"><sup>11</sup></xref><sup>]</sup>, analogy-based approaches <sup>[</sup><xref ref-type="bibr" rid="B12"><sup>12</sup></xref><sup>]</sup>, and hybrid techniques <sup>[</sup><xref ref-type="bibr" rid="B13"><sup>13</sup></xref><sup>]</sup>. Among these, Random Forest <sup>[</sup><xref ref-type="bibr" rid="B14"><sup>14</sup></xref><sup>]</sup> is highly effective due to its robustness with noisy and high-dimensional data.</p>
			<p>This study applies supervised classification models to ISBSG Release 2022R1 <sup>[</sup><xref ref-type="bibr" rid="B15"><sup>15</sup></xref><sup>]</sup> following the CRISP-DM methodology <sup>[</sup><xref ref-type="bibr" rid="B16"><sup>16</sup></xref><sup>]</sup>. Eight algorithms were evaluated: AdaBoost, Decision Tree, Random Forest, SVM, Multilayer Perceptron, KNN, Naïve Bayes, and Logistic Regression. Performance was measured using the F1-score, MCC, Gini, AUC, and accuracy <sup>[</sup><xref ref-type="bibr" rid="B17"><sup>17</sup></xref><sup>]</sup>, complemented by standard deviations to assess stability and generalization. Effort was discretized into four balanced quantile-based categories, reducing the influence of extreme values and improving the interpretability for project managers. Within this framework, functional size, productivity, programming language, and development platform emerged as the most influential factors for distinguishing effort levels.</p>
		</sec>
		<sec sec-type="materials|methods">
			<title>2. MATERIALS AND METHODS</title>
			<p>This study used the ISBSG Release 1 dataset (July 2022), which compiles historical information on software projects and serves as the basis for model analysis and validation <sup>[</sup><xref ref-type="bibr" rid="B15"><sup>15</sup></xref><sup>]</sup>. Data processing and modeling were performed using Python 3 and specialized libraries, such as Pandas, NumPy, and Scikit-learn.</p>
			<p>This research followed the CRISP-DM methodology across five phases. In the business understanding phase, the problem of effort estimation in software projects was defined. Data understanding involved analyzing the ISBSG repository in terms of variable nature, distribution, and quality. </p>
			<p>Data preparation included selecting a representative subset through cleaning, imputation, encoding, normalization, outlier removal, and discretization of the target variable. In the modeling phase, eight supervised algorithms widely discussed in the literature were implemented: AdaBoost, Decision Trees, Random Forest, SVM, Multilayer Perceptron, KNN, Naïve Bayes, and Logistic Regression. </p>
			<p>Finally, in the evaluation phase, standard metrics (F1-score, MCC, ROC-AUC, Gini index, and accuracy) and their standard deviations were applied to assess the model robustness and generalization capability.</p>
		</sec>
		<sec sec-type="results">
			<title>3. RESULTS</title>
			<sec>
				<title>3.1 Exploratory Data Analysis</title>
				<p>From the ISBSG repository, a subset of 3,124 projects and 233 variables (95 numerical and 138 categorical) was extracted. The selection considered Main Frame, Personal Computer, and MultiPlatform environments; functional size ratings A or B; normalization rates between 0.9 and 1.3; and NESMA or IFPUG 4+ counting approaches, excluding lines of code. </p>
				<p>After applying a 30% missing-value threshold per variable, 26 variables were retained, of which 20 were selected based on low collinearity (&lt;0.84, see <xref ref-type="table" rid="t2">Table 2</xref>). Imputation strategies varied by data type and proportion of missing values: mean and median for numerical variables (&lt;10% and 10-30%), and mode or C4.5 decision tree for categorical variables (&lt;10% and 10-30%). </p>
				<p>Numerical variables were normalized to the [0,1] range, whereas Project Year and Resource Level were ordinally encoded. The other categorical variables were transformed using ordinal encoding. Outliers were handled through a combination of Winsorization, IQR, and Z-score techniques. Finally, the target variable (effort) was discretized into four categories according to its quartiles (<xref ref-type="table" rid="t1">Table 1</xref>).</p>
				<p>
					<table-wrap id="t1">
						<label>Table 1</label>
						<caption>
							<title>Categories based on quantiles of the statistical distribution</title>
						</caption>
						<table>
							<colgroup>
								<col/>
								<col/>
								<col/>
								<col/>
							</colgroup>
							<tbody>
								<tr>
									<td align="center">Category 1</td>
									<td align="center"><italic>-∞&lt;Value&lt;Q1</italic></td>
									<td align="center">Very low effort</td>
									<td align="center"><italic>-∞ &lt;Value&lt; 500 hours</italic></td>
								</tr>
								<tr>
									<td align="center">Category 2</td>
									<td align="center"><italic>Q1&lt;Value&lt;Median</italic></td>
									<td align="center">Moderate effort</td>
									<td align="center"><italic>500 hours &lt;Value&lt; 1050 hours</italic></td>
								</tr>
								<tr>
									<td align="center">Category 3</td>
									<td align="center"><italic>Median&lt;Value&lt;Q3</italic></td>
									<td align="center">High effort</td>
									<td align="center"><italic>1050 hours&lt;Value&lt;2153 hours</italic></td>
								</tr>
								<tr>
									<td align="center">Category 4</td>
									<td align="center"><italic>Q3&lt;Value&lt;∞</italic></td>
									<td align="center">Very high effort</td>
									<td align="center"><italic>2153 hours &lt;Value&lt; ∞</italic></td>
								</tr>
							</tbody>
						</table>
					</table-wrap>
				</p>
				<p>
					<table-wrap id="t2">
						<label>Table 2</label>
						<caption>
							<title>Final dataset variables</title>
						</caption>
						<table>
							<colgroup>
								<col/>
								<col/>
							</colgroup>
							<thead>
								<tr>
									<th align="center">Numeric variables</th>
									<th align="center">Non-numeric variables</th>
								</tr>
							</thead>
							<tbody>
								<tr>
									<td align="center">
										<p>
											<list list-type="bullet">
												<list-item>
													<p>Functional size</p>
												</list-item>
												<list-item>
													<p>Delivery speed</p>
												</list-item>
												<list-item>
													<p>Total project elapsed time</p>
												</list-item>
												<list-item>
													<p>Project execution year</p>
												</list-item>
												<list-item>
													<p>Adjusted function points</p>
												</list-item>
												<list-item>
													<p>Summary work effort in hours</p>
												</list-item>
												<list-item>
													<p>Resource level</p>
												</list-item>
											</list>
										</p>
									</td>
									<td align="center">
										<p>
											<list list-type="bullet">
												<list-item>
													<p>Industry sector</p>
												</list-item>
												<list-item>
													<p>Primary programming language</p>
												</list-item>
												<list-item>
													<p>Software architecture type</p>
												</list-item>
												<list-item>
													<p>Functional size rating</p>
												</list-item>
												<list-item>
													<p>Application group type</p>
												</list-item>
												<list-item>
													<p>Software development type</p>
												</list-item>
												<list-item>
													<p>Development platform</p>
												</list-item>
												<list-item>
													<p>Language type</p>
												</list-item>
												<list-item>
													<p>Counting approach</p>
												</list-item>
												<list-item>
													<p>Relative size</p>
												</list-item>
												<list-item>
													<p>Implementation date</p>
												</list-item>
												<list-item>
													<p>Recording method</p>
												</list-item>
												<list-item>
													<p>Function points standard</p>
												</list-item>
											</list>
										</p>
									</td>
								</tr>
							</tbody>
						</table>
					</table-wrap>
				</p>
			</sec>
			<sec>
				<title>3.2 Results</title>
				<p>Hyperparameter optimization was performed using a grid search to select the configurations with the highest cross-validation accuracy. As shown in <xref ref-type="table" rid="t3">Table 3</xref>, Random Forest and Decision Tree achieved the best results in accuracy, F1, and MCC, which is consistent with the findings of Nassif et al. <sup>[</sup><xref ref-type="bibr" rid="B6"><sup>6</sup></xref><sup>]</sup> and Zakrani et al. <sup>[</sup><xref ref-type="bibr" rid="B14"><sup>14</sup></xref><sup>]</sup>, who also highlighted the stability of Random Forest in high-dimensional settings. To assess generalization and robustness, three partitioning schemes were applied (10%, 20%, and 30% for validation), with the remainder allocated for training and testing. This procedure enabled a sensitivity analysis across different configurations and prevented the results from depending on a specific data split. Among the tested combinations, the 10% validation, 20% test, and 70% training split offered the best balance for MSE, accuracy, and stability, yielding more consistent metrics than the other alternatives. </p>
				<p>The detailed results of these configurations are presented in <xref ref-type="table" rid="t4">Tables 4</xref> and <xref ref-type="table" rid="t5">5</xref>, corresponding to the Decision Tree and Random Forest, respectively. <xref ref-type="fig" rid="f1">Figures 1</xref> and <xref ref-type="fig" rid="f2">2</xref> illustrate the evolution of the mean squared error (MSE) for the training, validation, and test sets as the model depth increases.</p>
				<p>
					<table-wrap id="t3">
						<label>Table 3</label>
						<caption>
							<title>Metrics result for machine learning model selection</title>
						</caption>
						<table>
							<colgroup>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
							</colgroup>
							<thead>
								<tr>
									<th align="center">Model</th>
									<th align="center">Accuracy</th>
									<th align="center">F1</th>
									<th align="center">MCC</th>
									<th align="center">Gini</th>
									<th align="center">ROC</th>
								</tr>
							</thead>
							<tbody>
								<tr>
									<td align="center"><italic>Adaboost</italic></td>
									<td align="center">0.6906</td>
									<td align="center">0.6944</td>
									<td align="center">0.5872</td>
									<td align="center">0.5634</td>
									<td align="center">0.8271</td>
								</tr>
								<tr>
									<td align="center"><italic>Decision Tree</italic></td>
									<td align="center">0.7771</td>
									<td align="center">0.7791</td>
									<td align="center">0.7033</td>
									<td align="center">0.7999</td>
									<td align="center">0.8999</td>
								</tr>
								<tr>
									<td align="center"><italic>Random Forest</italic></td>
									<td align="center">0.8035</td>
									<td align="center">0.8063</td>
									<td align="center">0.7377</td>
									<td align="center">0.8844</td>
									<td align="center">0.9422</td>
								</tr>
								<tr>
									<td align="center"><italic>SVM</italic></td>
									<td align="center">0.5411</td>
									<td align="center">0.5432</td>
									<td align="center">0.3934</td>
									<td align="center">0.4980</td>
									<td align="center">0.7490</td>
								</tr>
								<tr>
									<td align="center"><italic>Multilayer Perceptron</italic></td>
									<td align="center">0.6906</td>
									<td align="center">0.6949</td>
									<td align="center">0.5872</td>
									<td align="center">0.7889</td>
									<td align="center">0.8945</td>
								</tr>
								<tr>
									<td align="center"><italic>KNN</italic></td>
									<td align="center">0.5117</td>
									<td align="center">0.5095</td>
									<td align="center">0.3508</td>
									<td align="center">0.5089</td>
									<td align="center">0.7545</td>
								</tr>
								<tr>
									<td align="center"><italic>Naive Bayes</italic></td>
									<td align="center">0.3006</td>
									<td align="center">0.2488</td>
									<td align="center">0.0897</td>
									<td align="center">0.1921</td>
									<td align="center">0.5960</td>
								</tr>
								<tr>
									<td align="center"><italic>Logistic</italic></td>
									<td align="center">0.3974</td>
									<td align="center">0.3826</td>
									<td align="center">0.2061</td>
									<td align="center">0.3244</td>
									<td align="center">0.6622</td>
								</tr>
							</tbody>
						</table>
					</table-wrap>
				</p>
				<p>
					<table-wrap id="t4">
						<label>Table 4</label>
						<caption>
							<title>Metrics result for the best partition in Decision Tree</title>
						</caption>
						<table>
							<colgroup>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
							</colgroup>
							<tbody>
								<tr>
									<td align="center">Dataset</td>
									<td align="center">%Validation</td>
									<td align="center">%Test</td>
									<td align="center">MSE</td>
									<td align="center">Accuracy</td>
									<td align="center">Recall</td>
									<td align="center">F1</td>
									<td align="center">Std MSE</td>
									<td align="center">Std Accuracy</td>
								</tr>
								<tr>
									<td align="center">Validation</td>
									<td align="center">10</td>
									<td align="center">20</td>
									<td align="center">0.415</td>
									<td align="center">0.732</td>
									<td align="center">0.736</td>
									<td align="center">0.730</td>
									<td align="center">0.333</td>
									<td align="center">0.104</td>
								</tr>
								<tr>
									<td align="center">Training</td>
									<td align="center">10</td>
									<td align="center">20</td>
									<td align="center">0.202</td>
									<td align="center">0.877</td>
									<td align="center">0.876</td>
									<td align="center">0.870</td>
									<td align="center">0.368</td>
									<td align="center">0.139</td>
								</tr>
								<tr>
									<td align="center">Test</td>
									<td align="center">10</td>
									<td align="center">20</td>
									<td align="center">0.329</td>
									<td align="center">0.787</td>
									<td align="center">0.786</td>
									<td align="center">0.781</td>
									<td align="center">0.346</td>
									<td align="center">0.112</td>
								</tr>
							</tbody>
						</table>
					</table-wrap>
				</p>
				<p>
					<table-wrap id="t5">
						<label>Table 5</label>
						<caption>
							<title>Metrics result for the best partition in Random Forest</title>
						</caption>
						<table>
							<colgroup>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
								<col/>
							</colgroup>
							<tbody>
								<tr>
									<td align="center">Dataset</td>
									<td align="center">%Validation</td>
									<td align="center">%Test</td>
									<td align="center">MSE</td>
									<td align="center">Accuracy</td>
									<td align="center">Recall</td>
									<td align="center">F1</td>
									<td align="center">Std MSE</td>
									<td align="center">Std Accuracy</td>
								</tr>
								<tr>
									<td align="center">Validation</td>
									<td align="center">10</td>
									<td align="center">20</td>
									<td align="center">0.240</td>
									<td align="center">0.821</td>
									<td align="center">0.819</td>
									<td align="center">0.823</td>
									<td align="center">0.024</td>
									<td align="center">0.009</td>
								</tr>
								<tr>
									<td align="center">Training</td>
									<td align="center">10</td>
									<td align="center">20</td>
									<td align="center">0.022</td>
									<td align="center">0.984</td>
									<td align="center">0.984</td>
									<td align="center">0.984</td>
									<td align="center">0.003</td>
									<td align="center">0.002</td>
								</tr>
								<tr>
									<td align="center">Test</td>
									<td align="center">10</td>
									<td align="center">20</td>
									<td align="center">0.193</td>
									<td align="center">0.848</td>
									<td align="center">0.847</td>
									<td align="center">0.848</td>
									<td align="center">0.006</td>
									<td align="center">0.006</td>
								</tr>
							</tbody>
						</table>
					</table-wrap>
				</p>
				<p>
					<fig id="f1">
						<label>Figure 1</label>
						<caption>
							<title><italic>MSE vs. Decision Tree Depth</italic></title>
						</caption>
						<graphic xlink:href="https://revistas.uptc.edu.co/index.php/ingenieria/article/download/20194/version/17733/16804/105541/0121-1129-rfing-34-73-a5-gf1.jpg"/>
					</fig>
				</p>
				<p>
					<fig id="f2">
						<label>Figure 2</label>
						<caption>
							<title><italic>MSE vs Random Forest Depth</italic></title>
						</caption>
						<graphic xlink:href="https://revistas.uptc.edu.co/index.php/ingenieria/article/download/20194/version/17733/16804/105542/0121-1129-rfing-34-73-a5-gf2.jpg"/>
					</fig>
				</p>
			</sec>
		</sec>
		<sec sec-type="discussion">
			<title>4. DISCUSSION</title>
			<p>Based on the results in <xref ref-type="table" rid="t3">Table 3</xref>, Random Forest emerged as the most robust model, achieving an accuracy of 0.8035, a macro F1 of 0.8063, the highest MCC (0.7377), a Gini coefficient of 0.8844, and an ROC-AUC of 0.9422, demonstrating stable classification capability. Decision Tree ranked second with an accuracy of 0.7771, macro F1 of 0.7791, MCC of 0.7033, Gini of 0.7999, and ROC-AUC of 0.8999. In both cases, tree-based models significantly outperformed AdaBoost and Multilayer Perceptron (accuracy near 0.69 and MCC of 0.5872), while SVM, KNN, Naïve Bayes, and Logistic Regression showed weaker performance, with accuracy not exceeding 0.40 in some cases, which limits their generalization capability. Beyond competitive performance, Decision Tree enabled the derivation of interpretable rules linking Adjusted Function Points (AFP), Productivity Delivery Rate (PDR), execution time, architecture, and programming language with four effort categories: (1) very low (≤ 500 hours), associated with small AFP, PDR ≤ 4.24, standard architectures, and short durations; (2) moderate (500-1050 hours), characterized by intermediate PDR, longer delivery times, and common languages; (3) high (1050-2153 hours), defined by extended execution, accelerated PDR, and specialized languages or complex architectures; and (4) very high (&gt; 2153 hours), associated with moderate AFP, PDR &gt; 25, longer durations, and less common platforms, implying greater uncertainty and planning risk.</p>
			<p>Random Forest, in contrast, showed signs of overfitting, with training accuracy near 98.5% versus 84.7% in testing, but maintained stability across metrics such as MSE and accuracy, with reduced standard deviations. The performance was sensitive to the validation and test proportions. The 10% validation and 20% test split yielded the best balance (84.7% accuracy and MSE of 0.192), whereas larger proportions reduced the learning efficiency. The number of estimators improved robustness until stabilizing between 50 and 100, in combination with moderate depths (8-12 nodes), where generalization was optimized without excessive computational cost.</p>
			<p>In summary, both Decision Tree and Random Forest proved to be the most effective approaches. The former, for its interpretability and ability to generate useful classification rules, and the latter, for its higher stability, accuracy, and robustness, though requiring careful hyperparameter tuning to avoid overfitting and to maintain a balance between performance and efficiency. While Decision Tree provided interpretability and computational efficiency, its lower generalization and sensitivity to data partitioning reduce its reliability in demanding scenarios. In contrast, Random Forest delivered a more consistent and robust performance, with lower variance and stability across variable configurations, and an optimal range of 50-100 estimators that balances accuracy and computational cost. These characteristics position Random Forest as the most suitable option for effort estimation on the ISBSG dataset, offering a superior balance of accuracy, stability, and practical applicability in real-world contexts.</p>
		</sec>
		<sec sec-type="conclusions">
			<title>5. CONCLUSIONS</title>
			<p>Discretizing effort into four balanced classes facilitated the classification and improved interpretability. Random Forest proved to be the most effective model (80.3% accuracy), standing out for its generalization capability and stability within an optimal range of 50-100 estimators. Decision Tree, although less accurate, provided interpretable rules that may be valuable in contexts with limited resources. The other models exhibited overfitting or performance below 55%.</p>
			<p>The optimal configuration was identified as 10% validation and 20% testing. The key variables influencing the classification included software size, team productivity, development platform, and programming language. A relevant limitation was the reduction of the ISBSG dataset from 233 to 26 variables due to missing data, underscoring the need to improve dataset quality.</p>
			<p>From a practical perspective, a model with 80% accuracy can reduce uncertainty in the early project phases and support better resource allocation. Future work will explore hybrid models integrating agility metrics <sup>[</sup><xref ref-type="bibr" rid="B2"><sup>2</sup></xref><sup>,</sup><xref ref-type="bibr" rid="B18"><sup>18</sup></xref><sup>]</sup> and explainable AI approaches to strengthen the utility and transparency of effort estimation.</p>
		</sec>
	</body>
	<back>
		<ref-list>
			<title>REFERENCES</title>
			<ref id="B1">
				<label>[1]</label>
				<mixed-citation>[1] V. Thakur, K. Dutta, “Machine learning based effort estimation models for software development projects related datasets with diverse features,” in <italic>Proceedings of the 2nd International Conference on Computational Intelligence, Communication Technology and Networks</italic>, Ghaziabad, India, 2025, pp. 807-813. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/CICTN64563.2025.10932601">https://doi.org/10.1109/CICTN64563.2025.10932601</ext-link>
				</mixed-citation>
				<element-citation publication-type="confproc">
					<person-group person-group-type="author">
						<name>
							<surname>Thakur</surname>
							<given-names>V.</given-names>
						</name>
						<name>
							<surname>Dutta</surname>
							<given-names>K.</given-names>
						</name>
					</person-group>
					<source>Machine learning based effort estimation models for software development projects related datasets with diverse features</source>
					<conf-name>Proceedings of the 2nd International Conference on Computational Intelligence, Communication Technology and Networks</conf-name>
					<publisher-loc>Ghaziabad, India</publisher-loc>
					<year>2025</year>
					<fpage>807</fpage>
					<lpage>813</lpage>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/CICTN64563.2025.10932601">https://doi.org/10.1109/CICTN64563.2025.10932601</ext-link>
				</element-citation>
			</ref>
			<ref id="B2">
				<label>[2]</label>
				<mixed-citation>[2] X. Zhao, X. Xiong, Z. Mansor, R. Razali, M. Z. Ahmad Nazri, L. Li, “A data-driven cost estimation model for agile development based on Kolmogorov-Arnold networks and AdamW optimization,” <italic>Journal of King Saud University - Computer and Information Sciences</italic>, <italic>vol.</italic> 
 <italic>37</italic>, e85, 2025. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s44443-025-00058-7">https://doi.org/10.1007/s44443-025-00058-7</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Zhao</surname>
							<given-names>X.</given-names>
						</name>
						<name>
							<surname>Xiong</surname>
							<given-names>X.</given-names>
						</name>
						<name>
							<surname>Mansor</surname>
							<given-names>Z.</given-names>
						</name>
						<name>
							<surname>Razali</surname>
							<given-names>R.</given-names>
						</name>
						<name>
							<surname>Ahmad Nazri</surname>
							<given-names>M. Z.</given-names>
						</name>
						<name>
							<surname>Li</surname>
							<given-names>L.</given-names>
						</name>
					</person-group>
					<article-title>A data-driven cost estimation model for agile development based on Kolmogorov-Arnold networks and AdamW optimization</article-title>
					<source>Journal of King Saud University - Computer and Information Sciences</source>
					<volume>37</volume>
					<size units="pages">e85</size>
					<year>2025</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s44443-025-00058-7">https://doi.org/10.1007/s44443-025-00058-7</ext-link>
				</element-citation>
			</ref>
			<ref id="B3">
				<label>[3]</label>
				<mixed-citation>[3] J. A. Timana Peña, C. Piñeros Rodríguez, L. Sierra Martínez, D. Peluffo Ordóñez, “Effort estimation in agile software development: A systematic map study,” <italic>INGE CUC</italic>, <italic>vol.</italic> 
 <italic>19</italic>, no. 1, pp. 22-36, 2023. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17981/ingecuc.19.1.2023.03">https://doi.org/10.17981/ingecuc.19.1.2023.03</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Timana Peña</surname>
							<given-names>J. A.</given-names>
						</name>
						<name>
							<surname>Piñeros Rodríguez</surname>
							<given-names>C.</given-names>
						</name>
						<name>
							<surname>Sierra Martínez</surname>
							<given-names>L.</given-names>
						</name>
						<name>
							<surname>Peluffo Ordóñez</surname>
							<given-names>D.</given-names>
						</name>
					</person-group>
					<article-title>Effort estimation in agile software development: A systematic map study</article-title>
					<source>INGE CUC</source>
					<volume>19</volume>
					<issue>1</issue>
					<fpage>22</fpage>
					<lpage>36</lpage>
					<year>2023</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17981/ingecuc.19.1.2023.03">https://doi.org/10.17981/ingecuc.19.1.2023.03</ext-link>
				</element-citation>
			</ref>
			<ref id="B4">
				<label>[4]</label>
				<mixed-citation>[4] M. Perkusich, L. C. e Silva, A. Costa, F. Ramos, R. Saraiva, A. Freire, <italic>et al</italic>., “Intelligent software engineering in the context of agile software development: A systematic literature review,” <italic>Information and Software Technology</italic>, <italic>vol.</italic> 
 <italic>119</italic>, e106241, Mar. 2020. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.infsof.2019.106241">https://doi.org/10.1016/j.infsof.2019.106241</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Perkusich</surname>
							<given-names>M.</given-names>
						</name>
						<name>
							<surname>e Silva</surname>
							<given-names>L. C.</given-names>
						</name>
						<name>
							<surname>Costa</surname>
							<given-names>A.</given-names>
						</name>
						<name>
							<surname>Ramos</surname>
							<given-names>F.</given-names>
						</name>
						<name>
							<surname>Saraiva</surname>
							<given-names>R.</given-names>
						</name>
						<name>
							<surname>Freire</surname>
							<given-names>A.</given-names>
						</name>
						<etal/>
					</person-group>
					<article-title>Intelligent software engineering in the context of agile software development: A systematic literature review</article-title>
					<source>Information and Software Technology</source>
					<volume>119</volume>
					<size units="pages">e106241</size>
					<year>2020</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.infsof.2019.106241">https://doi.org/10.1016/j.infsof.2019.106241</ext-link>
				</element-citation>
			</ref>
			<ref id="B5">
				<label>[5]</label>
				<mixed-citation>[5] M. Fernández-Diego, F. González-Ladrón-de-Guevara, “Potential and limitations of the ISBSG dataset in enhancing software engineering research: A mapping review,” <italic>Information and Software Technology</italic>, <italic>vol.</italic> 
 <italic>56</italic>, no. 6, pp. 527-544, Jun. 2014. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.infsof.2014.01.003">https://doi.org/10.1016/j.infsof.2014.01.003</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Fernández-Diego</surname>
							<given-names>M.</given-names>
						</name>
						<name>
							<surname>González-Ladrón-de-Guevara</surname>
							<given-names>F.</given-names>
						</name>
					</person-group>
					<article-title>Potential and limitations of the ISBSG dataset in enhancing software engineering research: A mapping review</article-title>
					<source>Information and Software Technology</source>
					<volume>56</volume>
					<issue>6</issue>
					<fpage>527</fpage>
					<lpage>544</lpage>
					<year>2014</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.infsof.2014.01.003">https://doi.org/10.1016/j.infsof.2014.01.003</ext-link>
				</element-citation>
			</ref>
			<ref id="B6">
				<label>[6]</label>
				<mixed-citation>[6] A. B. Nassif, M. Azzeh, L. F. Capretz, D. Ho, “A comparison between decision trees and decision tree forest models for software development effort estimation,” in <italic>Third</italic> 
 <italic>International Conference on Communications and Information Technology (ICCIT)</italic>, Beirut, Lebanon, 2013, pp. 220-224. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/ICCITechnology.2013.6579553">https://doi.org/10.1109/ICCITechnology.2013.6579553</ext-link>
				</mixed-citation>
				<element-citation publication-type="confproc">
					<person-group person-group-type="author">
						<name>
							<surname>Nassif</surname>
							<given-names>A. B.</given-names>
						</name>
						<name>
							<surname>Azzeh</surname>
							<given-names>M.</given-names>
						</name>
						<name>
							<surname>Capretz</surname>
							<given-names>L. F.</given-names>
						</name>
						<name>
							<surname>Ho</surname>
							<given-names>D.</given-names>
						</name>
					</person-group>
					<source>A comparison between decision trees and decision tree forest models for software development effort estimation</source>
					<conf-name>Third International Conference on Communications and Information Technology (ICCIT)</conf-name>
					<publisher-loc>Beirut, Lebanon</publisher-loc>
					<year>2013</year>
					<fpage>220</fpage>
					<lpage>224</lpage>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/ICCITechnology.2013.6579553">https://doi.org/10.1109/ICCITechnology.2013.6579553</ext-link>
				</element-citation>
			</ref>
			<ref id="B7">
				<label>[7]</label>
				<mixed-citation>[7] Ritu and P. Bhambri, “Enhancing software development effort estimation with a cloud-based data framework using use case points, fuzzy logic, and machine learning,” <italic>Discover Computing</italic>, <italic>vol.</italic> 
 <italic>28</italic>, e143, 2025. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10791-025-09668-1">https://doi.org/10.1007/s10791-025-09668-1</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Ritu</surname>
							<given-names/>
						</name>
						<name>
							<surname>Bhambri</surname>
							<given-names>P.</given-names>
						</name>
					</person-group>
					<article-title>Enhancing software development effort estimation with a cloud-based data framework using use case points, fuzzy logic, and machine learning</article-title>
					<source>Discover Computing</source>
					<volume>28</volume>
					<size units="pages">e143</size>
					<year>2025</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10791-025-09668-1">https://doi.org/10.1007/s10791-025-09668-1</ext-link>
				</element-citation>
			</ref>
			<ref id="B8">
				<label>[8]</label>
				<mixed-citation>[8] A. Najm, A. Zakrani, A. Marzak, “Decision trees based software development effort estimation: A systematic mapping study,” in <italic>Proceedings of the International Conference on Computer Science and Renewable Energies</italic>, Agadir, Morocco, 2019, pp. 1-6. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/ICCSRE.2019.8807544">https://doi.org/10.1109/ICCSRE.2019.8807544</ext-link>
				</mixed-citation>
				<element-citation publication-type="confproc">
					<person-group person-group-type="author">
						<name>
							<surname>Najm</surname>
							<given-names>A.</given-names>
						</name>
						<name>
							<surname>Zakrani</surname>
							<given-names>A.</given-names>
						</name>
						<name>
							<surname>Marzak</surname>
							<given-names>A.</given-names>
						</name>
					</person-group>
					<source>Decision trees based software development effort estimation: A systematic mapping study</source>
					<conf-name>Proceedings of the International Conference on Computer Science and Renewable Energies</conf-name>
					<publisher-loc>Agadir, Morocco</publisher-loc>
					<year>2019</year>
					<fpage>1</fpage>
					<lpage>6</lpage>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/ICCSRE.2019.8807544">https://doi.org/10.1109/ICCSRE.2019.8807544</ext-link>
				</element-citation>
			</ref>
			<ref id="B9">
				<label>[9]</label>
				<mixed-citation>[9] M. Hosni, A. Idri, A. Abran, “Investigating heterogeneous ensembles with filter feature selection for software effort estimation,” in <italic>Proceedings of the ACM International Conference on Software Engineering and Knowledge Engineering</italic>, Pittsburgh, USA, Jul. 2017, pp. 207-220. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/3143434.3143456">https://doi.org/10.1145/3143434.3143456</ext-link>
				</mixed-citation>
				<element-citation publication-type="confproc">
					<person-group person-group-type="author">
						<name>
							<surname>Hosni</surname>
							<given-names>M.</given-names>
						</name>
						<name>
							<surname>Idri</surname>
							<given-names>A.</given-names>
						</name>
						<name>
							<surname>Abran</surname>
							<given-names>A.</given-names>
						</name>
					</person-group>
					<source>Investigating heterogeneous ensembles with filter feature selection for software effort estimation</source>
					<conf-name>Proceedings of the ACM International Conference on Software Engineering and Knowledge Engineering</conf-name>
					<publisher-loc>Pittsburgh, USA</publisher-loc>
					<year>2017</year>
					<fpage>207</fpage>
					<lpage>220</lpage>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/3143434.3143456">https://doi.org/10.1145/3143434.3143456</ext-link>
				</element-citation>
			</ref>
			<ref id="B10">
				<label>[10]</label>
				<mixed-citation>[10] I. A. Al-Naimy, M. A. Al-Jawaherry, “Software effort estimation using ensemble learning methods,” <italic>AIP Conference Proceedings</italic>, <italic>vol.</italic> 
 <italic>3264</italic>, no. 1, e040021, Mar. 2025. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1063/5.0259225">https://doi.org/10.1063/5.0259225</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Al-Naimy</surname>
							<given-names>I. A.</given-names>
						</name>
						<name>
							<surname>Al-Jawaherry</surname>
							<given-names>M. A.</given-names>
						</name>
					</person-group>
					<article-title>Software effort estimation using ensemble learning methods</article-title>
					<source>AIP Conference Proceedings</source>
					<volume>3264</volume>
					<issue>1</issue>
					<size units="pages">e040021</size>
					<year>2025</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1063/5.0259225">https://doi.org/10.1063/5.0259225</ext-link>
				</element-citation>
			</ref>
			<ref id="B11">
				<label>[11]</label>
				<mixed-citation>[11] A. G. Priya Varshini, A. Kumari, J. Ramprasath, S. Rishi R, S. Balakrishnan, D. Deepak, “Optimized Convolutional Neural Network Model for Software Effort Estimation,” in <italic>Proceedings of the 2024 Third International Conference on Smart Technologies in Systems and Networking Computing</italic>, Villupuram, India, 2024, pp. 1-6. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/ICSTSN61422.2024.10671053">https://doi.org/10.1109/ICSTSN61422.2024.10671053</ext-link>
				</mixed-citation>
				<element-citation publication-type="confproc">
					<person-group person-group-type="author">
						<name>
							<surname>Priya Varshini</surname>
							<given-names>A. G.</given-names>
						</name>
						<name>
							<surname>Kumari</surname>
							<given-names>A.</given-names>
						</name>
						<name>
							<surname>Ramprasath</surname>
							<given-names>J.</given-names>
						</name>
						<name>
							<surname>Rishi R</surname>
							<given-names>S.</given-names>
						</name>
						<name>
							<surname>Balakrishnan</surname>
							<given-names>S.</given-names>
						</name>
						<name>
							<surname>Deepak</surname>
							<given-names>D.</given-names>
						</name>
					</person-group>
					<source>Optimized Convolutional Neural Network Model for Software Effort Estimation</source>
					<conf-name>Proceedings of the 2024 Third International Conference on Smart Technologies in Systems and Networking Computing</conf-name>
					<publisher-loc>Villupuram, India</publisher-loc>
					<year>2024</year>
					<fpage>1</fpage>
					<lpage>6</lpage>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/ICSTSN61422.2024.10671053">https://doi.org/10.1109/ICSTSN61422.2024.10671053</ext-link>
				</element-citation>
			</ref>
			<ref id="B12">
				<label>[12]</label>
				<mixed-citation>[12] A. Idri, I. Abnane, “Fuzzy analogy based effort estimation: An empirical comparative study,” in <italic>Proceedings of the IEEE International Conference on Computer and Information Technology</italic>, Helsinki, Finland, 2017, pp. 114-121. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/CIT.2017.29">https://doi.org/10.1109/CIT.2017.29</ext-link>
				</mixed-citation>
				<element-citation publication-type="confproc">
					<person-group person-group-type="author">
						<name>
							<surname>Idri</surname>
							<given-names>A.</given-names>
						</name>
						<name>
							<surname>Abnane</surname>
							<given-names>I.</given-names>
						</name>
					</person-group>
					<source>Fuzzy analogy based effort estimation: An empirical comparative study</source>
					<conf-name>Proceedings of the IEEE International Conference on Computer and Information Technology</conf-name>
					<publisher-loc>Helsinki, Finland</publisher-loc>
					<year>2017</year>
					<fpage>114</fpage>
					<lpage>121</lpage>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/CIT.2017.29">https://doi.org/10.1109/CIT.2017.29</ext-link>
				</element-citation>
			</ref>
			<ref id="B13">
				<label>[13]</label>
				<mixed-citation>[13] P. Rai, D. K. Verma, S. Kumar, “A hybrid model for prediction of software effort based on team size,” <italic>IET Software</italic>, <italic>vol.</italic> 
 <italic>15</italic>, no. 6, pp. 546-556, Dec. 2021. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1049/sfw2.12048">https://doi.org/10.1049/sfw2.12048</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Rai</surname>
							<given-names>P.</given-names>
						</name>
						<name>
							<surname>Verma</surname>
							<given-names>D. K.</given-names>
						</name>
						<name>
							<surname>Kumar</surname>
							<given-names>S.</given-names>
						</name>
					</person-group>
					<article-title>A hybrid model for prediction of software effort based on team size</article-title>
					<source>IET Software</source>
					<volume>15</volume>
					<issue>6</issue>
					<fpage>546</fpage>
					<lpage>556</lpage>
					<year>2021</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1049/sfw2.12048">https://doi.org/10.1049/sfw2.12048</ext-link>
				</element-citation>
			</ref>
			<ref id="B14">
				<label>[14]</label>
				<mixed-citation>[14] A. Zakrani, M. Hain, A. Namir, “Software development effort estimation using random forests: An empirical study and evaluation,” <italic>International Journal of Intelligent Engineering and Systems</italic>, <italic>vol.</italic> 
 <italic>11</italic>, no. 6, pp. 300-311, Dec. 2018. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.22266/ijies2018.1231.30">https://doi.org/10.22266/ijies2018.1231.30</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Zakrani</surname>
							<given-names>A.</given-names>
						</name>
						<name>
							<surname>Hain</surname>
							<given-names>M.</given-names>
						</name>
						<name>
							<surname>Namir</surname>
							<given-names>A.</given-names>
						</name>
					</person-group>
					<article-title>Software development effort estimation using random forests: An empirical study and evaluation</article-title>
					<source>International Journal of Intelligent Engineering and Systems</source>
					<volume>11</volume>
					<issue>6</issue>
					<fpage>300</fpage>
					<lpage>311</lpage>
					<year>2018</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.22266/ijies2018.1231.30">https://doi.org/10.22266/ijies2018.1231.30</ext-link>
				</element-citation>
			</ref>
			<ref id="B15">
				<label>[15]</label>
				<mixed-citation>[15] ISBSG, <italic>Academic research projects</italic>, 2023. <ext-link ext-link-type="uri" xlink:href="https://www.isbsg.org">https://www.isbsg.org</ext-link>
				</mixed-citation>
				<element-citation publication-type="book">
					<person-group person-group-type="author">
						<collab>ISBSG</collab>
					</person-group>
					<source>Academic research projects</source>
					<year>2023</year>
					<ext-link ext-link-type="uri" xlink:href="https://www.isbsg.org">https://www.isbsg.org</ext-link>
				</element-citation>
			</ref>
			<ref id="B16">
				<label>[16]</label>
				<mixed-citation>[16] A. M. Shimaoka, R. C. Ferreira, A. Goldman, “The evolution of CRISP-DM for Data Science: Methods, Processes and Frameworks,” <italic>SBC Reviews</italic>, <italic>vol.</italic> 4, no. 1, pp. 28-43, Oct. 2024. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5753/reviews.2024.3757">https://doi.org/10.5753/reviews.2024.3757</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Shimaoka</surname>
							<given-names>A. M.</given-names>
						</name>
						<name>
							<surname>Ferreira</surname>
							<given-names>R. C.</given-names>
						</name>
						<name>
							<surname>Goldman</surname>
							<given-names>A.</given-names>
						</name>
					</person-group>
					<article-title>The evolution of CRISP-DM for Data Science: Methods, Processes and Frameworks</article-title>
					<source>SBC Reviews</source>
					<volume>4</volume>
					<issue>1</issue>
					<fpage>28</fpage>
					<lpage>43</lpage>
					<year>2024</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5753/reviews.2024.3757">https://doi.org/10.5753/reviews.2024.3757</ext-link>
				</element-citation>
			</ref>
			<ref id="B17">
				<label>[17]</label>
				<mixed-citation>[17] F. González-Ladrón-de-Guevara, M. Fernández-Diego, “ISBSG variables most frequently used for software effort estimation: A mapping review,” in <italic>Proceedings of the 8th ACM/IEEE International Symposium on Empirical Software Engineering and Measurement</italic>, Torino, Italy, Sep. 2014, pp. 1-4. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/2652524.2652550">https://doi.org/10.1145/2652524.2652550</ext-link>
				</mixed-citation>
				<element-citation publication-type="confproc">
					<person-group person-group-type="author">
						<name>
							<surname>González-Ladrón-de-Guevara</surname>
							<given-names>F.</given-names>
						</name>
						<name>
							<surname>Fernández-Diego</surname>
							<given-names>M.</given-names>
						</name>
					</person-group>
					<source>ISBSG variables most frequently used for software effort estimation: A mapping review</source>
					<conf-name>Proceedings of the 8th ACM/IEEE International Symposium on Empirical Software Engineering and Measurement</conf-name>
					<publisher-loc>Torino, Italy</publisher-loc>
					<year>2014</year>
					<fpage>1</fpage>
					<lpage>4</lpage>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/2652524.2652550">https://doi.org/10.1145/2652524.2652550</ext-link>
				</element-citation>
			</ref>
			<ref id="B18">
				<label>[18]</label>
				<mixed-citation>[18] B. Alsaadi, K. Saeedi, “Data-driven effort estimation techniques of agile user stories: A systematic literature review,” <italic>Artificial Intelligence Review</italic>, vol. 55, no. 7, pp. 5485-5516, Oct. 2022. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10462-021-10132-x">https://doi.org/10.1007/s10462-021-10132-x</ext-link>
				</mixed-citation>
				<element-citation publication-type="journal">
					<person-group person-group-type="author">
						<name>
							<surname>Alsaadi</surname>
							<given-names>B.</given-names>
						</name>
						<name>
							<surname>Saeedi</surname>
							<given-names>K.</given-names>
						</name>
					</person-group>
					<article-title>Data-driven effort estimation techniques of agile user stories: A systematic literature review</article-title>
					<source>Artificial Intelligence Review</source>
					<volume>55</volume>
					<issue>7</issue>
					<fpage>5485</fpage>
					<lpage>5516</lpage>
					<year>2022</year>
					<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10462-021-10132-x">https://doi.org/10.1007/s10462-021-10132-x</ext-link>
				</element-citation>
			</ref>
		</ref-list>
		<fn-group>
			<fn fn-type="other" id="fn1">
				<label>How to cite:</label>
				<p> J. Getial-Barragán; R. Timarán-Pereira &amp; D. R. Bastidas-Torres, “Effort Estimation in Software Development Projects Using Supervised Machine Learning Techniques”. <italic>Revista Facultad de Ingeniería</italic>, vol. 34, no. 73, e20194, 2025. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.19053/01211129.v34.n73.2025.20194">https://doi.org/10.19053/01211129.v34.n73.2025.20194</ext-link>
				</p>
			</fn>
		</fn-group>
		<fn-group>
			<fn fn-type="financial-disclosure" id="fn4">
				<label>FUNDING</label>
				<p> No external funding was received for this study.</p>
			</fn>
		</fn-group>
	</back>
</article>